kernel/bpf/verifier.c

   1 // SPDX-License-Identifier: GPL-2.0-only
   2 /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
   3  * Copyright (c) 2016 Facebook
   4  * Copyright (c) 2018 Covalent IO, Inc. http://covalent.io
   5  */
   6 #include <uapi/linux/btf.h>
   7 #include <linux/bpf-cgroup.h>
   8 #include <linux/kernel.h>
   9 #include <linux/types.h>
  10 #include <linux/slab.h>
  11 #include <linux/bpf.h>
  12 #include <linux/btf.h>
  13 #include <linux/bpf_verifier.h>
  14 #include <linux/filter.h>
  15 #include <net/netlink.h>
  16 #include <linux/file.h>
  17 #include <linux/vmalloc.h>
  18 #include <linux/stringify.h>
  19 #include <linux/bsearch.h>
  20 #include <linux/sort.h>
  21 #include <linux/perf_event.h>
  22 #include <linux/ctype.h>
  23 #include <linux/error-injection.h>
  24 #include <linux/bpf_lsm.h>
  25 #include <linux/btf_ids.h>
  26 #include <linux/poison.h>
  27
  28 #include "disasm.h"
  29
  30 static const struct bpf_verifier_ops * const bpf_verifier_ops[] = {
  31 #define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \
  32         [_id] = & _name ## _verifier_ops,
  33 #define BPF_MAP_TYPE(_id, _ops)
  34 #define BPF_LINK_TYPE(_id, _name)
  35 #include <linux/bpf_types.h>
  36 #undef BPF_PROG_TYPE
  37 #undef BPF_MAP_TYPE
  38 #undef BPF_LINK_TYPE
  39 };
  40
  41 /* bpf_check() is a static code analyzer that walks eBPF program
  42  * instruction by instruction and updates register/stack state.
  43  * All paths of conditional branches are analyzed until 'bpf_exit' insn.
  44  *
  45  * The first pass is depth-first-search to check that the program is a DAG.
  46  * It rejects the following programs:
  47  * - larger than BPF_MAXINSNS insns
  48  * - if loop is present (detected via back-edge)
  49  * - unreachable insns exist (shouldn't be a forest. program = one function)
  50  * - out of bounds or malformed jumps
  51  * The second pass is all possible path descent from the 1st insn.
  52  * Since it's analyzing all paths through the program, the length of the
  53  * analysis is limited to 64k insn, which may be hit even if total number of
  54  * insn is less then 4K, but there are too many branches that change stack/regs.
  55  * Number of 'branches to be analyzed' is limited to 1k
  56  *
  57  * On entry to each instruction, each register has a type, and the instruction
  58  * changes the types of the registers depending on instruction semantics.
  59  * If instruction is BPF_MOV64_REG(BPF_REG_1, BPF_REG_5), then type of R5 is
  60  * copied to R1.
  61  *
  62  * All registers are 64-bit.
  63  * R0 - return register
  64  * R1-R5 argument passing registers
  65  * R6-R9 callee saved registers
  66  * R10 - frame pointer read-only
  67  *
  68  * At the start of BPF program the register R1 contains a pointer to bpf_context
  69  * and has type PTR_TO_CTX.
  70  *
  71  * Verifier tracks arithmetic operations on pointers in case:
  72  *    BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
  73  *    BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -20),
  74  * 1st insn copies R10 (which has FRAME_PTR) type into R1
  75  * and 2nd arithmetic instruction is pattern matched to recognize
  76  * that it wants to construct a pointer to some element within stack.
  77  * So after 2nd insn, the register R1 has type PTR_TO_STACK
  78  * (and -20 constant is saved for further stack bounds checking).
  79  * Meaning that this reg is a pointer to stack plus known immediate constant.
  80  *
  81  * Most of the time the registers have SCALAR_VALUE type, which
  82  * means the register has some value, but it's not a valid pointer.
  83  * (like pointer plus pointer becomes SCALAR_VALUE type)
  84  *
  85  * When verifier sees load or store instructions the type of base register
  86  * can be: PTR_TO_MAP_VALUE, PTR_TO_CTX, PTR_TO_STACK, PTR_TO_SOCKET. These are
  87  * four pointer types recognized by check_mem_access() function.
  88  *
  89  * PTR_TO_MAP_VALUE means that this register is pointing to 'map element value'
  90  * and the range of [ptr, ptr + map's value_size) is accessible.
  91  *
  92  * registers used to pass values to function calls are checked against
  93  * function argument constraints.
  94  *
  95  * ARG_PTR_TO_MAP_KEY is one of such argument constraints.
  96  * It means that the register type passed to this function must be
  97  * PTR_TO_STACK and it will be used inside the function as
  98  * 'pointer to map element key'
  99  *
 100  * For example the argument constraints for bpf_map_lookup_elem():
 101  *   .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
 102  *   .arg1_type = ARG_CONST_MAP_PTR,
 103  *   .arg2_type = ARG_PTR_TO_MAP_KEY,
 104  *
 105  * ret_type says that this function returns 'pointer to map elem value or null'
 106  * function expects 1st argument to be a const pointer to 'struct bpf_map' and
 107  * 2nd argument should be a pointer to stack, which will be used inside
 108  * the helper function as a pointer to map element key.
 109  *
 110  * On the kernel side the helper function looks like:
 111  * u64 bpf_map_lookup_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
 112  * {
 113  *    struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
 114  *    void *key = (void *) (unsigned long) r2;
 115  *    void *value;
 116  *
 117  *    here kernel can access 'key' and 'map' pointers safely, knowing that
 118  *    [key, key + map->key_size) bytes are valid and were initialized on
 119  *    the stack of eBPF program.
 120  * }
 121  *
 122  * Corresponding eBPF program may look like:
 123  *    BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),  // after this insn R2 type is FRAME_PTR
 124  *    BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), // after this insn R2 type is PTR_TO_STACK
 125  *    BPF_LD_MAP_FD(BPF_REG_1, map_fd),      // after this insn R1 type is CONST_PTR_TO_MAP
 126  *    BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
 127  * here verifier looks at prototype of map_lookup_elem() and sees:
 128  * .arg1_type == ARG_CONST_MAP_PTR and R1->type == CONST_PTR_TO_MAP, which is ok,
 129  * Now verifier knows that this map has key of R1->map_ptr->key_size bytes
 130  *
 131  * Then .arg2_type == ARG_PTR_TO_MAP_KEY and R2->type == PTR_TO_STACK, ok so far,
 132  * Now verifier checks that [R2, R2 + map's key_size) are within stack limits
 133  * and were initialized prior to this call.
 134  * If it's ok, then verifier allows this BPF_CALL insn and looks at
 135  * .ret_type which is RET_PTR_TO_MAP_VALUE_OR_NULL, so it sets
 136  * R0->type = PTR_TO_MAP_VALUE_OR_NULL which means bpf_map_lookup_elem() function
 137  * returns either pointer to map value or NULL.
 138  *
 139  * When type PTR_TO_MAP_VALUE_OR_NULL passes through 'if (reg != 0) goto +off'
 140  * insn, the register holding that pointer in the true branch changes state to
 141  * PTR_TO_MAP_VALUE and the same register changes state to CONST_IMM in the false
 142  * branch. See check_cond_jmp_op().
 143  *
 144  * After the call R0 is set to return type of the function and registers R1-R5
 145  * are set to NOT_INIT to indicate that they are no longer readable.
 146  *
 147  * The following reference types represent a potential reference to a kernel
 148  * resource which, after first being allocated, must be checked and freed by
 149  * the BPF program:
 150  * - PTR_TO_SOCKET_OR_NULL, PTR_TO_SOCKET
 151  *
 152  * When the verifier sees a helper call return a reference type, it allocates a
 153  * pointer id for the reference and stores it in the current function state.
 154  * Similar to the way that PTR_TO_MAP_VALUE_OR_NULL is converted into
 155  * PTR_TO_MAP_VALUE, PTR_TO_SOCKET_OR_NULL becomes PTR_TO_SOCKET when the type
 156  * passes through a NULL-check conditional. For the branch wherein the state is
 157  * changed to CONST_IMM, the verifier releases the reference.
 158  *
 159  * For each helper function that allocates a reference, such as
 160  * bpf_sk_lookup_tcp(), there is a corresponding release function, such as
 161  * bpf_sk_release(). When a reference type passes into the release function,
 162  * the verifier also releases the reference. If any unchecked or unreleased
 163  * reference remains at the end of the program, the verifier rejects it.
 164  */
 165
 166 /* verifier_state + insn_idx are pushed to stack when branch is encountered */
 167 struct bpf_verifier_stack_elem {
 168         /* verifer state is 'st'
 169          * before processing instruction 'insn_idx'
 170          * and after processing instruction 'prev_insn_idx'
 171          */
 172         struct bpf_verifier_state st;
 173         int insn_idx;
 174         int prev_insn_idx;
 175         struct bpf_verifier_stack_elem *next;
 176         /* length of verifier log at the time this state was pushed on stack */
 177         u32 log_pos;
 178 };
 179
 180 #define BPF_COMPLEXITY_LIMIT_JMP_SEQ    8192
 181 #define BPF_COMPLEXITY_LIMIT_STATES     64
 182
 183 #define BPF_MAP_KEY_POISON      (1ULL << 63)
 184 #define BPF_MAP_KEY_SEEN        (1ULL << 62)
 185
 186 #define BPF_MAP_PTR_UNPRIV      1UL
 187 #define BPF_MAP_PTR_POISON      ((void *)((0xeB9FUL << 1) +     \
 188                                           POISON_POINTER_DELTA))
 189 #define BPF_MAP_PTR(X)          ((struct bpf_map *)((X) & ~BPF_MAP_PTR_UNPRIV))
 190
 191 static int acquire_reference_state(struct bpf_verifier_env *env, int insn_idx);
 192 static int release_reference(struct bpf_verifier_env *env, int ref_obj_id);
 193
 194 static bool bpf_map_ptr_poisoned(const struct bpf_insn_aux_data *aux)
 195 {
 196         return BPF_MAP_PTR(aux->map_ptr_state) == BPF_MAP_PTR_POISON;
 197 }
 198
 199 static bool bpf_map_ptr_unpriv(const struct bpf_insn_aux_data *aux)
 200 {
 201         return aux->map_ptr_state & BPF_MAP_PTR_UNPRIV;
 202 }
 203
 204 static void bpf_map_ptr_store(struct bpf_insn_aux_data *aux,
 205                               const struct bpf_map *map, bool unpriv)
 206 {
 207         BUILD_BUG_ON((unsigned long)BPF_MAP_PTR_POISON & BPF_MAP_PTR_UNPRIV);
 208         unpriv |= bpf_map_ptr_unpriv(aux);
 209         aux->map_ptr_state = (unsigned long)map |
 210                              (unpriv ? BPF_MAP_PTR_UNPRIV : 0UL);
 211 }
 212
 213 static bool bpf_map_key_poisoned(const struct bpf_insn_aux_data *aux)
 214 {
 215         return aux->map_key_state & BPF_MAP_KEY_POISON;
 216 }
 217
 218 static bool bpf_map_key_unseen(const struct bpf_insn_aux_data *aux)
 219 {
 220         return !(aux->map_key_state & BPF_MAP_KEY_SEEN);
 221 }
 222
 223 static u64 bpf_map_key_immediate(const struct bpf_insn_aux_data *aux)
 224 {
 225         return aux->map_key_state & ~(BPF_MAP_KEY_SEEN | BPF_MAP_KEY_POISON);
 226 }
 227
 228 static void bpf_map_key_store(struct bpf_insn_aux_data *aux, u64 state)
 229 {
 230         bool poisoned = bpf_map_key_poisoned(aux);
 231
 232         aux->map_key_state = state | BPF_MAP_KEY_SEEN |
 233                              (poisoned ? BPF_MAP_KEY_POISON : 0ULL);
 234 }
 235
 236 static bool bpf_pseudo_call(const struct bpf_insn *insn)
 237 {
 238         return insn->code == (BPF_JMP | BPF_CALL) &&
 239                insn->src_reg == BPF_PSEUDO_CALL;
 240 }
 241
 242 static bool bpf_pseudo_kfunc_call(const struct bpf_insn *insn)
 243 {
 244         return insn->code == (BPF_JMP | BPF_CALL) &&
 245                insn->src_reg == BPF_PSEUDO_KFUNC_CALL;
 246 }
 247
 248 struct bpf_call_arg_meta {
 249         struct bpf_map *map_ptr;
 250         bool raw_mode;
 251         bool pkt_access;
 252         u8 release_regno;
 253         int regno;
 254         int access_size;
 255         int mem_size;
 256         u64 msize_max_value;
 257         int ref_obj_id;
 258         int map_uid;
 259         int func_id;
 260         struct btf *btf;
 261         u32 btf_id;
 262         struct btf *ret_btf;
 263         u32 ret_btf_id;
 264         u32 subprogno;
 265         struct btf_field *kptr_field;
 266         u8 uninit_dynptr_regno;
 267 };
 268
 269 struct btf *btf_vmlinux;
 270
 271 static DEFINE_MUTEX(bpf_verifier_lock);
 272
 273 static const struct bpf_line_info *
 274 find_linfo(const struct bpf_verifier_env *env, u32 insn_off)
 275 {
 276         const struct bpf_line_info *linfo;
 277         const struct bpf_prog *prog;
 278         u32 i, nr_linfo;
 279
 280         prog = env->prog;
 281         nr_linfo = prog->aux->nr_linfo;
 282
 283         if (!nr_linfo || insn_off >= prog->len)
 284                 return NULL;
 285
 286         linfo = prog->aux->linfo;
 287         for (i = 1; i < nr_linfo; i++)
 288                 if (insn_off < linfo[i].insn_off)
 289                         break;
 290
 291         return &linfo[i - 1];
 292 }
 293
 294 void bpf_verifier_vlog(struct bpf_verifier_log *log, const char *fmt,
 295                        va_list args)
 296 {
 297         unsigned int n;
 298
 299         n = vscnprintf(log->kbuf, BPF_VERIFIER_TMP_LOG_SIZE, fmt, args);
 300
 301         WARN_ONCE(n >= BPF_VERIFIER_TMP_LOG_SIZE - 1,
 302                   "verifier log line truncated - local buffer too short\n");
 303
 304         if (log->level == BPF_LOG_KERNEL) {
 305                 bool newline = n > 0 && log->kbuf[n - 1] == '\n';
 306
 307                 pr_err("BPF: %s%s", log->kbuf, newline ? "" : "\n");
 308                 return;
 309         }
 310
 311         n = min(log->len_total - log->len_used - 1, n);
 312         log->kbuf[n] = '\0';
 313         if (!copy_to_user(log->ubuf + log->len_used, log->kbuf, n + 1))
 314                 log->len_used += n;
 315         else
 316                 log->ubuf = NULL;
 317 }
 318
 319 static void bpf_vlog_reset(struct bpf_verifier_log *log, u32 new_pos)
 320 {
 321         char zero = 0;
 322
 323         if (!bpf_verifier_log_needed(log))
 324                 return;
 325
 326         log->len_used = new_pos;
 327         if (put_user(zero, log->ubuf + new_pos))
 328                 log->ubuf = NULL;
 329 }
 330
 331 /* log_level controls verbosity level of eBPF verifier.
 332  * bpf_verifier_log_write() is used to dump the verification trace to the log,
 333  * so the user can figure out what's wrong with the program
 334  */
 335 __printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env,
 336                                            const char *fmt, ...)
 337 {
 338         va_list args;
 339
 340         if (!bpf_verifier_log_needed(&env->log))
 341                 return;
 342
 343         va_start(args, fmt);
 344         bpf_verifier_vlog(&env->log, fmt, args);
 345         va_end(args);
 346 }
 347 EXPORT_SYMBOL_GPL(bpf_verifier_log_write);
 348
 349 __printf(2, 3) static void verbose(void *private_data, const char *fmt, ...)
 350 {
 351         struct bpf_verifier_env *env = private_data;
 352         va_list args;
 353
 354         if (!bpf_verifier_log_needed(&env->log))
 355                 return;
 356
 357         va_start(args, fmt);
 358         bpf_verifier_vlog(&env->log, fmt, args);
 359         va_end(args);
 360 }
 361
 362 __printf(2, 3) void bpf_log(struct bpf_verifier_log *log,
 363                             const char *fmt, ...)
 364 {
 365         va_list args;
 366
 367         if (!bpf_verifier_log_needed(log))
 368                 return;
 369
 370         va_start(args, fmt);
 371         bpf_verifier_vlog(log, fmt, args);
 372         va_end(args);
 373 }
 374 EXPORT_SYMBOL_GPL(bpf_log);
 375
 376 static const char *ltrim(const char *s)
 377 {
 378         while (isspace(*s))
 379                 s++;
 380
 381         return s;
 382 }
 383
 384 __printf(3, 4) static void verbose_linfo(struct bpf_verifier_env *env,
 385                                          u32 insn_off,
 386                                          const char *prefix_fmt, ...)
 387 {
 388         const struct bpf_line_info *linfo;
 389
 390         if (!bpf_verifier_log_needed(&env->log))
 391                 return;
 392
 393         linfo = find_linfo(env, insn_off);
 394         if (!linfo || linfo == env->prev_linfo)
 395                 return;
 396
 397         if (prefix_fmt) {
 398                 va_list args;
 399
 400                 va_start(args, prefix_fmt);
 401                 bpf_verifier_vlog(&env->log, prefix_fmt, args);
 402                 va_end(args);
 403         }
 404
 405         verbose(env, "%s\n",
 406                 ltrim(btf_name_by_offset(env->prog->aux->btf,
 407                                          linfo->line_off)));
 408
 409         env->prev_linfo = linfo;
 410 }
 411
 412 static void verbose_invalid_scalar(struct bpf_verifier_env *env,
 413                                    struct bpf_reg_state *reg,
 414                                    struct tnum *range, const char *ctx,
 415                                    const char *reg_name)
 416 {
 417         char tn_buf[48];
 418
 419         verbose(env, "At %s the register %s ", ctx, reg_name);
 420         if (!tnum_is_unknown(reg->var_off)) {
 421                 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
 422                 verbose(env, "has value %s", tn_buf);
 423         } else {
 424                 verbose(env, "has unknown scalar value");
 425         }
 426         tnum_strn(tn_buf, sizeof(tn_buf), *range);
 427         verbose(env, " should have been in %s\n", tn_buf);
 428 }
 429
 430 static bool type_is_pkt_pointer(enum bpf_reg_type type)
 431 {
 432         type = base_type(type);
 433         return type == PTR_TO_PACKET ||
 434                type == PTR_TO_PACKET_META;
 435 }
 436
 437 static bool type_is_sk_pointer(enum bpf_reg_type type)
 438 {
 439         return type == PTR_TO_SOCKET ||
 440                 type == PTR_TO_SOCK_COMMON ||
 441                 type == PTR_TO_TCP_SOCK ||
 442                 type == PTR_TO_XDP_SOCK;
 443 }
 444
 445 static bool reg_type_not_null(enum bpf_reg_type type)
 446 {
 447         return type == PTR_TO_SOCKET ||
 448                 type == PTR_TO_TCP_SOCK ||
 449                 type == PTR_TO_MAP_VALUE ||
 450                 type == PTR_TO_MAP_KEY ||
 451                 type == PTR_TO_SOCK_COMMON;
 452 }
 453
 454 static bool type_is_ptr_alloc_obj(u32 type)
 455 {
 456         return base_type(type) == PTR_TO_BTF_ID && type_flag(type) & MEM_ALLOC;
 457 }
 458
 459 static struct btf_record *reg_btf_record(const struct bpf_reg_state *reg)
 460 {
 461         struct btf_record *rec = NULL;
 462         struct btf_struct_meta *meta;
 463
 464         if (reg->type == PTR_TO_MAP_VALUE) {
 465                 rec = reg->map_ptr->record;
 466         } else if (type_is_ptr_alloc_obj(reg->type)) {
 467                 meta = btf_find_struct_meta(reg->btf, reg->btf_id);
 468                 if (meta)
 469                         rec = meta->record;
 470         }
 471         return rec;
 472 }
 473
 474 static bool reg_may_point_to_spin_lock(const struct bpf_reg_state *reg)
 475 {
 476         return btf_record_has_field(reg_btf_record(reg), BPF_SPIN_LOCK);
 477 }
 478
 479 static bool type_is_rdonly_mem(u32 type)
 480 {
 481         return type & MEM_RDONLY;
 482 }
 483
 484 static bool type_may_be_null(u32 type)
 485 {
 486         return type & PTR_MAYBE_NULL;
 487 }
 488
 489 static bool is_acquire_function(enum bpf_func_id func_id,
 490                                 const struct bpf_map *map)
 491 {
 492         enum bpf_map_type map_type = map ? map->map_type : BPF_MAP_TYPE_UNSPEC;
 493
 494         if (func_id == BPF_FUNC_sk_lookup_tcp ||
 495             func_id == BPF_FUNC_sk_lookup_udp ||
 496             func_id == BPF_FUNC_skc_lookup_tcp ||
 497             func_id == BPF_FUNC_ringbuf_reserve ||
 498             func_id == BPF_FUNC_kptr_xchg)
 499                 return true;
 500
 501         if (func_id == BPF_FUNC_map_lookup_elem &&
 502             (map_type == BPF_MAP_TYPE_SOCKMAP ||
 503              map_type == BPF_MAP_TYPE_SOCKHASH))
 504                 return true;
 505
 506         return false;
 507 }
 508
 509 static bool is_ptr_cast_function(enum bpf_func_id func_id)
 510 {
 511         return func_id == BPF_FUNC_tcp_sock ||
 512                 func_id == BPF_FUNC_sk_fullsock ||
 513                 func_id == BPF_FUNC_skc_to_tcp_sock ||
 514                 func_id == BPF_FUNC_skc_to_tcp6_sock ||
 515                 func_id == BPF_FUNC_skc_to_udp6_sock ||
 516                 func_id == BPF_FUNC_skc_to_mptcp_sock ||
 517                 func_id == BPF_FUNC_skc_to_tcp_timewait_sock ||
 518                 func_id == BPF_FUNC_skc_to_tcp_request_sock;
 519 }
 520
 521 static bool is_dynptr_ref_function(enum bpf_func_id func_id)
 522 {
 523         return func_id == BPF_FUNC_dynptr_data;
 524 }
 525
 526 static bool is_callback_calling_function(enum bpf_func_id func_id)
 527 {
 528         return func_id == BPF_FUNC_for_each_map_elem ||
 529                func_id == BPF_FUNC_timer_set_callback ||
 530                func_id == BPF_FUNC_find_vma ||
 531                func_id == BPF_FUNC_loop ||
 532                func_id == BPF_FUNC_user_ringbuf_drain;
 533 }
 534
 535 static bool is_storage_get_function(enum bpf_func_id func_id)
 536 {
 537         return func_id == BPF_FUNC_sk_storage_get ||
 538                func_id == BPF_FUNC_inode_storage_get ||
 539                func_id == BPF_FUNC_task_storage_get ||
 540                func_id == BPF_FUNC_cgrp_storage_get;
 541 }
 542
 543 static bool helper_multiple_ref_obj_use(enum bpf_func_id func_id,
 544                                         const struct bpf_map *map)
 545 {
 546         int ref_obj_uses = 0;
 547
 548         if (is_ptr_cast_function(func_id))
 549                 ref_obj_uses++;
 550         if (is_acquire_function(func_id, map))
 551                 ref_obj_uses++;
 552         if (is_dynptr_ref_function(func_id))
 553                 ref_obj_uses++;
 554
 555         return ref_obj_uses > 1;
 556 }
 557
 558 static bool is_cmpxchg_insn(const struct bpf_insn *insn)
 559 {
 560         return BPF_CLASS(insn->code) == BPF_STX &&
 561                BPF_MODE(insn->code) == BPF_ATOMIC &&
 562                insn->imm == BPF_CMPXCHG;
 563 }
 564
 565 /* string representation of 'enum bpf_reg_type'
 566  *
 567  * Note that reg_type_str() can not appear more than once in a single verbose()
 568  * statement.
 569  */
 570 static const char *reg_type_str(struct bpf_verifier_env *env,
 571                                 enum bpf_reg_type type)
 572 {
 573         char postfix[16] = {0}, prefix[64] = {0};
 574         static const char * const str[] = {
 575                 [NOT_INIT]              = "?",
 576                 [SCALAR_VALUE]          = "scalar",
 577                 [PTR_TO_CTX]            = "ctx",
 578                 [CONST_PTR_TO_MAP]      = "map_ptr",
 579                 [PTR_TO_MAP_VALUE]      = "map_value",
 580                 [PTR_TO_STACK]          = "fp",
 581                 [PTR_TO_PACKET]         = "pkt",
 582                 [PTR_TO_PACKET_META]    = "pkt_meta",
 583                 [PTR_TO_PACKET_END]     = "pkt_end",
 584                 [PTR_TO_FLOW_KEYS]      = "flow_keys",
 585                 [PTR_TO_SOCKET]         = "sock",
 586                 [PTR_TO_SOCK_COMMON]    = "sock_common",
 587                 [PTR_TO_TCP_SOCK]       = "tcp_sock",
 588                 [PTR_TO_TP_BUFFER]      = "tp_buffer",
 589                 [PTR_TO_XDP_SOCK]       = "xdp_sock",
 590                 [PTR_TO_BTF_ID]         = "ptr_",
 591                 [PTR_TO_MEM]            = "mem",
 592                 [PTR_TO_BUF]            = "buf",
 593                 [PTR_TO_FUNC]           = "func",
 594                 [PTR_TO_MAP_KEY]        = "map_key",
 595                 [CONST_PTR_TO_DYNPTR]   = "dynptr_ptr",
 596         };
 597
 598         if (type & PTR_MAYBE_NULL) {
 599                 if (base_type(type) == PTR_TO_BTF_ID)
 600                         strncpy(postfix, "or_null_", 16);
 601                 else
 602                         strncpy(postfix, "_or_null", 16);
 603         }
 604
 605         snprintf(prefix, sizeof(prefix), "%s%s%s%s%s%s%s",
 606                  type & MEM_RDONLY ? "rdonly_" : "",
 607                  type & MEM_RINGBUF ? "ringbuf_" : "",
 608                  type & MEM_USER ? "user_" : "",
 609                  type & MEM_PERCPU ? "percpu_" : "",
 610                  type & MEM_RCU ? "rcu_" : "",
 611                  type & PTR_UNTRUSTED ? "untrusted_" : "",
 612                  type & PTR_TRUSTED ? "trusted_" : ""
 613         );
 614
 615         snprintf(env->type_str_buf, TYPE_STR_BUF_LEN, "%s%s%s",
 616                  prefix, str[base_type(type)], postfix);
 617         return env->type_str_buf;
 618 }
 619
 620 static char slot_type_char[] = {
 621         [STACK_INVALID] = '?',
 622         [STACK_SPILL]   = 'r',
 623         [STACK_MISC]    = 'm',
 624         [STACK_ZERO]    = '0',
 625         [STACK_DYNPTR]  = 'd',
 626 };
 627
 628 static void print_liveness(struct bpf_verifier_env *env,
 629                            enum bpf_reg_liveness live)
 630 {
 631         if (live & (REG_LIVE_READ | REG_LIVE_WRITTEN | REG_LIVE_DONE))
 632             verbose(env, "_");
 633         if (live & REG_LIVE_READ)
 634                 verbose(env, "r");
 635         if (live & REG_LIVE_WRITTEN)
 636                 verbose(env, "w");
 637         if (live & REG_LIVE_DONE)
 638                 verbose(env, "D");
 639 }
 640
 641 static int get_spi(s32 off)
 642 {
 643         return (-off - 1) / BPF_REG_SIZE;
 644 }
 645
 646 static bool is_spi_bounds_valid(struct bpf_func_state *state, int spi, int nr_slots)
 647 {
 648         int allocated_slots = state->allocated_stack / BPF_REG_SIZE;
 649
 650         /* We need to check that slots between [spi - nr_slots + 1, spi] are
 651          * within [0, allocated_stack).
 652          *
 653          * Please note that the spi grows downwards. For example, a dynptr
 654          * takes the size of two stack slots; the first slot will be at
 655          * spi and the second slot will be at spi - 1.
 656          */
 657         return spi - nr_slots + 1 >= 0 && spi < allocated_slots;
 658 }
 659
 660 static struct bpf_func_state *func(struct bpf_verifier_env *env,
 661                                    const struct bpf_reg_state *reg)
 662 {
 663         struct bpf_verifier_state *cur = env->cur_state;
 664
 665         return cur->frame[reg->frameno];
 666 }
 667
 668 static const char *kernel_type_name(const struct btf* btf, u32 id)
 669 {
 670         return btf_name_by_offset(btf, btf_type_by_id(btf, id)->name_off);
 671 }
 672
 673 static void mark_reg_scratched(struct bpf_verifier_env *env, u32 regno)
 674 {
 675         env->scratched_regs |= 1U << regno;
 676 }
 677
 678 static void mark_stack_slot_scratched(struct bpf_verifier_env *env, u32 spi)
 679 {
 680         env->scratched_stack_slots |= 1ULL << spi;
 681 }
 682
 683 static bool reg_scratched(const struct bpf_verifier_env *env, u32 regno)
 684 {
 685         return (env->scratched_regs >> regno) & 1;
 686 }
 687
 688 static bool stack_slot_scratched(const struct bpf_verifier_env *env, u64 regno)
 689 {
 690         return (env->scratched_stack_slots >> regno) & 1;
 691 }
 692
 693 static bool verifier_state_scratched(const struct bpf_verifier_env *env)
 694 {
 695         return env->scratched_regs || env->scratched_stack_slots;
 696 }
 697
 698 static void mark_verifier_state_clean(struct bpf_verifier_env *env)
 699 {
 700         env->scratched_regs = 0U;
 701         env->scratched_stack_slots = 0ULL;
 702 }
 703
 704 /* Used for printing the entire verifier state. */
 705 static void mark_verifier_state_scratched(struct bpf_verifier_env *env)
 706 {
 707         env->scratched_regs = ~0U;
 708         env->scratched_stack_slots = ~0ULL;
 709 }
 710
 711 static enum bpf_dynptr_type arg_to_dynptr_type(enum bpf_arg_type arg_type)
 712 {
 713         switch (arg_type & DYNPTR_TYPE_FLAG_MASK) {
 714         case DYNPTR_TYPE_LOCAL:
 715                 return BPF_DYNPTR_TYPE_LOCAL;
 716         case DYNPTR_TYPE_RINGBUF:
 717                 return BPF_DYNPTR_TYPE_RINGBUF;
 718         default:
 719                 return BPF_DYNPTR_TYPE_INVALID;
 720         }
 721 }
 722
 723 static bool dynptr_type_refcounted(enum bpf_dynptr_type type)
 724 {
 725         return type == BPF_DYNPTR_TYPE_RINGBUF;
 726 }
 727
 728 static void __mark_dynptr_reg(struct bpf_reg_state *reg,
 729                               enum bpf_dynptr_type type,
 730                               bool first_slot);
 731
 732 static void __mark_reg_not_init(const struct bpf_verifier_env *env,
 733                                 struct bpf_reg_state *reg);
 734
 735 static void mark_dynptr_stack_regs(struct bpf_reg_state *sreg1,
 736                                    struct bpf_reg_state *sreg2,
 737                                    enum bpf_dynptr_type type)
 738 {
 739         __mark_dynptr_reg(sreg1, type, true);
 740         __mark_dynptr_reg(sreg2, type, false);
 741 }
 742
 743 static void mark_dynptr_cb_reg(struct bpf_reg_state *reg,
 744                                enum bpf_dynptr_type type)
 745 {
 746         __mark_dynptr_reg(reg, type, true);
 747 }
 748
 749
 750 static int mark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
 751                                    enum bpf_arg_type arg_type, int insn_idx)
 752 {
 753         struct bpf_func_state *state = func(env, reg);
 754         enum bpf_dynptr_type type;
 755         int spi, i, id;
 756
 757         spi = get_spi(reg->off);
 758
 759         if (!is_spi_bounds_valid(state, spi, BPF_DYNPTR_NR_SLOTS))
 760                 return -EINVAL;
 761
 762         for (i = 0; i < BPF_REG_SIZE; i++) {
 763                 state->stack[spi].slot_type[i] = STACK_DYNPTR;
 764                 state->stack[spi - 1].slot_type[i] = STACK_DYNPTR;
 765         }
 766
 767         type = arg_to_dynptr_type(arg_type);
 768         if (type == BPF_DYNPTR_TYPE_INVALID)
 769                 return -EINVAL;
 770
 771         mark_dynptr_stack_regs(&state->stack[spi].spilled_ptr,
 772                                &state->stack[spi - 1].spilled_ptr, type);
 773
 774         if (dynptr_type_refcounted(type)) {
 775                 /* The id is used to track proper releasing */
 776                 id = acquire_reference_state(env, insn_idx);
 777                 if (id < 0)
 778                         return id;
 779
 780                 state->stack[spi].spilled_ptr.ref_obj_id = id;
 781                 state->stack[spi - 1].spilled_ptr.ref_obj_id = id;
 782         }
 783
 784         return 0;
 785 }
 786
 787 static int unmark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
 788 {
 789         struct bpf_func_state *state = func(env, reg);
 790         int spi, i;
 791
 792         spi = get_spi(reg->off);
 793
 794         if (!is_spi_bounds_valid(state, spi, BPF_DYNPTR_NR_SLOTS))
 795                 return -EINVAL;
 796
 797         for (i = 0; i < BPF_REG_SIZE; i++) {
 798                 state->stack[spi].slot_type[i] = STACK_INVALID;
 799                 state->stack[spi - 1].slot_type[i] = STACK_INVALID;
 800         }
 801
 802         /* Invalidate any slices associated with this dynptr */
 803         if (dynptr_type_refcounted(state->stack[spi].spilled_ptr.dynptr.type))
 804                 WARN_ON_ONCE(release_reference(env, state->stack[spi].spilled_ptr.ref_obj_id));
 805
 806         __mark_reg_not_init(env, &state->stack[spi].spilled_ptr);
 807         __mark_reg_not_init(env, &state->stack[spi - 1].spilled_ptr);
 808         return 0;
 809 }
 810
 811 static bool is_dynptr_reg_valid_uninit(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
 812 {
 813         struct bpf_func_state *state = func(env, reg);
 814         int spi, i;
 815
 816         if (reg->type == CONST_PTR_TO_DYNPTR)
 817                 return false;
 818
 819         spi = get_spi(reg->off);
 820         if (!is_spi_bounds_valid(state, spi, BPF_DYNPTR_NR_SLOTS))
 821                 return true;
 822
 823         for (i = 0; i < BPF_REG_SIZE; i++) {
 824                 if (state->stack[spi].slot_type[i] == STACK_DYNPTR ||
 825                     state->stack[spi - 1].slot_type[i] == STACK_DYNPTR)
 826                         return false;
 827         }
 828
 829         return true;
 830 }
 831
 832 static bool is_dynptr_reg_valid_init(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
 833 {
 834         struct bpf_func_state *state = func(env, reg);
 835         int spi;
 836         int i;
 837
 838         /* This already represents first slot of initialized bpf_dynptr */
 839         if (reg->type == CONST_PTR_TO_DYNPTR)
 840                 return true;
 841
 842         spi = get_spi(reg->off);
 843         if (!is_spi_bounds_valid(state, spi, BPF_DYNPTR_NR_SLOTS) ||
 844             !state->stack[spi].spilled_ptr.dynptr.first_slot)
 845                 return false;
 846
 847         for (i = 0; i < BPF_REG_SIZE; i++) {
 848                 if (state->stack[spi].slot_type[i] != STACK_DYNPTR ||
 849                     state->stack[spi - 1].slot_type[i] != STACK_DYNPTR)
 850                         return false;
 851         }
 852
 853         return true;
 854 }
 855
 856 static bool is_dynptr_type_expected(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
 857                                     enum bpf_arg_type arg_type)
 858 {
 859         struct bpf_func_state *state = func(env, reg);
 860         enum bpf_dynptr_type dynptr_type;
 861         int spi;
 862
 863         /* ARG_PTR_TO_DYNPTR takes any type of dynptr */
 864         if (arg_type == ARG_PTR_TO_DYNPTR)
 865                 return true;
 866
 867         dynptr_type = arg_to_dynptr_type(arg_type);
 868         if (reg->type == CONST_PTR_TO_DYNPTR) {
 869                 return reg->dynptr.type == dynptr_type;
 870         } else {
 871                 spi = get_spi(reg->off);
 872                 return state->stack[spi].spilled_ptr.dynptr.type == dynptr_type;
 873         }
 874 }
 875
 876 /* The reg state of a pointer or a bounded scalar was saved when
 877  * it was spilled to the stack.
 878  */
 879 static bool is_spilled_reg(const struct bpf_stack_state *stack)
 880 {
 881         return stack->slot_type[BPF_REG_SIZE - 1] == STACK_SPILL;
 882 }
 883
 884 static void scrub_spilled_slot(u8 *stype)
 885 {
 886         if (*stype != STACK_INVALID)
 887                 *stype = STACK_MISC;
 888 }
 889
 890 static void print_verifier_state(struct bpf_verifier_env *env,
 891                                  const struct bpf_func_state *state,
 892                                  bool print_all)
 893 {
 894         const struct bpf_reg_state *reg;
 895         enum bpf_reg_type t;
 896         int i;
 897
 898         if (state->frameno)
 899                 verbose(env, " frame%d:", state->frameno);
 900         for (i = 0; i < MAX_BPF_REG; i++) {
 901                 reg = &state->regs[i];
 902                 t = reg->type;
 903                 if (t == NOT_INIT)
 904                         continue;
 905                 if (!print_all && !reg_scratched(env, i))
 906                         continue;
 907                 verbose(env, " R%d", i);
 908                 print_liveness(env, reg->live);
 909                 verbose(env, "=");
 910                 if (t == SCALAR_VALUE && reg->precise)
 911                         verbose(env, "P");
 912                 if ((t == SCALAR_VALUE || t == PTR_TO_STACK) &&
 913                     tnum_is_const(reg->var_off)) {
 914                         /* reg->off should be 0 for SCALAR_VALUE */
 915                         verbose(env, "%s", t == SCALAR_VALUE ? "" : reg_type_str(env, t));
 916                         verbose(env, "%lld", reg->var_off.value + reg->off);
 917                 } else {
 918                         const char *sep = "";
 919
 920                         verbose(env, "%s", reg_type_str(env, t));
 921                         if (base_type(t) == PTR_TO_BTF_ID)
 922                                 verbose(env, "%s", kernel_type_name(reg->btf, reg->btf_id));
 923                         verbose(env, "(");
 924 /*
 925  * _a stands for append, was shortened to avoid multiline statements below.
 926  * This macro is used to output a comma separated list of attributes.
 927  */
 928 #define verbose_a(fmt, ...) ({ verbose(env, "%s" fmt, sep, __VA_ARGS__); sep = ","; })
 929
 930                         if (reg->id)
 931                                 verbose_a("id=%d", reg->id);
 932                         if (reg->ref_obj_id)
 933                                 verbose_a("ref_obj_id=%d", reg->ref_obj_id);
 934                         if (t != SCALAR_VALUE)
 935                                 verbose_a("off=%d", reg->off);
 936                         if (type_is_pkt_pointer(t))
 937                                 verbose_a("r=%d", reg->range);
 938                         else if (base_type(t) == CONST_PTR_TO_MAP ||
 939                                  base_type(t) == PTR_TO_MAP_KEY ||
 940                                  base_type(t) == PTR_TO_MAP_VALUE)
 941                                 verbose_a("ks=%d,vs=%d",
 942                                           reg->map_ptr->key_size,
 943                                           reg->map_ptr->value_size);
 944                         if (tnum_is_const(reg->var_off)) {
 945                                 /* Typically an immediate SCALAR_VALUE, but
 946                                  * could be a pointer whose offset is too big
 947                                  * for reg->off
 948                                  */
 949                                 verbose_a("imm=%llx", reg->var_off.value);
 950                         } else {
 951                                 if (reg->smin_value != reg->umin_value &&
 952                                     reg->smin_value != S64_MIN)
 953                                         verbose_a("smin=%lld", (long long)reg->smin_value);
 954                                 if (reg->smax_value != reg->umax_value &&
 955                                     reg->smax_value != S64_MAX)
 956                                         verbose_a("smax=%lld", (long long)reg->smax_value);
 957                                 if (reg->umin_value != 0)
 958                                         verbose_a("umin=%llu", (unsigned long long)reg->umin_value);
 959                                 if (reg->umax_value != U64_MAX)
 960                                         verbose_a("umax=%llu", (unsigned long long)reg->umax_value);
 961                                 if (!tnum_is_unknown(reg->var_off)) {
 962                                         char tn_buf[48];
 963
 964                                         tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
 965                                         verbose_a("var_off=%s", tn_buf);
 966                                 }
 967                                 if (reg->s32_min_value != reg->smin_value &&
 968                                     reg->s32_min_value != S32_MIN)
 969                                         verbose_a("s32_min=%d", (int)(reg->s32_min_value));
 970                                 if (reg->s32_max_value != reg->smax_value &&
 971                                     reg->s32_max_value != S32_MAX)
 972                                         verbose_a("s32_max=%d", (int)(reg->s32_max_value));
 973                                 if (reg->u32_min_value != reg->umin_value &&
 974                                     reg->u32_min_value != U32_MIN)
 975                                         verbose_a("u32_min=%d", (int)(reg->u32_min_value));
 976                                 if (reg->u32_max_value != reg->umax_value &&
 977                                     reg->u32_max_value != U32_MAX)
 978                                         verbose_a("u32_max=%d", (int)(reg->u32_max_value));
 979                         }
 980 #undef verbose_a
 981
 982                         verbose(env, ")");
 983                 }
 984         }
 985         for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
 986                 char types_buf[BPF_REG_SIZE + 1];
 987                 bool valid = false;
 988                 int j;
 989
 990                 for (j = 0; j < BPF_REG_SIZE; j++) {
 991                         if (state->stack[i].slot_type[j] != STACK_INVALID)
 992                                 valid = true;
 993                         types_buf[j] = slot_type_char[
 994                                         state->stack[i].slot_type[j]];
 995                 }
 996                 types_buf[BPF_REG_SIZE] = 0;
 997                 if (!valid)
 998                         continue;
 999                 if (!print_all && !stack_slot_scratched(env, i))
1000                         continue;
1001                 verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE);
1002                 print_liveness(env, state->stack[i].spilled_ptr.live);
1003                 if (is_spilled_reg(&state->stack[i])) {
1004                         reg = &state->stack[i].spilled_ptr;
1005                         t = reg->type;
1006                         verbose(env, "=%s", t == SCALAR_VALUE ? "" : reg_type_str(env, t));
1007                         if (t == SCALAR_VALUE && reg->precise)
1008                                 verbose(env, "P");
1009                         if (t == SCALAR_VALUE && tnum_is_const(reg->var_off))
1010                                 verbose(env, "%lld", reg->var_off.value + reg->off);
1011                 } else {
1012                         verbose(env, "=%s", types_buf);
1013                 }
1014         }
1015         if (state->acquired_refs && state->refs[0].id) {
1016                 verbose(env, " refs=%d", state->refs[0].id);
1017                 for (i = 1; i < state->acquired_refs; i++)
1018                         if (state->refs[i].id)
1019                                 verbose(env, ",%d", state->refs[i].id);
1020         }
1021         if (state->in_callback_fn)
1022                 verbose(env, " cb");
1023         if (state->in_async_callback_fn)
1024                 verbose(env, " async_cb");
1025         verbose(env, "\n");
1026         mark_verifier_state_clean(env);
1027 }
1028
1029 static inline u32 vlog_alignment(u32 pos)
1030 {
1031         return round_up(max(pos + BPF_LOG_MIN_ALIGNMENT / 2, BPF_LOG_ALIGNMENT),
1032                         BPF_LOG_MIN_ALIGNMENT) - pos - 1;
1033 }
1034
1035 static void print_insn_state(struct bpf_verifier_env *env,
1036                              const struct bpf_func_state *state)
1037 {
1038         if (env->prev_log_len && env->prev_log_len == env->log.len_used) {
1039                 /* remove new line character */
1040                 bpf_vlog_reset(&env->log, env->prev_log_len - 1);
1041                 verbose(env, "%*c;", vlog_alignment(env->prev_insn_print_len), ' ');
1042         } else {
1043                 verbose(env, "%d:", env->insn_idx);
1044         }
1045         print_verifier_state(env, state, false);
1046 }
1047
1048 /* copy array src of length n * size bytes to dst. dst is reallocated if it's too
1049  * small to hold src. This is different from krealloc since we don't want to preserve
1050  * the contents of dst.
1051  *
1052  * Leaves dst untouched if src is NULL or length is zero. Returns NULL if memory could
1053  * not be allocated.
1054  */
1055 static void *copy_array(void *dst, const void *src, size_t n, size_t size, gfp_t flags)
1056 {
1057         size_t alloc_bytes;
1058         void *orig = dst;
1059         size_t bytes;
1060
1061         if (ZERO_OR_NULL_PTR(src))
1062                 goto out;
1063
1064         if (unlikely(check_mul_overflow(n, size, &bytes)))
1065                 return NULL;
1066
1067         alloc_bytes = max(ksize(orig), kmalloc_size_roundup(bytes));
1068         dst = krealloc(orig, alloc_bytes, flags);
1069         if (!dst) {
1070                 kfree(orig);
1071                 return NULL;
1072         }
1073
1074         memcpy(dst, src, bytes);
1075 out:
1076         return dst ? dst : ZERO_SIZE_PTR;
1077 }
1078
1079 /* resize an array from old_n items to new_n items. the array is reallocated if it's too
1080  * small to hold new_n items. new items are zeroed out if the array grows.
1081  *
1082  * Contrary to krealloc_array, does not free arr if new_n is zero.
1083  */
1084 static void *realloc_array(void *arr, size_t old_n, size_t new_n, size_t size)
1085 {
1086         size_t alloc_size;
1087         void *new_arr;
1088
1089         if (!new_n || old_n == new_n)
1090                 goto out;
1091
1092         alloc_size = kmalloc_size_roundup(size_mul(new_n, size));
1093         new_arr = krealloc(arr, alloc_size, GFP_KERNEL);
1094         if (!new_arr) {
1095                 kfree(arr);
1096                 return NULL;
1097         }
1098         arr = new_arr;
1099
1100         if (new_n > old_n)
1101                 memset(arr + old_n * size, 0, (new_n - old_n) * size);
1102
1103 out:
1104         return arr ? arr : ZERO_SIZE_PTR;
1105 }
1106
1107 static int copy_reference_state(struct bpf_func_state *dst, const struct bpf_func_state *src)
1108 {
1109         dst->refs = copy_array(dst->refs, src->refs, src->acquired_refs,
1110                                sizeof(struct bpf_reference_state), GFP_KERNEL);
1111         if (!dst->refs)
1112                 return -ENOMEM;
1113
1114         dst->acquired_refs = src->acquired_refs;
1115         return 0;
1116 }
1117
1118 static int copy_stack_state(struct bpf_func_state *dst, const struct bpf_func_state *src)
1119 {
1120         size_t n = src->allocated_stack / BPF_REG_SIZE;
1121
1122         dst->stack = copy_array(dst->stack, src->stack, n, sizeof(struct bpf_stack_state),
1123                                 GFP_KERNEL);
1124         if (!dst->stack)
1125                 return -ENOMEM;
1126
1127         dst->allocated_stack = src->allocated_stack;
1128         return 0;
1129 }
1130
1131 static int resize_reference_state(struct bpf_func_state *state, size_t n)
1132 {
1133         state->refs = realloc_array(state->refs, state->acquired_refs, n,
1134                                     sizeof(struct bpf_reference_state));
1135         if (!state->refs)
1136                 return -ENOMEM;
1137
1138         state->acquired_refs = n;
1139         return 0;
1140 }
1141
1142 static int grow_stack_state(struct bpf_func_state *state, int size)
1143 {
1144         size_t old_n = state->allocated_stack / BPF_REG_SIZE, n = size / BPF_REG_SIZE;
1145
1146         if (old_n >= n)
1147                 return 0;
1148
1149         state->stack = realloc_array(state->stack, old_n, n, sizeof(struct bpf_stack_state));
1150         if (!state->stack)
1151                 return -ENOMEM;
1152
1153         state->allocated_stack = size;
1154         return 0;
1155 }
1156
1157 /* Acquire a pointer id from the env and update the state->refs to include
1158  * this new pointer reference.
1159  * On success, returns a valid pointer id to associate with the register
1160  * On failure, returns a negative errno.
1161  */
1162 static int acquire_reference_state(struct bpf_verifier_env *env, int insn_idx)
1163 {
1164         struct bpf_func_state *state = cur_func(env);
1165         int new_ofs = state->acquired_refs;
1166         int id, err;
1167
1168         err = resize_reference_state(state, state->acquired_refs + 1);
1169         if (err)
1170                 return err;
1171         id = ++env->id_gen;
1172         state->refs[new_ofs].id = id;
1173         state->refs[new_ofs].insn_idx = insn_idx;
1174         state->refs[new_ofs].callback_ref = state->in_callback_fn ? state->frameno : 0;
1175
1176         return id;
1177 }
1178
1179 /* release function corresponding to acquire_reference_state(). Idempotent. */
1180 static int release_reference_state(struct bpf_func_state *state, int ptr_id)
1181 {
1182         int i, last_idx;
1183
1184         last_idx = state->acquired_refs - 1;
1185         for (i = 0; i < state->acquired_refs; i++) {
1186                 if (state->refs[i].id == ptr_id) {
1187                         /* Cannot release caller references in callbacks */
1188                         if (state->in_callback_fn && state->refs[i].callback_ref != state->frameno)
1189                                 return -EINVAL;
1190                         if (last_idx && i != last_idx)
1191                                 memcpy(&state->refs[i], &state->refs[last_idx],
1192                                        sizeof(*state->refs));
1193                         memset(&state->refs[last_idx], 0, sizeof(*state->refs));
1194                         state->acquired_refs--;
1195                         return 0;
1196                 }
1197         }
1198         return -EINVAL;
1199 }
1200
1201 static void free_func_state(struct bpf_func_state *state)
1202 {
1203         if (!state)
1204                 return;
1205         kfree(state->refs);
1206         kfree(state->stack);
1207         kfree(state);
1208 }
1209
1210 static void clear_jmp_history(struct bpf_verifier_state *state)
1211 {
1212         kfree(state->jmp_history);
1213         state->jmp_history = NULL;
1214         state->jmp_history_cnt = 0;
1215 }
1216
1217 static void free_verifier_state(struct bpf_verifier_state *state,
1218                                 bool free_self)
1219 {
1220         int i;
1221
1222         for (i = 0; i <= state->curframe; i++) {
1223                 free_func_state(state->frame[i]);
1224                 state->frame[i] = NULL;
1225         }
1226         clear_jmp_history(state);
1227         if (free_self)
1228                 kfree(state);
1229 }
1230
1231 /* copy verifier state from src to dst growing dst stack space
1232  * when necessary to accommodate larger src stack
1233  */
1234 static int copy_func_state(struct bpf_func_state *dst,
1235                            const struct bpf_func_state *src)
1236 {
1237         int err;
1238
1239         memcpy(dst, src, offsetof(struct bpf_func_state, acquired_refs));
1240         err = copy_reference_state(dst, src);
1241         if (err)
1242                 return err;
1243         return copy_stack_state(dst, src);
1244 }
1245
1246 static int copy_verifier_state(struct bpf_verifier_state *dst_state,
1247                                const struct bpf_verifier_state *src)
1248 {
1249         struct bpf_func_state *dst;
1250         int i, err;
1251
1252         dst_state->jmp_history = copy_array(dst_state->jmp_history, src->jmp_history,
1253                                             src->jmp_history_cnt, sizeof(struct bpf_idx_pair),
1254                                             GFP_USER);
1255         if (!dst_state->jmp_history)
1256                 return -ENOMEM;
1257         dst_state->jmp_history_cnt = src->jmp_history_cnt;
1258
1259         /* if dst has more stack frames then src frame, free them */
1260         for (i = src->curframe + 1; i <= dst_state->curframe; i++) {
1261                 free_func_state(dst_state->frame[i]);
1262                 dst_state->frame[i] = NULL;
1263         }
1264         dst_state->speculative = src->speculative;
1265         dst_state->active_rcu_lock = src->active_rcu_lock;
1266         dst_state->curframe = src->curframe;
1267         dst_state->active_lock.ptr = src->active_lock.ptr;
1268         dst_state->active_lock.id = src->active_lock.id;
1269         dst_state->branches = src->branches;
1270         dst_state->parent = src->parent;
1271         dst_state->first_insn_idx = src->first_insn_idx;
1272         dst_state->last_insn_idx = src->last_insn_idx;
1273         for (i = 0; i <= src->curframe; i++) {
1274                 dst = dst_state->frame[i];
1275                 if (!dst) {
1276                         dst = kzalloc(sizeof(*dst), GFP_KERNEL);
1277                         if (!dst)
1278                                 return -ENOMEM;
1279                         dst_state->frame[i] = dst;
1280                 }
1281                 err = copy_func_state(dst, src->frame[i]);
1282                 if (err)
1283                         return err;
1284         }
1285         return 0;
1286 }
1287
1288 static void update_branch_counts(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
1289 {
1290         while (st) {
1291                 u32 br = --st->branches;
1292
1293                 /* WARN_ON(br > 1) technically makes sense here,
1294                  * but see comment in push_stack(), hence:
1295                  */
1296                 WARN_ONCE((int)br < 0,
1297                           "BUG update_branch_counts:branches_to_explore=%d\n",
1298                           br);
1299                 if (br)
1300                         break;
1301                 st = st->parent;
1302         }
1303 }
1304
1305 static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx,
1306                      int *insn_idx, bool pop_log)
1307 {
1308         struct bpf_verifier_state *cur = env->cur_state;
1309         struct bpf_verifier_stack_elem *elem, *head = env->head;
1310         int err;
1311
1312         if (env->head == NULL)
1313                 return -ENOENT;
1314
1315         if (cur) {
1316                 err = copy_verifier_state(cur, &head->st);
1317                 if (err)
1318                         return err;
1319         }
1320         if (pop_log)
1321                 bpf_vlog_reset(&env->log, head->log_pos);
1322         if (insn_idx)
1323                 *insn_idx = head->insn_idx;
1324         if (prev_insn_idx)
1325                 *prev_insn_idx = head->prev_insn_idx;
1326         elem = head->next;
1327         free_verifier_state(&head->st, false);
1328         kfree(head);
1329         env->head = elem;
1330         env->stack_size--;
1331         return 0;
1332 }
1333
1334 static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env,
1335                                              int insn_idx, int prev_insn_idx,
1336                                              bool speculative)
1337 {
1338         struct bpf_verifier_state *cur = env->cur_state;
1339         struct bpf_verifier_stack_elem *elem;
1340         int err;
1341
1342         elem = kzalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL);
1343         if (!elem)
1344                 goto err;
1345
1346         elem->insn_idx = insn_idx;
1347         elem->prev_insn_idx = prev_insn_idx;
1348         elem->next = env->head;
1349         elem->log_pos = env->log.len_used;
1350         env->head = elem;
1351         env->stack_size++;
1352         err = copy_verifier_state(&elem->st, cur);
1353         if (err)
1354                 goto err;
1355         elem->st.speculative |= speculative;
1356         if (env->stack_size > BPF_COMPLEXITY_LIMIT_JMP_SEQ) {
1357                 verbose(env, "The sequence of %d jumps is too complex.\n",
1358                         env->stack_size);
1359                 goto err;
1360         }
1361         if (elem->st.parent) {
1362                 ++elem->st.parent->branches;
1363                 /* WARN_ON(branches > 2) technically makes sense here,
1364                  * but
1365                  * 1. speculative states will bump 'branches' for non-branch
1366                  * instructions
1367                  * 2. is_state_visited() heuristics may decide not to create
1368                  * a new state for a sequence of branches and all such current
1369                  * and cloned states will be pointing to a single parent state
1370                  * which might have large 'branches' count.
1371                  */
1372         }
1373         return &elem->st;
1374 err:
1375         free_verifier_state(env->cur_state, true);
1376         env->cur_state = NULL;
1377         /* pop all elements and return */
1378         while (!pop_stack(env, NULL, NULL, false));
1379         return NULL;
1380 }
1381
1382 #define CALLER_SAVED_REGS 6
1383 static const int caller_saved[CALLER_SAVED_REGS] = {
1384         BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5
1385 };
1386
1387 /* This helper doesn't clear reg->id */
1388 static void ___mark_reg_known(struct bpf_reg_state *reg, u64 imm)
1389 {
1390         reg->var_off = tnum_const(imm);
1391         reg->smin_value = (s64)imm;
1392         reg->smax_value = (s64)imm;
1393         reg->umin_value = imm;
1394         reg->umax_value = imm;
1395
1396         reg->s32_min_value = (s32)imm;
1397         reg->s32_max_value = (s32)imm;
1398         reg->u32_min_value = (u32)imm;
1399         reg->u32_max_value = (u32)imm;
1400 }
1401
1402 /* Mark the unknown part of a register (variable offset or scalar value) as
1403  * known to have the value @imm.
1404  */
1405 static void __mark_reg_known(struct bpf_reg_state *reg, u64 imm)
1406 {
1407         /* Clear id, off, and union(map_ptr, range) */
1408         memset(((u8 *)reg) + sizeof(reg->type), 0,
1409                offsetof(struct bpf_reg_state, var_off) - sizeof(reg->type));
1410         ___mark_reg_known(reg, imm);
1411 }
1412
1413 static void __mark_reg32_known(struct bpf_reg_state *reg, u64 imm)
1414 {
1415         reg->var_off = tnum_const_subreg(reg->var_off, imm);
1416         reg->s32_min_value = (s32)imm;
1417         reg->s32_max_value = (s32)imm;
1418         reg->u32_min_value = (u32)imm;
1419         reg->u32_max_value = (u32)imm;
1420 }
1421
1422 /* Mark the 'variable offset' part of a register as zero.  This should be
1423  * used only on registers holding a pointer type.
1424  */
1425 static void __mark_reg_known_zero(struct bpf_reg_state *reg)
1426 {
1427         __mark_reg_known(reg, 0);
1428 }
1429
1430 static void __mark_reg_const_zero(struct bpf_reg_state *reg)
1431 {
1432         __mark_reg_known(reg, 0);
1433         reg->type = SCALAR_VALUE;
1434 }
1435
1436 static void mark_reg_known_zero(struct bpf_verifier_env *env,
1437                                 struct bpf_reg_state *regs, u32 regno)
1438 {
1439         if (WARN_ON(regno >= MAX_BPF_REG)) {
1440                 verbose(env, "mark_reg_known_zero(regs, %u)\n", regno);
1441                 /* Something bad happened, let's kill all regs */
1442                 for (regno = 0; regno < MAX_BPF_REG; regno++)
1443                         __mark_reg_not_init(env, regs + regno);
1444                 return;
1445         }
1446         __mark_reg_known_zero(regs + regno);
1447 }
1448
1449 static void __mark_dynptr_reg(struct bpf_reg_state *reg, enum bpf_dynptr_type type,
1450                               bool first_slot)
1451 {
1452         /* reg->type has no meaning for STACK_DYNPTR, but when we set reg for
1453          * callback arguments, it does need to be CONST_PTR_TO_DYNPTR, so simply
1454          * set it unconditionally as it is ignored for STACK_DYNPTR anyway.
1455          */
1456         __mark_reg_known_zero(reg);
1457         reg->type = CONST_PTR_TO_DYNPTR;
1458         reg->dynptr.type = type;
1459         reg->dynptr.first_slot = first_slot;
1460 }
1461
1462 static void mark_ptr_not_null_reg(struct bpf_reg_state *reg)
1463 {
1464         if (base_type(reg->type) == PTR_TO_MAP_VALUE) {
1465                 const struct bpf_map *map = reg->map_ptr;
1466
1467                 if (map->inner_map_meta) {
1468                         reg->type = CONST_PTR_TO_MAP;
1469                         reg->map_ptr = map->inner_map_meta;
1470                         /* transfer reg's id which is unique for every map_lookup_elem
1471                          * as UID of the inner map.
1472                          */
1473                         if (btf_record_has_field(map->inner_map_meta->record, BPF_TIMER))
1474                                 reg->map_uid = reg->id;
1475                 } else if (map->map_type == BPF_MAP_TYPE_XSKMAP) {
1476                         reg->type = PTR_TO_XDP_SOCK;
1477                 } else if (map->map_type == BPF_MAP_TYPE_SOCKMAP ||
1478                            map->map_type == BPF_MAP_TYPE_SOCKHASH) {
1479                         reg->type = PTR_TO_SOCKET;
1480                 } else {
1481                         reg->type = PTR_TO_MAP_VALUE;
1482                 }
1483                 return;
1484         }
1485
1486         reg->type &= ~PTR_MAYBE_NULL;
1487 }
1488
1489 static bool reg_is_pkt_pointer(const struct bpf_reg_state *reg)
1490 {
1491         return type_is_pkt_pointer(reg->type);
1492 }
1493
1494 static bool reg_is_pkt_pointer_any(const struct bpf_reg_state *reg)
1495 {
1496         return reg_is_pkt_pointer(reg) ||
1497                reg->type == PTR_TO_PACKET_END;
1498 }
1499
1500 /* Unmodified PTR_TO_PACKET[_META,_END] register from ctx access. */
1501 static bool reg_is_init_pkt_pointer(const struct bpf_reg_state *reg,
1502                                     enum bpf_reg_type which)
1503 {
1504         /* The register can already have a range from prior markings.
1505          * This is fine as long as it hasn't been advanced from its
1506          * origin.
1507          */
1508         return reg->type == which &&
1509                reg->id == 0 &&
1510                reg->off == 0 &&
1511                tnum_equals_const(reg->var_off, 0);
1512 }
1513
1514 /* Reset the min/max bounds of a register */
1515 static void __mark_reg_unbounded(struct bpf_reg_state *reg)
1516 {
1517         reg->smin_value = S64_MIN;
1518         reg->smax_value = S64_MAX;
1519         reg->umin_value = 0;
1520         reg->umax_value = U64_MAX;
1521
1522         reg->s32_min_value = S32_MIN;
1523         reg->s32_max_value = S32_MAX;
1524         reg->u32_min_value = 0;
1525         reg->u32_max_value = U32_MAX;
1526 }
1527
1528 static void __mark_reg64_unbounded(struct bpf_reg_state *reg)
1529 {
1530         reg->smin_value = S64_MIN;
1531         reg->smax_value = S64_MAX;
1532         reg->umin_value = 0;
1533         reg->umax_value = U64_MAX;
1534 }
1535
1536 static void __mark_reg32_unbounded(struct bpf_reg_state *reg)
1537 {
1538         reg->s32_min_value = S32_MIN;
1539         reg->s32_max_value = S32_MAX;
1540         reg->u32_min_value = 0;
1541         reg->u32_max_value = U32_MAX;
1542 }
1543
1544 static void __update_reg32_bounds(struct bpf_reg_state *reg)
1545 {
1546         struct tnum var32_off = tnum_subreg(reg->var_off);
1547
1548         /* min signed is max(sign bit) | min(other bits) */
1549         reg->s32_min_value = max_t(s32, reg->s32_min_value,
1550                         var32_off.value | (var32_off.mask & S32_MIN));
1551         /* max signed is min(sign bit) | max(other bits) */
1552         reg->s32_max_value = min_t(s32, reg->s32_max_value,
1553                         var32_off.value | (var32_off.mask & S32_MAX));
1554         reg->u32_min_value = max_t(u32, reg->u32_min_value, (u32)var32_off.value);
1555         reg->u32_max_value = min(reg->u32_max_value,
1556                                  (u32)(var32_off.value | var32_off.mask));
1557 }
1558
1559 static void __update_reg64_bounds(struct bpf_reg_state *reg)
1560 {
1561         /* min signed is max(sign bit) | min(other bits) */
1562         reg->smin_value = max_t(s64, reg->smin_value,
1563                                 reg->var_off.value | (reg->var_off.mask & S64_MIN));
1564         /* max signed is min(sign bit) | max(other bits) */
1565         reg->smax_value = min_t(s64, reg->smax_value,
1566                                 reg->var_off.value | (reg->var_off.mask & S64_MAX));
1567         reg->umin_value = max(reg->umin_value, reg->var_off.value);
1568         reg->umax_value = min(reg->umax_value,
1569                               reg->var_off.value | reg->var_off.mask);
1570 }
1571
1572 static void __update_reg_bounds(struct bpf_reg_state *reg)
1573 {
1574         __update_reg32_bounds(reg);
1575         __update_reg64_bounds(reg);
1576 }
1577
1578 /* Uses signed min/max values to inform unsigned, and vice-versa */
1579 static void __reg32_deduce_bounds(struct bpf_reg_state *reg)
1580 {
1581         /* Learn sign from signed bounds.
1582          * If we cannot cross the sign boundary, then signed and unsigned bounds
1583          * are the same, so combine.  This works even in the negative case, e.g.
1584          * -3 s<= x s<= -1 implies 0xf...fd u<= x u<= 0xf...ff.
1585          */
1586         if (reg->s32_min_value >= 0 || reg->s32_max_value < 0) {
1587                 reg->s32_min_value = reg->u32_min_value =
1588                         max_t(u32, reg->s32_min_value, reg->u32_min_value);
1589                 reg->s32_max_value = reg->u32_max_value =
1590                         min_t(u32, reg->s32_max_value, reg->u32_max_value);
1591                 return;
1592         }
1593         /* Learn sign from unsigned bounds.  Signed bounds cross the sign
1594          * boundary, so we must be careful.
1595          */
1596         if ((s32)reg->u32_max_value >= 0) {
1597                 /* Positive.  We can't learn anything from the smin, but smax
1598                  * is positive, hence safe.
1599                  */
1600                 reg->s32_min_value = reg->u32_min_value;
1601                 reg->s32_max_value = reg->u32_max_value =
1602                         min_t(u32, reg->s32_max_value, reg->u32_max_value);
1603         } else if ((s32)reg->u32_min_value < 0) {
1604                 /* Negative.  We can't learn anything from the smax, but smin
1605                  * is negative, hence safe.
1606                  */
1607                 reg->s32_min_value = reg->u32_min_value =
1608                         max_t(u32, reg->s32_min_value, reg->u32_min_value);
1609                 reg->s32_max_value = reg->u32_max_value;
1610         }
1611 }
1612
1613 static void __reg64_deduce_bounds(struct bpf_reg_state *reg)
1614 {
1615         /* Learn sign from signed bounds.
1616          * If we cannot cross the sign boundary, then signed and unsigned bounds
1617          * are the same, so combine.  This works even in the negative case, e.g.
1618          * -3 s<= x s<= -1 implies 0xf...fd u<= x u<= 0xf...ff.
1619          */
1620         if (reg->smin_value >= 0 || reg->smax_value < 0) {
1621                 reg->smin_value = reg->umin_value = max_t(u64, reg->smin_value,
1622                                                           reg->umin_value);
1623                 reg->smax_value = reg->umax_value = min_t(u64, reg->smax_value,
1624                                                           reg->umax_value);
1625                 return;
1626         }
1627         /* Learn sign from unsigned bounds.  Signed bounds cross the sign
1628          * boundary, so we must be careful.
1629          */
1630         if ((s64)reg->umax_value >= 0) {
1631                 /* Positive.  We can't learn anything from the smin, but smax
1632                  * is positive, hence safe.
1633                  */
1634                 reg->smin_value = reg->umin_value;
1635                 reg->smax_value = reg->umax_value = min_t(u64, reg->smax_value,
1636                                                           reg->umax_value);
1637         } else if ((s64)reg->umin_value < 0) {
1638                 /* Negative.  We can't learn anything from the smax, but smin
1639                  * is negative, hence safe.
1640                  */
1641                 reg->smin_value = reg->umin_value = max_t(u64, reg->smin_value,
1642                                                           reg->umin_value);
1643                 reg->smax_value = reg->umax_value;
1644         }
1645 }
1646
1647 static void __reg_deduce_bounds(struct bpf_reg_state *reg)
1648 {
1649         __reg32_deduce_bounds(reg);
1650         __reg64_deduce_bounds(reg);
1651 }
1652
1653 /* Attempts to improve var_off based on unsigned min/max information */
1654 static void __reg_bound_offset(struct bpf_reg_state *reg)
1655 {
1656         struct tnum var64_off = tnum_intersect(reg->var_off,
1657                                                tnum_range(reg->umin_value,
1658                                                           reg->umax_value));
1659         struct tnum var32_off = tnum_intersect(tnum_subreg(reg->var_off),
1660                                                 tnum_range(reg->u32_min_value,
1661                                                            reg->u32_max_value));
1662
1663         reg->var_off = tnum_or(tnum_clear_subreg(var64_off), var32_off);
1664 }
1665
1666 static void reg_bounds_sync(struct bpf_reg_state *reg)
1667 {
1668         /* We might have learned new bounds from the var_off. */
1669         __update_reg_bounds(reg);
1670         /* We might have learned something about the sign bit. */
1671         __reg_deduce_bounds(reg);
1672         /* We might have learned some bits from the bounds. */
1673         __reg_bound_offset(reg);
1674         /* Intersecting with the old var_off might have improved our bounds
1675          * slightly, e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
1676          * then new var_off is (0; 0x7f...fc) which improves our umax.
1677          */
1678         __update_reg_bounds(reg);
1679 }
1680
1681 static bool __reg32_bound_s64(s32 a)
1682 {
1683         return a >= 0 && a <= S32_MAX;
1684 }
1685
1686 static void __reg_assign_32_into_64(struct bpf_reg_state *reg)
1687 {
1688         reg->umin_value = reg->u32_min_value;
1689         reg->umax_value = reg->u32_max_value;
1690
1691         /* Attempt to pull 32-bit signed bounds into 64-bit bounds but must
1692          * be positive otherwise set to worse case bounds and refine later
1693          * from tnum.
1694          */
1695         if (__reg32_bound_s64(reg->s32_min_value) &&
1696             __reg32_bound_s64(reg->s32_max_value)) {
1697                 reg->smin_value = reg->s32_min_value;
1698                 reg->smax_value = reg->s32_max_value;
1699         } else {
1700                 reg->smin_value = 0;
1701                 reg->smax_value = U32_MAX;
1702         }
1703 }
1704
1705 static void __reg_combine_32_into_64(struct bpf_reg_state *reg)
1706 {
1707         /* special case when 64-bit register has upper 32-bit register
1708          * zeroed. Typically happens after zext or <<32, >>32 sequence
1709          * allowing us to use 32-bit bounds directly,
1710          */
1711         if (tnum_equals_const(tnum_clear_subreg(reg->var_off), 0)) {
1712                 __reg_assign_32_into_64(reg);
1713         } else {
1714                 /* Otherwise the best we can do is push lower 32bit known and
1715                  * unknown bits into register (var_off set from jmp logic)
1716                  * then learn as much as possible from the 64-bit tnum
1717                  * known and unknown bits. The previous smin/smax bounds are
1718                  * invalid here because of jmp32 compare so mark them unknown
1719                  * so they do not impact tnum bounds calculation.
1720                  */
1721                 __mark_reg64_unbounded(reg);
1722         }
1723         reg_bounds_sync(reg);
1724 }
1725
1726 static bool __reg64_bound_s32(s64 a)
1727 {
1728         return a >= S32_MIN && a <= S32_MAX;
1729 }
1730
1731 static bool __reg64_bound_u32(u64 a)
1732 {
1733         return a >= U32_MIN && a <= U32_MAX;
1734 }
1735
1736 static void __reg_combine_64_into_32(struct bpf_reg_state *reg)
1737 {
1738         __mark_reg32_unbounded(reg);
1739         if (__reg64_bound_s32(reg->smin_value) && __reg64_bound_s32(reg->smax_value)) {
1740                 reg->s32_min_value = (s32)reg->smin_value;
1741                 reg->s32_max_value = (s32)reg->smax_value;
1742         }
1743         if (__reg64_bound_u32(reg->umin_value) && __reg64_bound_u32(reg->umax_value)) {
1744                 reg->u32_min_value = (u32)reg->umin_value;
1745                 reg->u32_max_value = (u32)reg->umax_value;
1746         }
1747         reg_bounds_sync(reg);
1748 }
1749
1750 /* Mark a register as having a completely unknown (scalar) value. */
1751 static void __mark_reg_unknown(const struct bpf_verifier_env *env,
1752                                struct bpf_reg_state *reg)
1753 {
1754         /*
1755          * Clear type, id, off, and union(map_ptr, range) and
1756          * padding between 'type' and union
1757          */
1758         memset(reg, 0, offsetof(struct bpf_reg_state, var_off));
1759         reg->type = SCALAR_VALUE;
1760         reg->var_off = tnum_unknown;
1761         reg->frameno = 0;
1762         reg->precise = !env->bpf_capable;
1763         __mark_reg_unbounded(reg);
1764 }
1765
1766 static void mark_reg_unknown(struct bpf_verifier_env *env,
1767                              struct bpf_reg_state *regs, u32 regno)
1768 {
1769         if (WARN_ON(regno >= MAX_BPF_REG)) {
1770                 verbose(env, "mark_reg_unknown(regs, %u)\n", regno);
1771                 /* Something bad happened, let's kill all regs except FP */
1772                 for (regno = 0; regno < BPF_REG_FP; regno++)
1773                         __mark_reg_not_init(env, regs + regno);
1774                 return;
1775         }
1776         __mark_reg_unknown(env, regs + regno);
1777 }
1778
1779 static void __mark_reg_not_init(const struct bpf_verifier_env *env,
1780                                 struct bpf_reg_state *reg)
1781 {
1782         __mark_reg_unknown(env, reg);
1783         reg->type = NOT_INIT;
1784 }
1785
1786 static void mark_reg_not_init(struct bpf_verifier_env *env,
1787                               struct bpf_reg_state *regs, u32 regno)
1788 {
1789         if (WARN_ON(regno >= MAX_BPF_REG)) {
1790                 verbose(env, "mark_reg_not_init(regs, %u)\n", regno);
1791                 /* Something bad happened, let's kill all regs except FP */
1792                 for (regno = 0; regno < BPF_REG_FP; regno++)
1793                         __mark_reg_not_init(env, regs + regno);
1794                 return;
1795         }
1796         __mark_reg_not_init(env, regs + regno);
1797 }
1798
1799 static void mark_btf_ld_reg(struct bpf_verifier_env *env,
1800                             struct bpf_reg_state *regs, u32 regno,
1801                             enum bpf_reg_type reg_type,
1802                             struct btf *btf, u32 btf_id,
1803                             enum bpf_type_flag flag)
1804 {
1805         if (reg_type == SCALAR_VALUE) {
1806                 mark_reg_unknown(env, regs, regno);
1807                 return;
1808         }
1809         mark_reg_known_zero(env, regs, regno);
1810         regs[regno].type = PTR_TO_BTF_ID | flag;
1811         regs[regno].btf = btf;
1812         regs[regno].btf_id = btf_id;
1813 }
1814
1815 #define DEF_NOT_SUBREG  (0)
1816 static void init_reg_state(struct bpf_verifier_env *env,
1817                            struct bpf_func_state *state)
1818 {
1819         struct bpf_reg_state *regs = state->regs;
1820         int i;
1821
1822         for (i = 0; i < MAX_BPF_REG; i++) {
1823                 mark_reg_not_init(env, regs, i);
1824                 regs[i].live = REG_LIVE_NONE;
1825                 regs[i].parent = NULL;
1826                 regs[i].subreg_def = DEF_NOT_SUBREG;
1827         }
1828
1829         /* frame pointer */
1830         regs[BPF_REG_FP].type = PTR_TO_STACK;
1831         mark_reg_known_zero(env, regs, BPF_REG_FP);
1832         regs[BPF_REG_FP].frameno = state->frameno;
1833 }
1834
1835 #define BPF_MAIN_FUNC (-1)
1836 static void init_func_state(struct bpf_verifier_env *env,
1837                             struct bpf_func_state *state,
1838                             int callsite, int frameno, int subprogno)
1839 {
1840         state->callsite = callsite;
1841         state->frameno = frameno;
1842         state->subprogno = subprogno;
1843         state->callback_ret_range = tnum_range(0, 0);
1844         init_reg_state(env, state);
1845         mark_verifier_state_scratched(env);
1846 }
1847
1848 /* Similar to push_stack(), but for async callbacks */
1849 static struct bpf_verifier_state *push_async_cb(struct bpf_verifier_env *env,
1850                                                 int insn_idx, int prev_insn_idx,
1851                                                 int subprog)
1852 {
1853         struct bpf_verifier_stack_elem *elem;
1854         struct bpf_func_state *frame;
1855
1856         elem = kzalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL);
1857         if (!elem)
1858                 goto err;
1859
1860         elem->insn_idx = insn_idx;
1861         elem->prev_insn_idx = prev_insn_idx;
1862         elem->next = env->head;
1863         elem->log_pos = env->log.len_used;
1864         env->head = elem;
1865         env->stack_size++;
1866         if (env->stack_size > BPF_COMPLEXITY_LIMIT_JMP_SEQ) {
1867                 verbose(env,
1868                         "The sequence of %d jumps is too complex for async cb.\n",
1869                         env->stack_size);
1870                 goto err;
1871         }
1872         /* Unlike push_stack() do not copy_verifier_state().
1873          * The caller state doesn't matter.
1874          * This is async callback. It starts in a fresh stack.
1875          * Initialize it similar to do_check_common().
1876          */
1877         elem->st.branches = 1;
1878         frame = kzalloc(sizeof(*frame), GFP_KERNEL);
1879         if (!frame)
1880                 goto err;
1881         init_func_state(env, frame,
1882                         BPF_MAIN_FUNC /* callsite */,
1883                         0 /* frameno within this callchain */,
1884                         subprog /* subprog number within this prog */);
1885         elem->st.frame[0] = frame;
1886         return &elem->st;
1887 err:
1888         free_verifier_state(env->cur_state, true);
1889         env->cur_state = NULL;
1890         /* pop all elements and return */
1891         while (!pop_stack(env, NULL, NULL, false));
1892         return NULL;
1893 }
1894
1895
1896 enum reg_arg_type {
1897         SRC_OP,         /* register is used as source operand */
1898         DST_OP,         /* register is used as destination operand */
1899         DST_OP_NO_MARK  /* same as above, check only, don't mark */
1900 };
1901
1902 static int cmp_subprogs(const void *a, const void *b)
1903 {
1904         return ((struct bpf_subprog_info *)a)->start -
1905                ((struct bpf_subprog_info *)b)->start;
1906 }
1907
1908 static int find_subprog(struct bpf_verifier_env *env, int off)
1909 {
1910         struct bpf_subprog_info *p;
1911
1912         p = bsearch(&off, env->subprog_info, env->subprog_cnt,
1913                     sizeof(env->subprog_info[0]), cmp_subprogs);
1914         if (!p)
1915                 return -ENOENT;
1916         return p - env->subprog_info;
1917
1918 }
1919
1920 static int add_subprog(struct bpf_verifier_env *env, int off)
1921 {
1922         int insn_cnt = env->prog->len;
1923         int ret;
1924
1925         if (off >= insn_cnt || off < 0) {
1926                 verbose(env, "call to invalid destination\n");
1927                 return -EINVAL;
1928         }
1929         ret = find_subprog(env, off);
1930         if (ret >= 0)
1931                 return ret;
1932         if (env->subprog_cnt >= BPF_MAX_SUBPROGS) {
1933                 verbose(env, "too many subprograms\n");
1934                 return -E2BIG;
1935         }
1936         /* determine subprog starts. The end is one before the next starts */
1937         env->subprog_info[env->subprog_cnt++].start = off;
1938         sort(env->subprog_info, env->subprog_cnt,
1939              sizeof(env->subprog_info[0]), cmp_subprogs, NULL);
1940         return env->subprog_cnt - 1;
1941 }
1942
1943 #define MAX_KFUNC_DESCS 256
1944 #define MAX_KFUNC_BTFS  256
1945
1946 struct bpf_kfunc_desc {
1947         struct btf_func_model func_model;
1948         u32 func_id;
1949         s32 imm;
1950         u16 offset;
1951 };
1952
1953 struct bpf_kfunc_btf {
1954         struct btf *btf;
1955         struct module *module;
1956         u16 offset;
1957 };
1958
1959 struct bpf_kfunc_desc_tab {
1960         struct bpf_kfunc_desc descs[MAX_KFUNC_DESCS];
1961         u32 nr_descs;
1962 };
1963
1964 struct bpf_kfunc_btf_tab {
1965         struct bpf_kfunc_btf descs[MAX_KFUNC_BTFS];
1966         u32 nr_descs;
1967 };
1968
1969 static int kfunc_desc_cmp_by_id_off(const void *a, const void *b)
1970 {
1971         const struct bpf_kfunc_desc *d0 = a;
1972         const struct bpf_kfunc_desc *d1 = b;
1973
1974         /* func_id is not greater than BTF_MAX_TYPE */
1975         return d0->func_id - d1->func_id ?: d0->offset - d1->offset;
1976 }
1977
1978 static int kfunc_btf_cmp_by_off(const void *a, const void *b)
1979 {
1980         const struct bpf_kfunc_btf *d0 = a;
1981         const struct bpf_kfunc_btf *d1 = b;
1982
1983         return d0->offset - d1->offset;
1984 }
1985
1986 static const struct bpf_kfunc_desc *
1987 find_kfunc_desc(const struct bpf_prog *prog, u32 func_id, u16 offset)
1988 {
1989         struct bpf_kfunc_desc desc = {
1990                 .func_id = func_id,
1991                 .offset = offset,
1992         };
1993         struct bpf_kfunc_desc_tab *tab;
1994
1995         tab = prog->aux->kfunc_tab;
1996         return bsearch(&desc, tab->descs, tab->nr_descs,
1997                        sizeof(tab->descs[0]), kfunc_desc_cmp_by_id_off);
1998 }
1999
2000 static struct btf *__find_kfunc_desc_btf(struct bpf_verifier_env *env,
2001                                          s16 offset)
2002 {
2003         struct bpf_kfunc_btf kf_btf = { .offset = offset };
2004         struct bpf_kfunc_btf_tab *tab;
2005         struct bpf_kfunc_btf *b;
2006         struct module *mod;
2007         struct btf *btf;
2008         int btf_fd;
2009
2010         tab = env->prog->aux->kfunc_btf_tab;
2011         b = bsearch(&kf_btf, tab->descs, tab->nr_descs,
2012                     sizeof(tab->descs[0]), kfunc_btf_cmp_by_off);
2013         if (!b) {
2014                 if (tab->nr_descs == MAX_KFUNC_BTFS) {
2015                         verbose(env, "too many different module BTFs\n");
2016                         return ERR_PTR(-E2BIG);
2017                 }
2018
2019                 if (bpfptr_is_null(env->fd_array)) {
2020                         verbose(env, "kfunc offset > 0 without fd_array is invalid\n");
2021                         return ERR_PTR(-EPROTO);
2022                 }
2023
2024                 if (copy_from_bpfptr_offset(&btf_fd, env->fd_array,
2025                                             offset * sizeof(btf_fd),
2026                                             sizeof(btf_fd)))
2027                         return ERR_PTR(-EFAULT);
2028
2029                 btf = btf_get_by_fd(btf_fd);
2030                 if (IS_ERR(btf)) {
2031                         verbose(env, "invalid module BTF fd specified\n");
2032                         return btf;
2033                 }
2034
2035                 if (!btf_is_module(btf)) {
2036                         verbose(env, "BTF fd for kfunc is not a module BTF\n");
2037                         btf_put(btf);
2038                         return ERR_PTR(-EINVAL);
2039                 }
2040
2041                 mod = btf_try_get_module(btf);
2042                 if (!mod) {
2043                         btf_put(btf);
2044                         return ERR_PTR(-ENXIO);
2045                 }
2046
2047                 b = &tab->descs[tab->nr_descs++];
2048                 b->btf = btf;
2049                 b->module = mod;
2050                 b->offset = offset;
2051
2052                 sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]),
2053                      kfunc_btf_cmp_by_off, NULL);
2054         }
2055         return b->btf;
2056 }
2057
2058 void bpf_free_kfunc_btf_tab(struct bpf_kfunc_btf_tab *tab)
2059 {
2060         if (!tab)
2061                 return;
2062
2063         while (tab->nr_descs--) {
2064                 module_put(tab->descs[tab->nr_descs].module);
2065                 btf_put(tab->descs[tab->nr_descs].btf);
2066         }
2067         kfree(tab);
2068 }
2069
2070 static struct btf *find_kfunc_desc_btf(struct bpf_verifier_env *env, s16 offset)
2071 {
2072         if (offset) {
2073                 if (offset < 0) {
2074                         /* In the future, this can be allowed to increase limit
2075                          * of fd index into fd_array, interpreted as u16.
2076                          */
2077                         verbose(env, "negative offset disallowed for kernel module function call\n");
2078                         return ERR_PTR(-EINVAL);
2079                 }
2080
2081                 return __find_kfunc_desc_btf(env, offset);
2082         }
2083         return btf_vmlinux ?: ERR_PTR(-ENOENT);
2084 }
2085
2086 static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, s16 offset)
2087 {
2088         const struct btf_type *func, *func_proto;
2089         struct bpf_kfunc_btf_tab *btf_tab;
2090         struct bpf_kfunc_desc_tab *tab;
2091         struct bpf_prog_aux *prog_aux;
2092         struct bpf_kfunc_desc *desc;
2093         const char *func_name;
2094         struct btf *desc_btf;
2095         unsigned long call_imm;
2096         unsigned long addr;
2097         int err;
2098
2099         prog_aux = env->prog->aux;
2100         tab = prog_aux->kfunc_tab;
2101         btf_tab = prog_aux->kfunc_btf_tab;
2102         if (!tab) {
2103                 if (!btf_vmlinux) {
2104                         verbose(env, "calling kernel function is not supported without CONFIG_DEBUG_INFO_BTF\n");
2105                         return -ENOTSUPP;
2106                 }
2107
2108                 if (!env->prog->jit_requested) {
2109                         verbose(env, "JIT is required for calling kernel function\n");
2110                         return -ENOTSUPP;
2111                 }
2112
2113                 if (!bpf_jit_supports_kfunc_call()) {
2114                         verbose(env, "JIT does not support calling kernel function\n");
2115                         return -ENOTSUPP;
2116                 }
2117
2118                 if (!env->prog->gpl_compatible) {
2119                         verbose(env, "cannot call kernel function from non-GPL compatible program\n");
2120                         return -EINVAL;
2121                 }
2122
2123                 tab = kzalloc(sizeof(*tab), GFP_KERNEL);
2124                 if (!tab)
2125                         return -ENOMEM;
2126                 prog_aux->kfunc_tab = tab;
2127         }
2128
2129         /* func_id == 0 is always invalid, but instead of returning an error, be
2130          * conservative and wait until the code elimination pass before returning
2131          * error, so that invalid calls that get pruned out can be in BPF programs
2132          * loaded from userspace.  It is also required that offset be untouched
2133          * for such calls.
2134          */
2135         if (!func_id && !offset)
2136                 return 0;
2137
2138         if (!btf_tab && offset) {
2139                 btf_tab = kzalloc(sizeof(*btf_tab), GFP_KERNEL);
2140                 if (!btf_tab)
2141                         return -ENOMEM;
2142                 prog_aux->kfunc_btf_tab = btf_tab;
2143         }
2144
2145         desc_btf = find_kfunc_desc_btf(env, offset);
2146         if (IS_ERR(desc_btf)) {
2147                 verbose(env, "failed to find BTF for kernel function\n");
2148                 return PTR_ERR(desc_btf);
2149         }
2150
2151         if (find_kfunc_desc(env->prog, func_id, offset))
2152                 return 0;
2153
2154         if (tab->nr_descs == MAX_KFUNC_DESCS) {
2155                 verbose(env, "too many different kernel function calls\n");
2156                 return -E2BIG;
2157         }
2158
2159         func = btf_type_by_id(desc_btf, func_id);
2160         if (!func || !btf_type_is_func(func)) {
2161                 verbose(env, "kernel btf_id %u is not a function\n",
2162                         func_id);
2163                 return -EINVAL;
2164         }
2165         func_proto = btf_type_by_id(desc_btf, func->type);
2166         if (!func_proto || !btf_type_is_func_proto(func_proto)) {
2167                 verbose(env, "kernel function btf_id %u does not have a valid func_proto\n",
2168                         func_id);
2169                 return -EINVAL;
2170         }
2171
2172         func_name = btf_name_by_offset(desc_btf, func->name_off);
2173         addr = kallsyms_lookup_name(func_name);
2174         if (!addr) {
2175                 verbose(env, "cannot find address for kernel function %s\n",
2176                         func_name);
2177                 return -EINVAL;
2178         }
2179
2180         call_imm = BPF_CALL_IMM(addr);
2181         /* Check whether or not the relative offset overflows desc->imm */
2182         if ((unsigned long)(s32)call_imm != call_imm) {
2183                 verbose(env, "address of kernel function %s is out of range\n",
2184                         func_name);
2185                 return -EINVAL;
2186         }
2187
2188         desc = &tab->descs[tab->nr_descs++];
2189         desc->func_id = func_id;
2190         desc->imm = call_imm;
2191         desc->offset = offset;
2192         err = btf_distill_func_proto(&env->log, desc_btf,
2193                                      func_proto, func_name,
2194                                      &desc->func_model);
2195         if (!err)
2196                 sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]),
2197                      kfunc_desc_cmp_by_id_off, NULL);
2198         return err;
2199 }
2200
2201 static int kfunc_desc_cmp_by_imm(const void *a, const void *b)
2202 {
2203         const struct bpf_kfunc_desc *d0 = a;
2204         const struct bpf_kfunc_desc *d1 = b;
2205
2206         if (d0->imm > d1->imm)
2207                 return 1;
2208         else if (d0->imm < d1->imm)
2209                 return -1;
2210         return 0;
2211 }
2212
2213 static void sort_kfunc_descs_by_imm(struct bpf_prog *prog)
2214 {
2215         struct bpf_kfunc_desc_tab *tab;
2216
2217         tab = prog->aux->kfunc_tab;
2218         if (!tab)
2219                 return;
2220
2221         sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]),
2222              kfunc_desc_cmp_by_imm, NULL);
2223 }
2224
2225 bool bpf_prog_has_kfunc_call(const struct bpf_prog *prog)
2226 {
2227         return !!prog->aux->kfunc_tab;
2228 }
2229
2230 const struct btf_func_model *
2231 bpf_jit_find_kfunc_model(const struct bpf_prog *prog,
2232                          const struct bpf_insn *insn)
2233 {
2234         const struct bpf_kfunc_desc desc = {
2235                 .imm = insn->imm,
2236         };
2237         const struct bpf_kfunc_desc *res;
2238         struct bpf_kfunc_desc_tab *tab;
2239
2240         tab = prog->aux->kfunc_tab;
2241         res = bsearch(&desc, tab->descs, tab->nr_descs,
2242                       sizeof(tab->descs[0]), kfunc_desc_cmp_by_imm);
2243
2244         return res ? &res->func_model : NULL;
2245 }
2246
2247 static int add_subprog_and_kfunc(struct bpf_verifier_env *env)
2248 {
2249         struct bpf_subprog_info *subprog = env->subprog_info;
2250         struct bpf_insn *insn = env->prog->insnsi;
2251         int i, ret, insn_cnt = env->prog->len;
2252
2253         /* Add entry function. */
2254         ret = add_subprog(env, 0);
2255         if (ret)
2256                 return ret;
2257
2258         for (i = 0; i < insn_cnt; i++, insn++) {
2259                 if (!bpf_pseudo_func(insn) && !bpf_pseudo_call(insn) &&
2260                     !bpf_pseudo_kfunc_call(insn))
2261                         continue;
2262
2263                 if (!env->bpf_capable) {
2264                         verbose(env, "loading/calling other bpf or kernel functions are allowed for CAP_BPF and CAP_SYS_ADMIN\n");
2265                         return -EPERM;
2266                 }
2267
2268                 if (bpf_pseudo_func(insn) || bpf_pseudo_call(insn))
2269                         ret = add_subprog(env, i + insn->imm + 1);
2270                 else
2271                         ret = add_kfunc_call(env, insn->imm, insn->off);
2272
2273                 if (ret < 0)
2274                         return ret;
2275         }
2276
2277         /* Add a fake 'exit' subprog which could simplify subprog iteration
2278          * logic. 'subprog_cnt' should not be increased.
2279          */
2280         subprog[env->subprog_cnt].start = insn_cnt;
2281
2282         if (env->log.level & BPF_LOG_LEVEL2)
2283                 for (i = 0; i < env->subprog_cnt; i++)
2284                         verbose(env, "func#%d @%d\n", i, subprog[i].start);
2285
2286         return 0;
2287 }
2288
2289 static int check_subprogs(struct bpf_verifier_env *env)
2290 {
2291         int i, subprog_start, subprog_end, off, cur_subprog = 0;
2292         struct bpf_subprog_info *subprog = env->subprog_info;
2293         struct bpf_insn *insn = env->prog->insnsi;
2294         int insn_cnt = env->prog->len;
2295
2296         /* now check that all jumps are within the same subprog */
2297         subprog_start = subprog[cur_subprog].start;
2298         subprog_end = subprog[cur_subprog + 1].start;
2299         for (i = 0; i < insn_cnt; i++) {
2300                 u8 code = insn[i].code;
2301
2302                 if (code == (BPF_JMP | BPF_CALL) &&
2303                     insn[i].imm == BPF_FUNC_tail_call &&
2304                     insn[i].src_reg != BPF_PSEUDO_CALL)
2305                         subprog[cur_subprog].has_tail_call = true;
2306                 if (BPF_CLASS(code) == BPF_LD &&
2307                     (BPF_MODE(code) == BPF_ABS || BPF_MODE(code) == BPF_IND))
2308                         subprog[cur_subprog].has_ld_abs = true;
2309                 if (BPF_CLASS(code) != BPF_JMP && BPF_CLASS(code) != BPF_JMP32)
2310                         goto next;
2311                 if (BPF_OP(code) == BPF_EXIT || BPF_OP(code) == BPF_CALL)
2312                         goto next;
2313                 off = i + insn[i].off + 1;
2314                 if (off < subprog_start || off >= subprog_end) {
2315                         verbose(env, "jump out of range from insn %d to %d\n", i, off);
2316                         return -EINVAL;
2317                 }
2318 next:
2319                 if (i == subprog_end - 1) {
2320                         /* to avoid fall-through from one subprog into another
2321                          * the last insn of the subprog should be either exit
2322                          * or unconditional jump back
2323                          */
2324                         if (code != (BPF_JMP | BPF_EXIT) &&
2325                             code != (BPF_JMP | BPF_JA)) {
2326                                 verbose(env, "last insn is not an exit or jmp\n");
2327                                 return -EINVAL;
2328                         }
2329                         subprog_start = subprog_end;
2330                         cur_subprog++;
2331                         if (cur_subprog < env->subprog_cnt)
2332                                 subprog_end = subprog[cur_subprog + 1].start;
2333                 }
2334         }
2335         return 0;
2336 }
2337
2338 /* Parentage chain of this register (or stack slot) should take care of all
2339  * issues like callee-saved registers, stack slot allocation time, etc.
2340  */
2341 static int mark_reg_read(struct bpf_verifier_env *env,
2342                          const struct bpf_reg_state *state,
2343                          struct bpf_reg_state *parent, u8 flag)
2344 {
2345         bool writes = parent == state->parent; /* Observe write marks */
2346         int cnt = 0;
2347
2348         while (parent) {
2349                 /* if read wasn't screened by an earlier write ... */
2350                 if (writes && state->live & REG_LIVE_WRITTEN)
2351                         break;
2352                 if (parent->live & REG_LIVE_DONE) {
2353                         verbose(env, "verifier BUG type %s var_off %lld off %d\n",
2354                                 reg_type_str(env, parent->type),
2355                                 parent->var_off.value, parent->off);
2356                         return -EFAULT;
2357                 }
2358                 /* The first condition is more likely to be true than the
2359                  * second, checked it first.
2360                  */
2361                 if ((parent->live & REG_LIVE_READ) == flag ||
2362                     parent->live & REG_LIVE_READ64)
2363                         /* The parentage chain never changes and
2364                          * this parent was already marked as LIVE_READ.
2365                          * There is no need to keep walking the chain again and
2366                          * keep re-marking all parents as LIVE_READ.
2367                          * This case happens when the same register is read
2368                          * multiple times without writes into it in-between.
2369                          * Also, if parent has the stronger REG_LIVE_READ64 set,
2370                          * then no need to set the weak REG_LIVE_READ32.
2371                          */
2372                         break;
2373                 /* ... then we depend on parent's value */
2374                 parent->live |= flag;
2375                 /* REG_LIVE_READ64 overrides REG_LIVE_READ32. */
2376                 if (flag == REG_LIVE_READ64)
2377                         parent->live &= ~REG_LIVE_READ32;
2378                 state = parent;
2379                 parent = state->parent;
2380                 writes = true;
2381                 cnt++;
2382         }
2383
2384         if (env->longest_mark_read_walk < cnt)
2385                 env->longest_mark_read_walk = cnt;
2386         return 0;
2387 }
2388
2389 /* This function is supposed to be used by the following 32-bit optimization
2390  * code only. It returns TRUE if the source or destination register operates
2391  * on 64-bit, otherwise return FALSE.
2392  */
2393 static bool is_reg64(struct bpf_verifier_env *env, struct bpf_insn *insn,
2394                      u32 regno, struct bpf_reg_state *reg, enum reg_arg_type t)
2395 {
2396         u8 code, class, op;
2397
2398         code = insn->code;
2399         class = BPF_CLASS(code);
2400         op = BPF_OP(code);
2401         if (class == BPF_JMP) {
2402                 /* BPF_EXIT for "main" will reach here. Return TRUE
2403                  * conservatively.
2404                  */
2405                 if (op == BPF_EXIT)
2406                         return true;
2407                 if (op == BPF_CALL) {
2408                         /* BPF to BPF call will reach here because of marking
2409                          * caller saved clobber with DST_OP_NO_MARK for which we
2410                          * don't care the register def because they are anyway
2411                          * marked as NOT_INIT already.
2412                          */
2413                         if (insn->src_reg == BPF_PSEUDO_CALL)
2414                                 return false;
2415                         /* Helper call will reach here because of arg type
2416                          * check, conservatively return TRUE.
2417                          */
2418                         if (t == SRC_OP)
2419                                 return true;
2420
2421                         return false;
2422                 }
2423         }
2424
2425         if (class == BPF_ALU64 || class == BPF_JMP ||
2426             /* BPF_END always use BPF_ALU class. */
2427             (class == BPF_ALU && op == BPF_END && insn->imm == 64))
2428                 return true;
2429
2430         if (class == BPF_ALU || class == BPF_JMP32)
2431                 return false;
2432
2433         if (class == BPF_LDX) {
2434                 if (t != SRC_OP)
2435                         return BPF_SIZE(code) == BPF_DW;
2436                 /* LDX source must be ptr. */
2437                 return true;
2438         }
2439
2440         if (class == BPF_STX) {
2441                 /* BPF_STX (including atomic variants) has multiple source
2442                  * operands, one of which is a ptr. Check whether the caller is
2443                  * asking about it.
2444                  */
2445                 if (t == SRC_OP && reg->type != SCALAR_VALUE)
2446                         return true;
2447                 return BPF_SIZE(code) == BPF_DW;
2448         }
2449
2450         if (class == BPF_LD) {
2451                 u8 mode = BPF_MODE(code);
2452
2453                 /* LD_IMM64 */
2454                 if (mode == BPF_IMM)
2455                         return true;
2456
2457                 /* Both LD_IND and LD_ABS return 32-bit data. */
2458                 if (t != SRC_OP)
2459                         return  false;
2460
2461                 /* Implicit ctx ptr. */
2462                 if (regno == BPF_REG_6)
2463                         return true;
2464
2465                 /* Explicit source could be any width. */
2466                 return true;
2467         }
2468
2469         if (class == BPF_ST)
2470                 /* The only source register for BPF_ST is a ptr. */
2471                 return true;
2472
2473         /* Conservatively return true at default. */
2474         return true;
2475 }
2476
2477 /* Return the regno defined by the insn, or -1. */
2478 static int insn_def_regno(const struct bpf_insn *insn)
2479 {
2480         switch (BPF_CLASS(insn->code)) {
2481         case BPF_JMP:
2482         case BPF_JMP32:
2483         case BPF_ST:
2484                 return -1;
2485         case BPF_STX:
2486                 if (BPF_MODE(insn->code) == BPF_ATOMIC &&
2487                     (insn->imm & BPF_FETCH)) {
2488                         if (insn->imm == BPF_CMPXCHG)
2489                                 return BPF_REG_0;
2490                         else
2491                                 return insn->src_reg;
2492                 } else {
2493                         return -1;
2494                 }
2495         default:
2496                 return insn->dst_reg;
2497         }
2498 }
2499
2500 /* Return TRUE if INSN has defined any 32-bit value explicitly. */
2501 static bool insn_has_def32(struct bpf_verifier_env *env, struct bpf_insn *insn)
2502 {
2503         int dst_reg = insn_def_regno(insn);
2504
2505         if (dst_reg == -1)
2506                 return false;
2507
2508         return !is_reg64(env, insn, dst_reg, NULL, DST_OP);
2509 }
2510
2511 static void mark_insn_zext(struct bpf_verifier_env *env,
2512                            struct bpf_reg_state *reg)
2513 {
2514         s32 def_idx = reg->subreg_def;
2515
2516         if (def_idx == DEF_NOT_SUBREG)
2517                 return;
2518
2519         env->insn_aux_data[def_idx - 1].zext_dst = true;
2520         /* The dst will be zero extended, so won't be sub-register anymore. */
2521         reg->subreg_def = DEF_NOT_SUBREG;
2522 }
2523
2524 static int check_reg_arg(struct bpf_verifier_env *env, u32 regno,
2525                          enum reg_arg_type t)
2526 {
2527         struct bpf_verifier_state *vstate = env->cur_state;
2528         struct bpf_func_state *state = vstate->frame[vstate->curframe];
2529         struct bpf_insn *insn = env->prog->insnsi + env->insn_idx;
2530         struct bpf_reg_state *reg, *regs = state->regs;
2531         bool rw64;
2532
2533         if (regno >= MAX_BPF_REG) {
2534                 verbose(env, "R%d is invalid\n", regno);
2535                 return -EINVAL;
2536         }
2537
2538         mark_reg_scratched(env, regno);
2539
2540         reg = &regs[regno];
2541         rw64 = is_reg64(env, insn, regno, reg, t);
2542         if (t == SRC_OP) {
2543                 /* check whether register used as source operand can be read */
2544                 if (reg->type == NOT_INIT) {
2545                         verbose(env, "R%d !read_ok\n", regno);
2546                         return -EACCES;
2547                 }
2548                 /* We don't need to worry about FP liveness because it's read-only */
2549                 if (regno == BPF_REG_FP)
2550                         return 0;
2551
2552                 if (rw64)
2553                         mark_insn_zext(env, reg);
2554
2555                 return mark_reg_read(env, reg, reg->parent,
2556                                      rw64 ? REG_LIVE_READ64 : REG_LIVE_READ32);
2557         } else {
2558                 /* check whether register used as dest operand can be written to */
2559                 if (regno == BPF_REG_FP) {
2560                         verbose(env, "frame pointer is read only\n");
2561                         return -EACCES;
2562                 }
2563                 reg->live |= REG_LIVE_WRITTEN;
2564                 reg->subreg_def = rw64 ? DEF_NOT_SUBREG : env->insn_idx + 1;
2565                 if (t == DST_OP)
2566                         mark_reg_unknown(env, regs, regno);
2567         }
2568         return 0;
2569 }
2570
2571 static void mark_jmp_point(struct bpf_verifier_env *env, int idx)
2572 {
2573         env->insn_aux_data[idx].jmp_point = true;
2574 }
2575
2576 static bool is_jmp_point(struct bpf_verifier_env *env, int insn_idx)
2577 {
2578         return env->insn_aux_data[insn_idx].jmp_point;
2579 }
2580
2581 /* for any branch, call, exit record the history of jmps in the given state */
2582 static int push_jmp_history(struct bpf_verifier_env *env,
2583                             struct bpf_verifier_state *cur)
2584 {
2585         u32 cnt = cur->jmp_history_cnt;
2586         struct bpf_idx_pair *p;
2587         size_t alloc_size;
2588
2589         if (!is_jmp_point(env, env->insn_idx))
2590                 return 0;
2591
2592         cnt++;
2593         alloc_size = kmalloc_size_roundup(size_mul(cnt, sizeof(*p)));
2594         p = krealloc(cur->jmp_history, alloc_size, GFP_USER);
2595         if (!p)
2596                 return -ENOMEM;
2597         p[cnt - 1].idx = env->insn_idx;
2598         p[cnt - 1].prev_idx = env->prev_insn_idx;
2599         cur->jmp_history = p;
2600         cur->jmp_history_cnt = cnt;
2601         return 0;
2602 }
2603
2604 /* Backtrack one insn at a time. If idx is not at the top of recorded
2605  * history then previous instruction came from straight line execution.
2606  */
2607 static int get_prev_insn_idx(struct bpf_verifier_state *st, int i,
2608                              u32 *history)
2609 {
2610         u32 cnt = *history;
2611
2612         if (cnt && st->jmp_history[cnt - 1].idx == i) {
2613                 i = st->jmp_history[cnt - 1].prev_idx;
2614                 (*history)--;
2615         } else {
2616                 i--;
2617         }
2618         return i;
2619 }
2620
2621 static const char *disasm_kfunc_name(void *data, const struct bpf_insn *insn)
2622 {
2623         const struct btf_type *func;
2624         struct btf *desc_btf;
2625
2626         if (insn->src_reg != BPF_PSEUDO_KFUNC_CALL)
2627                 return NULL;
2628
2629         desc_btf = find_kfunc_desc_btf(data, insn->off);
2630         if (IS_ERR(desc_btf))
2631                 return "<error>";
2632
2633         func = btf_type_by_id(desc_btf, insn->imm);
2634         return btf_name_by_offset(desc_btf, func->name_off);
2635 }
2636
2637 /* For given verifier state backtrack_insn() is called from the last insn to
2638  * the first insn. Its purpose is to compute a bitmask of registers and
2639  * stack slots that needs precision in the parent verifier state.
2640  */
2641 static int backtrack_insn(struct bpf_verifier_env *env, int idx,
2642                           u32 *reg_mask, u64 *stack_mask)
2643 {
2644         const struct bpf_insn_cbs cbs = {
2645                 .cb_call        = disasm_kfunc_name,
2646                 .cb_print       = verbose,
2647                 .private_data   = env,
2648         };
2649         struct bpf_insn *insn = env->prog->insnsi + idx;
2650         u8 class = BPF_CLASS(insn->code);
2651         u8 opcode = BPF_OP(insn->code);
2652         u8 mode = BPF_MODE(insn->code);
2653         u32 dreg = 1u << insn->dst_reg;
2654         u32 sreg = 1u << insn->src_reg;
2655         u32 spi;
2656
2657         if (insn->code == 0)
2658                 return 0;
2659         if (env->log.level & BPF_LOG_LEVEL2) {
2660                 verbose(env, "regs=%x stack=%llx before ", *reg_mask, *stack_mask);
2661                 verbose(env, "%d: ", idx);
2662                 print_bpf_insn(&cbs, insn, env->allow_ptr_leaks);
2663         }
2664
2665         if (class == BPF_ALU || class == BPF_ALU64) {
2666                 if (!(*reg_mask & dreg))
2667                         return 0;
2668                 if (opcode == BPF_MOV) {
2669                         if (BPF_SRC(insn->code) == BPF_X) {
2670                                 /* dreg = sreg
2671                                  * dreg needs precision after this insn
2672                                  * sreg needs precision before this insn
2673                                  */
2674                                 *reg_mask &= ~dreg;
2675                                 *reg_mask |= sreg;
2676                         } else {
2677                                 /* dreg = K
2678                                  * dreg needs precision after this insn.
2679                                  * Corresponding register is already marked
2680                                  * as precise=true in this verifier state.
2681                                  * No further markings in parent are necessary
2682                                  */
2683                                 *reg_mask &= ~dreg;
2684                         }
2685                 } else {
2686                         if (BPF_SRC(insn->code) == BPF_X) {
2687                                 /* dreg += sreg
2688                                  * both dreg and sreg need precision
2689                                  * before this insn
2690                                  */
2691                                 *reg_mask |= sreg;
2692                         } /* else dreg += K
2693                            * dreg still needs precision before this insn
2694                            */
2695                 }
2696         } else if (class == BPF_LDX) {
2697                 if (!(*reg_mask & dreg))
2698                         return 0;
2699                 *reg_mask &= ~dreg;
2700
2701                 /* scalars can only be spilled into stack w/o losing precision.
2702                  * Load from any other memory can be zero extended.
2703                  * The desire to keep that precision is already indicated
2704                  * by 'precise' mark in corresponding register of this state.
2705                  * No further tracking necessary.
2706                  */
2707                 if (insn->src_reg != BPF_REG_FP)
2708                         return 0;
2709
2710                 /* dreg = *(u64 *)[fp - off] was a fill from the stack.
2711                  * that [fp - off] slot contains scalar that needs to be
2712                  * tracked with precision
2713                  */
2714                 spi = (-insn->off - 1) / BPF_REG_SIZE;
2715                 if (spi >= 64) {
2716                         verbose(env, "BUG spi %d\n", spi);
2717                         WARN_ONCE(1, "verifier backtracking bug");
2718                         return -EFAULT;
2719                 }
2720                 *stack_mask |= 1ull << spi;
2721         } else if (class == BPF_STX || class == BPF_ST) {
2722                 if (*reg_mask & dreg)
2723                         /* stx & st shouldn't be using _scalar_ dst_reg
2724                          * to access memory. It means backtracking
2725                          * encountered a case of pointer subtraction.
2726                          */
2727                         return -ENOTSUPP;
2728                 /* scalars can only be spilled into stack */
2729                 if (insn->dst_reg != BPF_REG_FP)
2730                         return 0;
2731                 spi = (-insn->off - 1) / BPF_REG_SIZE;
2732                 if (spi >= 64) {
2733                         verbose(env, "BUG spi %d\n", spi);
2734                         WARN_ONCE(1, "verifier backtracking bug");
2735                         return -EFAULT;
2736                 }
2737                 if (!(*stack_mask & (1ull << spi)))
2738                         return 0;
2739                 *stack_mask &= ~(1ull << spi);
2740                 if (class == BPF_STX)
2741                         *reg_mask |= sreg;
2742         } else if (class == BPF_JMP || class == BPF_JMP32) {
2743                 if (opcode == BPF_CALL) {
2744                         if (insn->src_reg == BPF_PSEUDO_CALL)
2745                                 return -ENOTSUPP;
2746                         /* BPF helpers that invoke callback subprogs are
2747                          * equivalent to BPF_PSEUDO_CALL above
2748                          */
2749                         if (insn->src_reg == 0 && is_callback_calling_function(insn->imm))
2750                                 return -ENOTSUPP;
2751                         /* regular helper call sets R0 */
2752                         *reg_mask &= ~1;
2753                         if (*reg_mask & 0x3f) {
2754                                 /* if backtracing was looking for registers R1-R5
2755                                  * they should have been found already.
2756                                  */
2757                                 verbose(env, "BUG regs %x\n", *reg_mask);
2758                                 WARN_ONCE(1, "verifier backtracking bug");
2759                                 return -EFAULT;
2760                         }
2761                 } else if (opcode == BPF_EXIT) {
2762                         return -ENOTSUPP;
2763                 }
2764         } else if (class == BPF_LD) {
2765                 if (!(*reg_mask & dreg))
2766                         return 0;
2767                 *reg_mask &= ~dreg;
2768                 /* It's ld_imm64 or ld_abs or ld_ind.
2769                  * For ld_imm64 no further tracking of precision
2770                  * into parent is necessary
2771                  */
2772                 if (mode == BPF_IND || mode == BPF_ABS)
2773                         /* to be analyzed */
2774                         return -ENOTSUPP;
2775         }
2776         return 0;
2777 }
2778
2779 /* the scalar precision tracking algorithm:
2780  * . at the start all registers have precise=false.
2781  * . scalar ranges are tracked as normal through alu and jmp insns.
2782  * . once precise value of the scalar register is used in:
2783  *   .  ptr + scalar alu
2784  *   . if (scalar cond K|scalar)
2785  *   .  helper_call(.., scalar, ...) where ARG_CONST is expected
2786  *   backtrack through the verifier states and mark all registers and
2787  *   stack slots with spilled constants that these scalar regisers
2788  *   should be precise.
2789  * . during state pruning two registers (or spilled stack slots)
2790  *   are equivalent if both are not precise.
2791  *
2792  * Note the verifier cannot simply walk register parentage chain,
2793  * since many different registers and stack slots could have been
2794  * used to compute single precise scalar.
2795  *
2796  * The approach of starting with precise=true for all registers and then
2797  * backtrack to mark a register as not precise when the verifier detects
2798  * that program doesn't care about specific value (e.g., when helper
2799  * takes register as ARG_ANYTHING parameter) is not safe.
2800  *
2801  * It's ok to walk single parentage chain of the verifier states.
2802  * It's possible that this backtracking will go all the way till 1st insn.
2803  * All other branches will be explored for needing precision later.
2804  *
2805  * The backtracking needs to deal with cases like:
2806  *   R8=map_value(id=0,off=0,ks=4,vs=1952,imm=0) R9_w=map_value(id=0,off=40,ks=4,vs=1952,imm=0)
2807  * r9 -= r8
2808  * r5 = r9
2809  * if r5 > 0x79f goto pc+7
2810  *    R5_w=inv(id=0,umax_value=1951,var_off=(0x0; 0x7ff))
2811  * r5 += 1
2812  * ...
2813  * call bpf_perf_event_output#25
2814  *   where .arg5_type = ARG_CONST_SIZE_OR_ZERO
2815  *
2816  * and this case:
2817  * r6 = 1
2818  * call foo // uses callee's r6 inside to compute r0
2819  * r0 += r6
2820  * if r0 == 0 goto
2821  *
2822  * to track above reg_mask/stack_mask needs to be independent for each frame.
2823  *
2824  * Also if parent's curframe > frame where backtracking started,
2825  * the verifier need to mark registers in both frames, otherwise callees
2826  * may incorrectly prune callers. This is similar to
2827  * commit 7640ead93924 ("bpf: verifier: make sure callees don't prune with caller differences")
2828  *
2829  * For now backtracking falls back into conservative marking.
2830  */
2831 static void mark_all_scalars_precise(struct bpf_verifier_env *env,
2832                                      struct bpf_verifier_state *st)
2833 {
2834         struct bpf_func_state *func;
2835         struct bpf_reg_state *reg;
2836         int i, j;
2837
2838         /* big hammer: mark all scalars precise in this path.
2839          * pop_stack may still get !precise scalars.
2840          * We also skip current state and go straight to first parent state,
2841          * because precision markings in current non-checkpointed state are
2842          * not needed. See why in the comment in __mark_chain_precision below.
2843          */
2844         for (st = st->parent; st; st = st->parent) {
2845                 for (i = 0; i <= st->curframe; i++) {
2846                         func = st->frame[i];
2847                         for (j = 0; j < BPF_REG_FP; j++) {
2848                                 reg = &func->regs[j];
2849                                 if (reg->type != SCALAR_VALUE)
2850                                         continue;
2851                                 reg->precise = true;
2852                         }
2853                         for (j = 0; j < func->allocated_stack / BPF_REG_SIZE; j++) {
2854                                 if (!is_spilled_reg(&func->stack[j]))
2855                                         continue;
2856                                 reg = &func->stack[j].spilled_ptr;
2857                                 if (reg->type != SCALAR_VALUE)
2858                                         continue;
2859                                 reg->precise = true;
2860                         }
2861                 }
2862         }
2863 }
2864
2865 static void mark_all_scalars_imprecise(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
2866 {
2867         struct bpf_func_state *func;
2868         struct bpf_reg_state *reg;
2869         int i, j;
2870
2871         for (i = 0; i <= st->curframe; i++) {
2872                 func = st->frame[i];
2873                 for (j = 0; j < BPF_REG_FP; j++) {
2874                         reg = &func->regs[j];
2875                         if (reg->type != SCALAR_VALUE)
2876                                 continue;
2877                         reg->precise = false;
2878                 }
2879                 for (j = 0; j < func->allocated_stack / BPF_REG_SIZE; j++) {
2880                         if (!is_spilled_reg(&func->stack[j]))
2881                                 continue;
2882                         reg = &func->stack[j].spilled_ptr;
2883                         if (reg->type != SCALAR_VALUE)
2884                                 continue;
2885                         reg->precise = false;
2886                 }
2887         }
2888 }
2889
2890 /*
2891  * __mark_chain_precision() backtracks BPF program instruction sequence and
2892  * chain of verifier states making sure that register *regno* (if regno >= 0)
2893  * and/or stack slot *spi* (if spi >= 0) are marked as precisely tracked
2894  * SCALARS, as well as any other registers and slots that contribute to
2895  * a tracked state of given registers/stack slots, depending on specific BPF
2896  * assembly instructions (see backtrack_insns() for exact instruction handling
2897  * logic). This backtracking relies on recorded jmp_history and is able to
2898  * traverse entire chain of parent states. This process ends only when all the
2899  * necessary registers/slots and their transitive dependencies are marked as
2900  * precise.
2901  *
2902  * One important and subtle aspect is that precise marks *do not matter* in
2903  * the currently verified state (current state). It is important to understand
2904  * why this is the case.
2905  *
2906  * First, note that current state is the state that is not yet "checkpointed",
2907  * i.e., it is not yet put into env->explored_states, and it has no children
2908  * states as well. It's ephemeral, and can end up either a) being discarded if
2909  * compatible explored state is found at some point or BPF_EXIT instruction is
2910  * reached or b) checkpointed and put into env->explored_states, branching out
2911  * into one or more children states.
2912  *
2913  * In the former case, precise markings in current state are completely
2914  * ignored by state comparison code (see regsafe() for details). Only
2915  * checkpointed ("old") state precise markings are important, and if old
2916  * state's register/slot is precise, regsafe() assumes current state's
2917  * register/slot as precise and checks value ranges exactly and precisely. If
2918  * states turn out to be compatible, current state's necessary precise
2919  * markings and any required parent states' precise markings are enforced
2920  * after the fact with propagate_precision() logic, after the fact. But it's
2921  * important to realize that in this case, even after marking current state
2922  * registers/slots as precise, we immediately discard current state. So what
2923  * actually matters is any of the precise markings propagated into current
2924  * state's parent states, which are always checkpointed (due to b) case above).
2925  * As such, for scenario a) it doesn't matter if current state has precise
2926  * markings set or not.
2927  *
2928  * Now, for the scenario b), checkpointing and forking into child(ren)
2929  * state(s). Note that before current state gets to checkpointing step, any
2930  * processed instruction always assumes precise SCALAR register/slot
2931  * knowledge: if precise value or range is useful to prune jump branch, BPF
2932  * verifier takes this opportunity enthusiastically. Similarly, when
2933  * register's value is used to calculate offset or memory address, exact
2934  * knowledge of SCALAR range is assumed, checked, and enforced. So, similar to
2935  * what we mentioned above about state comparison ignoring precise markings
2936  * during state comparison, BPF verifier ignores and also assumes precise
2937  * markings *at will* during instruction verification process. But as verifier
2938  * assumes precision, it also propagates any precision dependencies across
2939  * parent states, which are not yet finalized, so can be further restricted
2940  * based on new knowledge gained from restrictions enforced by their children
2941  * states. This is so that once those parent states are finalized, i.e., when
2942  * they have no more active children state, state comparison logic in
2943  * is_state_visited() would enforce strict and precise SCALAR ranges, if
2944  * required for correctness.
2945  *
2946  * To build a bit more intuition, note also that once a state is checkpointed,
2947  * the path we took to get to that state is not important. This is crucial
2948  * property for state pruning. When state is checkpointed and finalized at
2949  * some instruction index, it can be correctly and safely used to "short
2950  * circuit" any *compatible* state that reaches exactly the same instruction
2951  * index. I.e., if we jumped to that instruction from a completely different
2952  * code path than original finalized state was derived from, it doesn't
2953  * matter, current state can be discarded because from that instruction
2954  * forward having a compatible state will ensure we will safely reach the
2955  * exit. States describe preconditions for further exploration, but completely
2956  * forget the history of how we got here.
2957  *
2958  * This also means that even if we needed precise SCALAR range to get to
2959  * finalized state, but from that point forward *that same* SCALAR register is
2960  * never used in a precise context (i.e., it's precise value is not needed for
2961  * correctness), it's correct and safe to mark such register as "imprecise"
2962  * (i.e., precise marking set to false). This is what we rely on when we do
2963  * not set precise marking in current state. If no child state requires
2964  * precision for any given SCALAR register, it's safe to dictate that it can
2965  * be imprecise. If any child state does require this register to be precise,
2966  * we'll mark it precise later retroactively during precise markings
2967  * propagation from child state to parent states.
2968  *
2969  * Skipping precise marking setting in current state is a mild version of
2970  * relying on the above observation. But we can utilize this property even
2971  * more aggressively by proactively forgetting any precise marking in the
2972  * current state (which we inherited from the parent state), right before we
2973  * checkpoint it and branch off into new child state. This is done by
2974  * mark_all_scalars_imprecise() to hopefully get more permissive and generic
2975  * finalized states which help in short circuiting more future states.
2976  */
2977 static int __mark_chain_precision(struct bpf_verifier_env *env, int frame, int regno,
2978                                   int spi)
2979 {
2980         struct bpf_verifier_state *st = env->cur_state;
2981         int first_idx = st->first_insn_idx;
2982         int last_idx = env->insn_idx;
2983         struct bpf_func_state *func;
2984         struct bpf_reg_state *reg;
2985         u32 reg_mask = regno >= 0 ? 1u << regno : 0;
2986         u64 stack_mask = spi >= 0 ? 1ull << spi : 0;
2987         bool skip_first = true;
2988         bool new_marks = false;
2989         int i, err;
2990
2991         if (!env->bpf_capable)
2992                 return 0;
2993
2994         /* Do sanity checks against current state of register and/or stack
2995          * slot, but don't set precise flag in current state, as precision
2996          * tracking in the current state is unnecessary.
2997          */
2998         func = st->frame[frame];
2999         if (regno >= 0) {
3000                 reg = &func->regs[regno];
3001                 if (reg->type != SCALAR_VALUE) {
3002                         WARN_ONCE(1, "backtracing misuse");
3003                         return -EFAULT;
3004                 }
3005                 new_marks = true;
3006         }
3007
3008         while (spi >= 0) {
3009                 if (!is_spilled_reg(&func->stack[spi])) {
3010                         stack_mask = 0;
3011                         break;
3012                 }
3013                 reg = &func->stack[spi].spilled_ptr;
3014                 if (reg->type != SCALAR_VALUE) {
3015                         stack_mask = 0;
3016                         break;
3017                 }
3018                 new_marks = true;
3019                 break;
3020         }
3021
3022         if (!new_marks)
3023                 return 0;
3024         if (!reg_mask && !stack_mask)
3025                 return 0;
3026
3027         for (;;) {
3028                 DECLARE_BITMAP(mask, 64);
3029                 u32 history = st->jmp_history_cnt;
3030
3031                 if (env->log.level & BPF_LOG_LEVEL2)
3032                         verbose(env, "last_idx %d first_idx %d\n", last_idx, first_idx);
3033
3034                 if (last_idx < 0) {
3035                         /* we are at the entry into subprog, which
3036                          * is expected for global funcs, but only if
3037                          * requested precise registers are R1-R5
3038                          * (which are global func's input arguments)
3039                          */
3040                         if (st->curframe == 0 &&
3041                             st->frame[0]->subprogno > 0 &&
3042                             st->frame[0]->callsite == BPF_MAIN_FUNC &&
3043                             stack_mask == 0 && (reg_mask & ~0x3e) == 0) {
3044                                 bitmap_from_u64(mask, reg_mask);
3045                                 for_each_set_bit(i, mask, 32) {
3046                                         reg = &st->frame[0]->regs[i];
3047                                         if (reg->type != SCALAR_VALUE) {
3048                                                 reg_mask &= ~(1u << i);
3049                                                 continue;
3050                                         }
3051                                         reg->precise = true;
3052                                 }
3053                                 return 0;
3054                         }
3055
3056                         verbose(env, "BUG backtracing func entry subprog %d reg_mask %x stack_mask %llx\n",
3057                                 st->frame[0]->subprogno, reg_mask, stack_mask);
3058                         WARN_ONCE(1, "verifier backtracking bug");
3059                         return -EFAULT;
3060                 }
3061
3062                 for (i = last_idx;;) {
3063                         if (skip_first) {
3064                                 err = 0;
3065                                 skip_first = false;
3066                         } else {
3067                                 err = backtrack_insn(env, i, &reg_mask, &stack_mask);
3068                         }
3069                         if (err == -ENOTSUPP) {
3070                                 mark_all_scalars_precise(env, st);
3071                                 return 0;
3072                         } else if (err) {
3073                                 return err;
3074                         }
3075                         if (!reg_mask && !stack_mask)
3076                                 /* Found assignment(s) into tracked register in this state.
3077                                  * Since this state is already marked, just return.
3078                                  * Nothing to be tracked further in the parent state.
3079                                  */
3080                                 return 0;
3081                         if (i == first_idx)
3082                                 break;
3083                         i = get_prev_insn_idx(st, i, &history);
3084                         if (i >= env->prog->len) {
3085                                 /* This can happen if backtracking reached insn 0
3086                                  * and there are still reg_mask or stack_mask
3087                                  * to backtrack.
3088                                  * It means the backtracking missed the spot where
3089                                  * particular register was initialized with a constant.
3090                                  */
3091                                 verbose(env, "BUG backtracking idx %d\n", i);
3092                                 WARN_ONCE(1, "verifier backtracking bug");
3093                                 return -EFAULT;
3094                         }
3095                 }
3096                 st = st->parent;
3097                 if (!st)
3098                         break;
3099
3100                 new_marks = false;
3101                 func = st->frame[frame];
3102                 bitmap_from_u64(mask, reg_mask);
3103                 for_each_set_bit(i, mask, 32) {
3104                         reg = &func->regs[i];
3105                         if (reg->type != SCALAR_VALUE) {
3106                                 reg_mask &= ~(1u << i);
3107                                 continue;
3108                         }
3109                         if (!reg->precise)
3110                                 new_marks = true;
3111                         reg->precise = true;
3112                 }
3113
3114                 bitmap_from_u64(mask, stack_mask);
3115                 for_each_set_bit(i, mask, 64) {
3116                         if (i >= func->allocated_stack / BPF_REG_SIZE) {
3117                                 /* the sequence of instructions:
3118                                  * 2: (bf) r3 = r10
3119                                  * 3: (7b) *(u64 *)(r3 -8) = r0
3120                                  * 4: (79) r4 = *(u64 *)(r10 -8)
3121                                  * doesn't contain jmps. It's backtracked
3122                                  * as a single block.
3123                                  * During backtracking insn 3 is not recognized as
3124                                  * stack access, so at the end of backtracking
3125                                  * stack slot fp-8 is still marked in stack_mask.
3126                                  * However the parent state may not have accessed
3127                                  * fp-8 and it's "unallocated" stack space.
3128                                  * In such case fallback to conservative.
3129                                  */
3130                                 mark_all_scalars_precise(env, st);
3131                                 return 0;
3132                         }
3133
3134                         if (!is_spilled_reg(&func->stack[i])) {
3135                                 stack_mask &= ~(1ull << i);
3136                                 continue;
3137                         }
3138                         reg = &func->stack[i].spilled_ptr;
3139                         if (reg->type != SCALAR_VALUE) {
3140                                 stack_mask &= ~(1ull << i);
3141                                 continue;
3142                         }
3143                         if (!reg->precise)
3144                                 new_marks = true;
3145                         reg->precise = true;
3146                 }
3147                 if (env->log.level & BPF_LOG_LEVEL2) {
3148                         verbose(env, "parent %s regs=%x stack=%llx marks:",
3149                                 new_marks ? "didn't have" : "already had",
3150                                 reg_mask, stack_mask);
3151                         print_verifier_state(env, func, true);
3152                 }
3153
3154                 if (!reg_mask && !stack_mask)
3155                         break;
3156                 if (!new_marks)
3157                         break;
3158
3159                 last_idx = st->last_insn_idx;
3160                 first_idx = st->first_insn_idx;
3161         }
3162         return 0;
3163 }
3164
3165 int mark_chain_precision(struct bpf_verifier_env *env, int regno)
3166 {
3167         return __mark_chain_precision(env, env->cur_state->curframe, regno, -1);
3168 }
3169
3170 static int mark_chain_precision_frame(struct bpf_verifier_env *env, int frame, int regno)
3171 {
3172         return __mark_chain_precision(env, frame, regno, -1);
3173 }
3174
3175 static int mark_chain_precision_stack_frame(struct bpf_verifier_env *env, int frame, int spi)
3176 {
3177         return __mark_chain_precision(env, frame, -1, spi);
3178 }
3179
3180 static bool is_spillable_regtype(enum bpf_reg_type type)
3181 {
3182         switch (base_type(type)) {
3183         case PTR_TO_MAP_VALUE:
3184         case PTR_TO_STACK:
3185         case PTR_TO_CTX:
3186         case PTR_TO_PACKET:
3187         case PTR_TO_PACKET_META:
3188         case PTR_TO_PACKET_END:
3189         case PTR_TO_FLOW_KEYS:
3190         case CONST_PTR_TO_MAP:
3191         case PTR_TO_SOCKET:
3192         case PTR_TO_SOCK_COMMON:
3193         case PTR_TO_TCP_SOCK:
3194         case PTR_TO_XDP_SOCK:
3195         case PTR_TO_BTF_ID:
3196         case PTR_TO_BUF:
3197         case PTR_TO_MEM:
3198         case PTR_TO_FUNC:
3199         case PTR_TO_MAP_KEY:
3200                 return true;
3201         default:
3202                 return false;
3203         }
3204 }
3205
3206 /* Does this register contain a constant zero? */
3207 static bool register_is_null(struct bpf_reg_state *reg)
3208 {
3209         return reg->type == SCALAR_VALUE && tnum_equals_const(reg->var_off, 0);
3210 }
3211
3212 static bool register_is_const(struct bpf_reg_state *reg)
3213 {
3214         return reg->type == SCALAR_VALUE && tnum_is_const(reg->var_off);
3215 }
3216
3217 static bool __is_scalar_unbounded(struct bpf_reg_state *reg)
3218 {
3219         return tnum_is_unknown(reg->var_off) &&
3220                reg->smin_value == S64_MIN && reg->smax_value == S64_MAX &&
3221                reg->umin_value == 0 && reg->umax_value == U64_MAX &&
3222                reg->s32_min_value == S32_MIN && reg->s32_max_value == S32_MAX &&
3223                reg->u32_min_value == 0 && reg->u32_max_value == U32_MAX;
3224 }
3225
3226 static bool register_is_bounded(struct bpf_reg_state *reg)
3227 {
3228         return reg->type == SCALAR_VALUE && !__is_scalar_unbounded(reg);
3229 }
3230
3231 static bool __is_pointer_value(bool allow_ptr_leaks,
3232                                const struct bpf_reg_state *reg)
3233 {
3234         if (allow_ptr_leaks)
3235                 return false;
3236
3237         return reg->type != SCALAR_VALUE;
3238 }
3239
3240 static void save_register_state(struct bpf_func_state *state,
3241                                 int spi, struct bpf_reg_state *reg,
3242                                 int size)
3243 {
3244         int i;
3245
3246         state->stack[spi].spilled_ptr = *reg;
3247         if (size == BPF_REG_SIZE)
3248                 state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
3249
3250         for (i = BPF_REG_SIZE; i > BPF_REG_SIZE - size; i--)
3251                 state->stack[spi].slot_type[i - 1] = STACK_SPILL;
3252
3253         /* size < 8 bytes spill */
3254         for (; i; i--)
3255                 scrub_spilled_slot(&state->stack[spi].slot_type[i - 1]);
3256 }
3257
3258 /* check_stack_{read,write}_fixed_off functions track spill/fill of registers,
3259  * stack boundary and alignment are checked in check_mem_access()
3260  */
3261 static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
3262                                        /* stack frame we're writing to */
3263                                        struct bpf_func_state *state,
3264                                        int off, int size, int value_regno,
3265                                        int insn_idx)
3266 {
3267         struct bpf_func_state *cur; /* state of the current function */
3268         int i, slot = -off - 1, spi = slot / BPF_REG_SIZE, err;
3269         u32 dst_reg = env->prog->insnsi[insn_idx].dst_reg;
3270         struct bpf_reg_state *reg = NULL;
3271
3272         err = grow_stack_state(state, round_up(slot + 1, BPF_REG_SIZE));
3273         if (err)
3274                 return err;
3275         /* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0,
3276          * so it's aligned access and [off, off + size) are within stack limits
3277          */
3278         if (!env->allow_ptr_leaks &&
3279             state->stack[spi].slot_type[0] == STACK_SPILL &&
3280             size != BPF_REG_SIZE) {
3281                 verbose(env, "attempt to corrupt spilled pointer on stack\n");
3282                 return -EACCES;
3283         }
3284
3285         cur = env->cur_state->frame[env->cur_state->curframe];
3286         if (value_regno >= 0)
3287                 reg = &cur->regs[value_regno];
3288         if (!env->bypass_spec_v4) {
3289                 bool sanitize = reg && is_spillable_regtype(reg->type);
3290
3291                 for (i = 0; i < size; i++) {
3292                         if (state->stack[spi].slot_type[i] == STACK_INVALID) {
3293                                 sanitize = true;
3294                                 break;
3295                         }
3296                 }
3297
3298                 if (sanitize)
3299                         env->insn_aux_data[insn_idx].sanitize_stack_spill = true;
3300         }
3301
3302         mark_stack_slot_scratched(env, spi);
3303         if (reg && !(off % BPF_REG_SIZE) && register_is_bounded(reg) &&
3304             !register_is_null(reg) && env->bpf_capable) {
3305                 if (dst_reg != BPF_REG_FP) {
3306                         /* The backtracking logic can only recognize explicit
3307                          * stack slot address like [fp - 8]. Other spill of
3308                          * scalar via different register has to be conservative.
3309                          * Backtrack from here and mark all registers as precise
3310                          * that contributed into 'reg' being a constant.
3311                          */
3312                         err = mark_chain_precision(env, value_regno);
3313                         if (err)
3314                                 return err;
3315                 }
3316                 save_register_state(state, spi, reg, size);
3317         } else if (reg && is_spillable_regtype(reg->type)) {
3318                 /* register containing pointer is being spilled into stack */
3319                 if (size != BPF_REG_SIZE) {
3320                         verbose_linfo(env, insn_idx, "; ");
3321                         verbose(env, "invalid size of register spill\n");
3322                         return -EACCES;
3323                 }
3324                 if (state != cur && reg->type == PTR_TO_STACK) {
3325                         verbose(env, "cannot spill pointers to stack into stack frame of the caller\n");
3326                         return -EINVAL;
3327                 }
3328                 save_register_state(state, spi, reg, size);
3329         } else {
3330                 u8 type = STACK_MISC;
3331
3332                 /* regular write of data into stack destroys any spilled ptr */
3333                 state->stack[spi].spilled_ptr.type = NOT_INIT;
3334                 /* Mark slots as STACK_MISC if they belonged to spilled ptr. */
3335                 if (is_spilled_reg(&state->stack[spi]))
3336                         for (i = 0; i < BPF_REG_SIZE; i++)
3337                                 scrub_spilled_slot(&state->stack[spi].slot_type[i]);
3338
3339                 /* only mark the slot as written if all 8 bytes were written
3340                  * otherwise read propagation may incorrectly stop too soon
3341                  * when stack slots are partially written.
3342                  * This heuristic means that read propagation will be
3343                  * conservative, since it will add reg_live_read marks
3344                  * to stack slots all the way to first state when programs
3345                  * writes+reads less than 8 bytes
3346                  */
3347                 if (size == BPF_REG_SIZE)
3348                         state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
3349
3350                 /* when we zero initialize stack slots mark them as such */
3351                 if (reg && register_is_null(reg)) {
3352                         /* backtracking doesn't work for STACK_ZERO yet. */
3353                         err = mark_chain_precision(env, value_regno);
3354                         if (err)
3355                                 return err;
3356                         type = STACK_ZERO;
3357                 }
3358
3359                 /* Mark slots affected by this stack write. */
3360                 for (i = 0; i < size; i++)
3361                         state->stack[spi].slot_type[(slot - i) % BPF_REG_SIZE] =
3362                                 type;
3363         }
3364         return 0;
3365 }
3366
3367 /* Write the stack: 'stack[ptr_regno + off] = value_regno'. 'ptr_regno' is
3368  * known to contain a variable offset.
3369  * This function checks whether the write is permitted and conservatively
3370  * tracks the effects of the write, considering that each stack slot in the
3371  * dynamic range is potentially written to.
3372  *
3373  * 'off' includes 'regno->off'.
3374  * 'value_regno' can be -1, meaning that an unknown value is being written to
3375  * the stack.
3376  *
3377  * Spilled pointers in range are not marked as written because we don't know
3378  * what's going to be actually written. This means that read propagation for
3379  * future reads cannot be terminated by this write.
3380  *
3381  * For privileged programs, uninitialized stack slots are considered
3382  * initialized by this write (even though we don't know exactly what offsets
3383  * are going to be written to). The idea is that we don't want the verifier to
3384  * reject future reads that access slots written to through variable offsets.
3385  */
3386 static int check_stack_write_var_off(struct bpf_verifier_env *env,
3387                                      /* func where register points to */
3388                                      struct bpf_func_state *state,
3389                                      int ptr_regno, int off, int size,
3390                                      int value_regno, int insn_idx)
3391 {
3392         struct bpf_func_state *cur; /* state of the current function */
3393         int min_off, max_off;
3394         int i, err;
3395         struct bpf_reg_state *ptr_reg = NULL, *value_reg = NULL;
3396         bool writing_zero = false;
3397         /* set if the fact that we're writing a zero is used to let any
3398          * stack slots remain STACK_ZERO
3399          */
3400         bool zero_used = false;
3401
3402         cur = env->cur_state->frame[env->cur_state->curframe];
3403         ptr_reg = &cur->regs[ptr_regno];
3404         min_off = ptr_reg->smin_value + off;
3405         max_off = ptr_reg->smax_value + off + size;
3406         if (value_regno >= 0)
3407                 value_reg = &cur->regs[value_regno];
3408         if (value_reg && register_is_null(value_reg))
3409                 writing_zero = true;
3410
3411         err = grow_stack_state(state, round_up(-min_off, BPF_REG_SIZE));
3412         if (err)
3413                 return err;
3414
3415
3416         /* Variable offset writes destroy any spilled pointers in range. */
3417         for (i = min_off; i < max_off; i++) {
3418                 u8 new_type, *stype;
3419                 int slot, spi;
3420
3421                 slot = -i - 1;
3422                 spi = slot / BPF_REG_SIZE;
3423                 stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE];
3424                 mark_stack_slot_scratched(env, spi);
3425
3426                 if (!env->allow_ptr_leaks && *stype != STACK_MISC && *stype != STACK_ZERO) {
3427                         /* Reject the write if range we may write to has not
3428                          * been initialized beforehand. If we didn't reject
3429                          * here, the ptr status would be erased below (even
3430                          * though not all slots are actually overwritten),
3431                          * possibly opening the door to leaks.
3432                          *
3433                          * We do however catch STACK_INVALID case below, and
3434                          * only allow reading possibly uninitialized memory
3435                          * later for CAP_PERFMON, as the write may not happen to
3436                          * that slot.
3437                          */
3438                         verbose(env, "spilled ptr in range of var-offset stack write; insn %d, ptr off: %d",
3439                                 insn_idx, i);
3440                         return -EINVAL;
3441                 }
3442
3443                 /* Erase all spilled pointers. */
3444                 state->stack[spi].spilled_ptr.type = NOT_INIT;
3445
3446                 /* Update the slot type. */
3447                 new_type = STACK_MISC;
3448                 if (writing_zero && *stype == STACK_ZERO) {
3449                         new_type = STACK_ZERO;
3450                         zero_used = true;
3451                 }
3452                 /* If the slot is STACK_INVALID, we check whether it's OK to
3453                  * pretend that it will be initialized by this write. The slot
3454                  * might not actually be written to, and so if we mark it as
3455                  * initialized future reads might leak uninitialized memory.
3456                  * For privileged programs, we will accept such reads to slots
3457                  * that may or may not be written because, if we're reject
3458                  * them, the error would be too confusing.
3459                  */
3460                 if (*stype == STACK_INVALID && !env->allow_uninit_stack) {
3461                         verbose(env, "uninit stack in range of var-offset write prohibited for !root; insn %d, off: %d",
3462                                         insn_idx, i);
3463                         return -EINVAL;
3464                 }
3465                 *stype = new_type;
3466         }
3467         if (zero_used) {
3468                 /* backtracking doesn't work for STACK_ZERO yet. */
3469                 err = mark_chain_precision(env, value_regno);
3470                 if (err)
3471                         return err;
3472         }
3473         return 0;
3474 }
3475
3476 /* When register 'dst_regno' is assigned some values from stack[min_off,
3477  * max_off), we set the register's type according to the types of the
3478  * respective stack slots. If all the stack values are known to be zeros, then
3479  * so is the destination reg. Otherwise, the register is considered to be
3480  * SCALAR. This function does not deal with register filling; the caller must
3481  * ensure that all spilled registers in the stack range have been marked as
3482  * read.
3483  */
3484 static void mark_reg_stack_read(struct bpf_verifier_env *env,
3485                                 /* func where src register points to */
3486                                 struct bpf_func_state *ptr_state,
3487                                 int min_off, int max_off, int dst_regno)
3488 {
3489         struct bpf_verifier_state *vstate = env->cur_state;
3490         struct bpf_func_state *state = vstate->frame[vstate->curframe];
3491         int i, slot, spi;
3492         u8 *stype;
3493         int zeros = 0;
3494
3495         for (i = min_off; i < max_off; i++) {
3496                 slot = -i - 1;
3497                 spi = slot / BPF_REG_SIZE;
3498                 stype = ptr_state->stack[spi].slot_type;
3499                 if (stype[slot % BPF_REG_SIZE] != STACK_ZERO)
3500                         break;
3501                 zeros++;
3502         }
3503         if (zeros == max_off - min_off) {
3504                 /* any access_size read into register is zero extended,
3505                  * so the whole register == const_zero
3506                  */
3507                 __mark_reg_const_zero(&state->regs[dst_regno]);
3508                 /* backtracking doesn't support STACK_ZERO yet,
3509                  * so mark it precise here, so that later
3510                  * backtracking can stop here.
3511                  * Backtracking may not need this if this register
3512                  * doesn't participate in pointer adjustment.
3513                  * Forward propagation of precise flag is not
3514                  * necessary either. This mark is only to stop
3515                  * backtracking. Any register that contributed
3516                  * to const 0 was marked precise before spill.
3517                  */
3518                 state->regs[dst_regno].precise = true;
3519         } else {
3520                 /* have read misc data from the stack */
3521                 mark_reg_unknown(env, state->regs, dst_regno);
3522         }
3523         state->regs[dst_regno].live |= REG_LIVE_WRITTEN;
3524 }
3525
3526 /* Read the stack at 'off' and put the results into the register indicated by
3527  * 'dst_regno'. It handles reg filling if the addressed stack slot is a
3528  * spilled reg.
3529  *
3530  * 'dst_regno' can be -1, meaning that the read value is not going to a
3531  * register.
3532  *
3533  * The access is assumed to be within the current stack bounds.
3534  */
3535 static int check_stack_read_fixed_off(struct bpf_verifier_env *env,
3536                                       /* func where src register points to */
3537                                       struct bpf_func_state *reg_state,
3538                                       int off, int size, int dst_regno)
3539 {
3540         struct bpf_verifier_state *vstate = env->cur_state;
3541         struct bpf_func_state *state = vstate->frame[vstate->curframe];
3542         int i, slot = -off - 1, spi = slot / BPF_REG_SIZE;
3543         struct bpf_reg_state *reg;
3544         u8 *stype, type;
3545
3546         stype = reg_state->stack[spi].slot_type;
3547         reg = &reg_state->stack[spi].spilled_ptr;
3548
3549         if (is_spilled_reg(&reg_state->stack[spi])) {
3550                 u8 spill_size = 1;
3551
3552                 for (i = BPF_REG_SIZE - 1; i > 0 && stype[i - 1] == STACK_SPILL; i--)
3553                         spill_size++;
3554
3555                 if (size != BPF_REG_SIZE || spill_size != BPF_REG_SIZE) {
3556                         if (reg->type != SCALAR_VALUE) {
3557                                 verbose_linfo(env, env->insn_idx, "; ");
3558                                 verbose(env, "invalid size of register fill\n");
3559                                 return -EACCES;
3560                         }
3561
3562                         mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
3563                         if (dst_regno < 0)
3564                                 return 0;
3565
3566                         if (!(off % BPF_REG_SIZE) && size == spill_size) {
3567                                 /* The earlier check_reg_arg() has decided the
3568                                  * subreg_def for this insn.  Save it first.
3569                                  */
3570                                 s32 subreg_def = state->regs[dst_regno].subreg_def;
3571
3572                                 state->regs[dst_regno] = *reg;
3573                                 state->regs[dst_regno].subreg_def = subreg_def;
3574                         } else {
3575                                 for (i = 0; i < size; i++) {
3576                                         type = stype[(slot - i) % BPF_REG_SIZE];
3577                                         if (type == STACK_SPILL)
3578                                                 continue;
3579                                         if (type == STACK_MISC)
3580                                                 continue;
3581                                         verbose(env, "invalid read from stack off %d+%d size %d\n",
3582                                                 off, i, size);
3583                                         return -EACCES;
3584                                 }
3585                                 mark_reg_unknown(env, state->regs, dst_regno);
3586                         }
3587                         state->regs[dst_regno].live |= REG_LIVE_WRITTEN;
3588                         return 0;
3589                 }
3590
3591                 if (dst_regno >= 0) {
3592                         /* restore register state from stack */
3593                         state->regs[dst_regno] = *reg;
3594                         /* mark reg as written since spilled pointer state likely
3595                          * has its liveness marks cleared by is_state_visited()
3596                          * which resets stack/reg liveness for state transitions
3597                          */
3598                         state->regs[dst_regno].live |= REG_LIVE_WRITTEN;
3599                 } else if (__is_pointer_value(env->allow_ptr_leaks, reg)) {
3600                         /* If dst_regno==-1, the caller is asking us whether
3601                          * it is acceptable to use this value as a SCALAR_VALUE
3602                          * (e.g. for XADD).
3603                          * We must not allow unprivileged callers to do that
3604                          * with spilled pointers.
3605                          */
3606                         verbose(env, "leaking pointer from stack off %d\n",
3607                                 off);
3608                         return -EACCES;
3609                 }
3610                 mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
3611         } else {
3612                 for (i = 0; i < size; i++) {
3613                         type = stype[(slot - i) % BPF_REG_SIZE];
3614                         if (type == STACK_MISC)
3615                                 continue;
3616                         if (type == STACK_ZERO)
3617                                 continue;
3618                         verbose(env, "invalid read from stack off %d+%d size %d\n",
3619                                 off, i, size);
3620                         return -EACCES;
3621                 }
3622                 mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
3623                 if (dst_regno >= 0)
3624                         mark_reg_stack_read(env, reg_state, off, off + size, dst_regno);
3625         }
3626         return 0;
3627 }
3628
3629 enum bpf_access_src {
3630         ACCESS_DIRECT = 1,  /* the access is performed by an instruction */
3631         ACCESS_HELPER = 2,  /* the access is performed by a helper */
3632 };
3633
3634 static int check_stack_range_initialized(struct bpf_verifier_env *env,
3635                                          int regno, int off, int access_size,
3636                                          bool zero_size_allowed,
3637                                          enum bpf_access_src type,
3638                                          struct bpf_call_arg_meta *meta);
3639
3640 static struct bpf_reg_state *reg_state(struct bpf_verifier_env *env, int regno)
3641 {
3642         return cur_regs(env) + regno;
3643 }
3644
3645 /* Read the stack at 'ptr_regno + off' and put the result into the register
3646  * 'dst_regno'.
3647  * 'off' includes the pointer register's fixed offset(i.e. 'ptr_regno.off'),
3648  * but not its variable offset.
3649  * 'size' is assumed to be <= reg size and the access is assumed to be aligned.
3650  *
3651  * As opposed to check_stack_read_fixed_off, this function doesn't deal with
3652  * filling registers (i.e. reads of spilled register cannot be detected when
3653  * the offset is not fixed). We conservatively mark 'dst_regno' as containing
3654  * SCALAR_VALUE. That's why we assert that the 'ptr_regno' has a variable
3655  * offset; for a fixed offset check_stack_read_fixed_off should be used
3656  * instead.
3657  */
3658 static int check_stack_read_var_off(struct bpf_verifier_env *env,
3659                                     int ptr_regno, int off, int size, int dst_regno)
3660 {
3661         /* The state of the source register. */
3662         struct bpf_reg_state *reg = reg_state(env, ptr_regno);
3663         struct bpf_func_state *ptr_state = func(env, reg);
3664         int err;
3665         int min_off, max_off;
3666
3667         /* Note that we pass a NULL meta, so raw access will not be permitted.
3668          */
3669         err = check_stack_range_initialized(env, ptr_regno, off, size,
3670                                             false, ACCESS_DIRECT, NULL);
3671         if (err)
3672                 return err;
3673
3674         min_off = reg->smin_value + off;
3675         max_off = reg->smax_value + off;
3676         mark_reg_stack_read(env, ptr_state, min_off, max_off + size, dst_regno);
3677         return 0;
3678 }
3679
3680 /* check_stack_read dispatches to check_stack_read_fixed_off or
3681  * check_stack_read_var_off.
3682  *
3683  * The caller must ensure that the offset falls within the allocated stack
3684  * bounds.
3685  *
3686  * 'dst_regno' is a register which will receive the value from the stack. It
3687  * can be -1, meaning that the read value is not going to a register.
3688  */
3689 static int check_stack_read(struct bpf_verifier_env *env,
3690                             int ptr_regno, int off, int size,
3691                             int dst_regno)
3692 {
3693         struct bpf_reg_state *reg = reg_state(env, ptr_regno);
3694         struct bpf_func_state *state = func(env, reg);
3695         int err;
3696         /* Some accesses are only permitted with a static offset. */
3697         bool var_off = !tnum_is_const(reg->var_off);
3698
3699         /* The offset is required to be static when reads don't go to a
3700          * register, in order to not leak pointers (see
3701          * check_stack_read_fixed_off).
3702          */
3703         if (dst_regno < 0 && var_off) {
3704                 char tn_buf[48];
3705
3706                 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
3707                 verbose(env, "variable offset stack pointer cannot be passed into helper function; var_off=%s off=%d size=%d\n",
3708                         tn_buf, off, size);
3709                 return -EACCES;
3710         }
3711         /* Variable offset is prohibited for unprivileged mode for simplicity
3712          * since it requires corresponding support in Spectre masking for stack
3713          * ALU. See also retrieve_ptr_limit().
3714          */
3715         if (!env->bypass_spec_v1 && var_off) {
3716                 char tn_buf[48];
3717
3718                 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
3719                 verbose(env, "R%d variable offset stack access prohibited for !root, var_off=%s\n",
3720                                 ptr_regno, tn_buf);
3721                 return -EACCES;
3722         }
3723
3724         if (!var_off) {
3725                 off += reg->var_off.value;
3726                 err = check_stack_read_fixed_off(env, state, off, size,
3727                                                  dst_regno);
3728         } else {
3729                 /* Variable offset stack reads need more conservative handling
3730                  * than fixed offset ones. Note that dst_regno >= 0 on this
3731                  * branch.
3732                  */
3733                 err = check_stack_read_var_off(env, ptr_regno, off, size,
3734                                                dst_regno);
3735         }
3736         return err;
3737 }
3738
3739
3740 /* check_stack_write dispatches to check_stack_write_fixed_off or
3741  * check_stack_write_var_off.
3742  *
3743  * 'ptr_regno' is the register used as a pointer into the stack.
3744  * 'off' includes 'ptr_regno->off', but not its variable offset (if any).
3745  * 'value_regno' is the register whose value we're writing to the stack. It can
3746  * be -1, meaning that we're not writing from a register.
3747  *
3748  * The caller must ensure that the offset falls within the maximum stack size.
3749  */
3750 static int check_stack_write(struct bpf_verifier_env *env,
3751                              int ptr_regno, int off, int size,
3752                              int value_regno, int insn_idx)
3753 {
3754         struct bpf_reg_state *reg = reg_state(env, ptr_regno);
3755         struct bpf_func_state *state = func(env, reg);
3756         int err;
3757
3758         if (tnum_is_const(reg->var_off)) {
3759                 off += reg->var_off.value;
3760                 err = check_stack_write_fixed_off(env, state, off, size,
3761                                                   value_regno, insn_idx);
3762         } else {
3763                 /* Variable offset stack reads need more conservative handling
3764                  * than fixed offset ones.
3765                  */
3766                 err = check_stack_write_var_off(env, state,
3767                                                 ptr_regno, off, size,
3768                                                 value_regno, insn_idx);
3769         }
3770         return err;
3771 }
3772
3773 static int check_map_access_type(struct bpf_verifier_env *env, u32 regno,
3774                                  int off, int size, enum bpf_access_type type)
3775 {
3776         struct bpf_reg_state *regs = cur_regs(env);
3777         struct bpf_map *map = regs[regno].map_ptr;
3778         u32 cap = bpf_map_flags_to_cap(map);
3779
3780         if (type == BPF_WRITE && !(cap & BPF_MAP_CAN_WRITE)) {
3781                 verbose(env, "write into map forbidden, value_size=%d off=%d size=%d\n",
3782                         map->value_size, off, size);
3783                 return -EACCES;
3784         }
3785
3786         if (type == BPF_READ && !(cap & BPF_MAP_CAN_READ)) {
3787                 verbose(env, "read from map forbidden, value_size=%d off=%d size=%d\n",
3788                         map->value_size, off, size);
3789                 return -EACCES;
3790         }
3791
3792         return 0;
3793 }
3794
3795 /* check read/write into memory region (e.g., map value, ringbuf sample, etc) */
3796 static int __check_mem_access(struct bpf_verifier_env *env, int regno,
3797                               int off, int size, u32 mem_size,
3798                               bool zero_size_allowed)
3799 {
3800         bool size_ok = size > 0 || (size == 0 && zero_size_allowed);
3801         struct bpf_reg_state *reg;
3802
3803         if (off >= 0 && size_ok && (u64)off + size <= mem_size)
3804                 return 0;
3805
3806         reg = &cur_regs(env)[regno];
3807         switch (reg->type) {
3808         case PTR_TO_MAP_KEY:
3809                 verbose(env, "invalid access to map key, key_size=%d off=%d size=%d\n",
3810                         mem_size, off, size);
3811                 break;
3812         case PTR_TO_MAP_VALUE:
3813                 verbose(env, "invalid access to map value, value_size=%d off=%d size=%d\n",
3814                         mem_size, off, size);
3815                 break;
3816         case PTR_TO_PACKET:
3817         case PTR_TO_PACKET_META:
3818         case PTR_TO_PACKET_END:
3819                 verbose(env, "invalid access to packet, off=%d size=%d, R%d(id=%d,off=%d,r=%d)\n",
3820                         off, size, regno, reg->id, off, mem_size);
3821                 break;
3822         case PTR_TO_MEM:
3823         default:
3824                 verbose(env, "invalid access to memory, mem_size=%u off=%d size=%d\n",
3825                         mem_size, off, size);
3826         }
3827
3828         return -EACCES;
3829 }
3830
3831 /* check read/write into a memory region with possible variable offset */
3832 static int check_mem_region_access(struct bpf_verifier_env *env, u32 regno,
3833                                    int off, int size, u32 mem_size,
3834                                    bool zero_size_allowed)
3835 {
3836         struct bpf_verifier_state *vstate = env->cur_state;
3837         struct bpf_func_state *state = vstate->frame[vstate->curframe];
3838         struct bpf_reg_state *reg = &state->regs[regno];
3839         int err;
3840
3841         /* We may have adjusted the register pointing to memory region, so we
3842          * need to try adding each of min_value and max_value to off
3843          * to make sure our theoretical access will be safe.
3844          *
3845          * The minimum value is only important with signed
3846          * comparisons where we can't assume the floor of a
3847          * value is 0.  If we are using signed variables for our
3848          * index'es we need to make sure that whatever we use
3849          * will have a set floor within our range.
3850          */
3851         if (reg->smin_value < 0 &&
3852             (reg->smin_value == S64_MIN ||
3853              (off + reg->smin_value != (s64)(s32)(off + reg->smin_value)) ||
3854               reg->smin_value + off < 0)) {
3855                 verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
3856                         regno);
3857                 return -EACCES;
3858         }
3859         err = __check_mem_access(env, regno, reg->smin_value + off, size,
3860                                  mem_size, zero_size_allowed);
3861         if (err) {
3862                 verbose(env, "R%d min value is outside of the allowed memory range\n",
3863                         regno);
3864                 return err;
3865         }
3866
3867         /* If we haven't set a max value then we need to bail since we can't be
3868          * sure we won't do bad things.
3869          * If reg->umax_value + off could overflow, treat that as unbounded too.
3870          */
3871         if (reg->umax_value >= BPF_MAX_VAR_OFF) {
3872                 verbose(env, "R%d unbounded memory access, make sure to bounds check any such access\n",
3873                         regno);
3874                 return -EACCES;
3875         }
3876         err = __check_mem_access(env, regno, reg->umax_value + off, size,
3877                                  mem_size, zero_size_allowed);
3878         if (err) {
3879                 verbose(env, "R%d max value is outside of the allowed memory range\n",
3880                         regno);
3881                 return err;
3882         }
3883
3884         return 0;
3885 }
3886
3887 static int __check_ptr_off_reg(struct bpf_verifier_env *env,
3888                                const struct bpf_reg_state *reg, int regno,
3889                                bool fixed_off_ok)
3890 {
3891         /* Access to this pointer-typed register or passing it to a helper
3892          * is only allowed in its original, unmodified form.
3893          */
3894
3895         if (reg->off < 0) {
3896                 verbose(env, "negative offset %s ptr R%d off=%d disallowed\n",
3897                         reg_type_str(env, reg->type), regno, reg->off);
3898                 return -EACCES;
3899         }
3900
3901         if (!fixed_off_ok && reg->off) {
3902                 verbose(env, "dereference of modified %s ptr R%d off=%d disallowed\n",
3903                         reg_type_str(env, reg->type), regno, reg->off);
3904                 return -EACCES;
3905         }
3906
3907         if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
3908                 char tn_buf[48];
3909
3910                 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
3911                 verbose(env, "variable %s access var_off=%s disallowed\n",
3912                         reg_type_str(env, reg->type), tn_buf);
3913                 return -EACCES;
3914         }
3915
3916         return 0;
3917 }
3918
3919 int check_ptr_off_reg(struct bpf_verifier_env *env,
3920                       const struct bpf_reg_state *reg, int regno)
3921 {
3922         return __check_ptr_off_reg(env, reg, regno, false);
3923 }
3924
3925 static int map_kptr_match_type(struct bpf_verifier_env *env,
3926                                struct btf_field *kptr_field,
3927                                struct bpf_reg_state *reg, u32 regno)
3928 {
3929         const char *targ_name = kernel_type_name(kptr_field->kptr.btf, kptr_field->kptr.btf_id);
3930         int perm_flags = PTR_MAYBE_NULL | PTR_TRUSTED;
3931         const char *reg_name = "";
3932
3933         /* Only unreferenced case accepts untrusted pointers */
3934         if (kptr_field->type == BPF_KPTR_UNREF)
3935                 perm_flags |= PTR_UNTRUSTED;
3936
3937         if (base_type(reg->type) != PTR_TO_BTF_ID || (type_flag(reg->type) & ~perm_flags))
3938                 goto bad_type;
3939
3940         if (!btf_is_kernel(reg->btf)) {
3941                 verbose(env, "R%d must point to kernel BTF\n", regno);
3942                 return -EINVAL;
3943         }
3944         /* We need to verify reg->type and reg->btf, before accessing reg->btf */
3945         reg_name = kernel_type_name(reg->btf, reg->btf_id);
3946
3947         /* For ref_ptr case, release function check should ensure we get one
3948          * referenced PTR_TO_BTF_ID, and that its fixed offset is 0. For the
3949          * normal store of unreferenced kptr, we must ensure var_off is zero.
3950          * Since ref_ptr cannot be accessed directly by BPF insns, checks for
3951          * reg->off and reg->ref_obj_id are not needed here.
3952          */
3953         if (__check_ptr_off_reg(env, reg, regno, true))
3954                 return -EACCES;
3955
3956         /* A full type match is needed, as BTF can be vmlinux or module BTF, and
3957          * we also need to take into account the reg->off.
3958          *
3959          * We want to support cases like:
3960          *
3961          * struct foo {
3962          *         struct bar br;
3963          *         struct baz bz;
3964          * };
3965          *
3966          * struct foo *v;
3967          * v = func();        // PTR_TO_BTF_ID
3968          * val->foo = v;      // reg->off is zero, btf and btf_id match type
3969          * val->bar = &v->br; // reg->off is still zero, but we need to retry with
3970          *                    // first member type of struct after comparison fails
3971          * val->baz = &v->bz; // reg->off is non-zero, so struct needs to be walked
3972          *                    // to match type
3973          *
3974          * In the kptr_ref case, check_func_arg_reg_off already ensures reg->off
3975          * is zero. We must also ensure that btf_struct_ids_match does not walk
3976          * the struct to match type against first member of struct, i.e. reject
3977          * second case from above. Hence, when type is BPF_KPTR_REF, we set
3978          * strict mode to true for type match.
3979          */
3980         if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id, reg->off,
3981                                   kptr_field->kptr.btf, kptr_field->kptr.btf_id,
3982                                   kptr_field->type == BPF_KPTR_REF))
3983                 goto bad_type;
3984         return 0;
3985 bad_type:
3986         verbose(env, "invalid kptr access, R%d type=%s%s ", regno,
3987                 reg_type_str(env, reg->type), reg_name);
3988         verbose(env, "expected=%s%s", reg_type_str(env, PTR_TO_BTF_ID), targ_name);
3989         if (kptr_field->type == BPF_KPTR_UNREF)
3990                 verbose(env, " or %s%s\n", reg_type_str(env, PTR_TO_BTF_ID | PTR_UNTRUSTED),
3991                         targ_name);
3992         else
3993                 verbose(env, "\n");
3994         return -EINVAL;
3995 }
3996
3997 static int check_map_kptr_access(struct bpf_verifier_env *env, u32 regno,
3998                                  int value_regno, int insn_idx,
3999                                  struct btf_field *kptr_field)
4000 {
4001         struct bpf_insn *insn = &env->prog->insnsi[insn_idx];
4002         int class = BPF_CLASS(insn->code);
4003         struct bpf_reg_state *val_reg;
4004
4005         /* Things we already checked for in check_map_access and caller:
4006          *  - Reject cases where variable offset may touch kptr
4007          *  - size of access (must be BPF_DW)
4008          *  - tnum_is_const(reg->var_off)
4009          *  - kptr_field->offset == off + reg->var_off.value
4010          */
4011         /* Only BPF_[LDX,STX,ST] | BPF_MEM | BPF_DW is supported */
4012         if (BPF_MODE(insn->code) != BPF_MEM) {
4013                 verbose(env, "kptr in map can only be accessed using BPF_MEM instruction mode\n");
4014                 return -EACCES;
4015         }
4016
4017         /* We only allow loading referenced kptr, since it will be marked as
4018          * untrusted, similar to unreferenced kptr.
4019          */
4020         if (class != BPF_LDX && kptr_field->type == BPF_KPTR_REF) {
4021                 verbose(env, "store to referenced kptr disallowed\n");
4022                 return -EACCES;
4023         }
4024
4025         if (class == BPF_LDX) {
4026                 val_reg = reg_state(env, value_regno);
4027                 /* We can simply mark the value_regno receiving the pointer
4028                  * value from map as PTR_TO_BTF_ID, with the correct type.
4029                  */
4030                 mark_btf_ld_reg(env, cur_regs(env), value_regno, PTR_TO_BTF_ID, kptr_field->kptr.btf,
4031                                 kptr_field->kptr.btf_id, PTR_MAYBE_NULL | PTR_UNTRUSTED);
4032                 /* For mark_ptr_or_null_reg */
4033                 val_reg->id = ++env->id_gen;
4034         } else if (class == BPF_STX) {
4035                 val_reg = reg_state(env, value_regno);
4036                 if (!register_is_null(val_reg) &&
4037                     map_kptr_match_type(env, kptr_field, val_reg, value_regno))
4038                         return -EACCES;
4039         } else if (class == BPF_ST) {
4040                 if (insn->imm) {
4041                         verbose(env, "BPF_ST imm must be 0 when storing to kptr at off=%u\n",
4042                                 kptr_field->offset);
4043                         return -EACCES;
4044                 }
4045         } else {
4046                 verbose(env, "kptr in map can only be accessed using BPF_LDX/BPF_STX/BPF_ST\n");
4047                 return -EACCES;
4048         }
4049         return 0;
4050 }
4051
4052 /* check read/write into a map element with possible variable offset */
4053 static int check_map_access(struct bpf_verifier_env *env, u32 regno,
4054                             int off, int size, bool zero_size_allowed,
4055                             enum bpf_access_src src)
4056 {
4057         struct bpf_verifier_state *vstate = env->cur_state;
4058         struct bpf_func_state *state = vstate->frame[vstate->curframe];
4059         struct bpf_reg_state *reg = &state->regs[regno];
4060         struct bpf_map *map = reg->map_ptr;
4061         struct btf_record *rec;
4062         int err, i;
4063
4064         err = check_mem_region_access(env, regno, off, size, map->value_size,
4065                                       zero_size_allowed);
4066         if (err)
4067                 return err;
4068
4069         if (IS_ERR_OR_NULL(map->record))
4070                 return 0;
4071         rec = map->record;
4072         for (i = 0; i < rec->cnt; i++) {
4073                 struct btf_field *field = &rec->fields[i];
4074                 u32 p = field->offset;
4075
4076                 /* If any part of a field  can be touched by load/store, reject
4077                  * this program. To check that [x1, x2) overlaps with [y1, y2),
4078                  * it is sufficient to check x1 < y2 && y1 < x2.
4079                  */
4080                 if (reg->smin_value + off < p + btf_field_type_size(field->type) &&
4081                     p < reg->umax_value + off + size) {
4082                         switch (field->type) {
4083                         case BPF_KPTR_UNREF:
4084                         case BPF_KPTR_REF:
4085                                 if (src != ACCESS_DIRECT) {
4086                                         verbose(env, "kptr cannot be accessed indirectly by helper\n");
4087                                         return -EACCES;
4088                                 }
4089                                 if (!tnum_is_const(reg->var_off)) {
4090                                         verbose(env, "kptr access cannot have variable offset\n");
4091                                         return -EACCES;
4092                                 }
4093                                 if (p != off + reg->var_off.value) {
4094                                         verbose(env, "kptr access misaligned expected=%u off=%llu\n",
4095                                                 p, off + reg->var_off.value);
4096                                         return -EACCES;
4097                                 }
4098                                 if (size != bpf_size_to_bytes(BPF_DW)) {
4099                                         verbose(env, "kptr access size must be BPF_DW\n");
4100                                         return -EACCES;
4101                                 }
4102                                 break;
4103                         default:
4104                                 verbose(env, "%s cannot be accessed directly by load/store\n",
4105                                         btf_field_type_name(field->type));
4106                                 return -EACCES;
4107                         }
4108                 }
4109         }
4110         return 0;
4111 }
4112
4113 #define MAX_PACKET_OFF 0xffff
4114
4115 static bool may_access_direct_pkt_data(struct bpf_verifier_env *env,
4116                                        const struct bpf_call_arg_meta *meta,
4117                                        enum bpf_access_type t)
4118 {
4119         enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
4120
4121         switch (prog_type) {
4122         /* Program types only with direct read access go here! */
4123         case BPF_PROG_TYPE_LWT_IN:
4124         case BPF_PROG_TYPE_LWT_OUT:
4125         case BPF_PROG_TYPE_LWT_SEG6LOCAL:
4126         case BPF_PROG_TYPE_SK_REUSEPORT:
4127         case BPF_PROG_TYPE_FLOW_DISSECTOR:
4128         case BPF_PROG_TYPE_CGROUP_SKB:
4129                 if (t == BPF_WRITE)
4130                         return false;
4131                 fallthrough;
4132
4133         /* Program types with direct read + write access go here! */
4134         case BPF_PROG_TYPE_SCHED_CLS:
4135         case BPF_PROG_TYPE_SCHED_ACT:
4136         case BPF_PROG_TYPE_XDP:
4137         case BPF_PROG_TYPE_LWT_XMIT:
4138         case BPF_PROG_TYPE_SK_SKB:
4139         case BPF_PROG_TYPE_SK_MSG:
4140                 if (meta)
4141                         return meta->pkt_access;
4142
4143                 env->seen_direct_write = true;
4144                 return true;
4145
4146         case BPF_PROG_TYPE_CGROUP_SOCKOPT:
4147                 if (t == BPF_WRITE)
4148                         env->seen_direct_write = true;
4149
4150                 return true;
4151
4152         default:
4153                 return false;
4154         }
4155 }
4156
4157 static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off,
4158                                int size, bool zero_size_allowed)
4159 {
4160         struct bpf_reg_state *regs = cur_regs(env);
4161         struct bpf_reg_state *reg = &regs[regno];
4162         int err;
4163
4164         /* We may have added a variable offset to the packet pointer; but any
4165          * reg->range we have comes after that.  We are only checking the fixed
4166          * offset.
4167          */
4168
4169         /* We don't allow negative numbers, because we aren't tracking enough
4170          * detail to prove they're safe.
4171          */
4172         if (reg->smin_value < 0) {
4173                 verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
4174                         regno);
4175                 return -EACCES;
4176         }
4177
4178         err = reg->range < 0 ? -EINVAL :
4179               __check_mem_access(env, regno, off, size, reg->range,
4180                                  zero_size_allowed);
4181         if (err) {
4182                 verbose(env, "R%d offset is outside of the packet\n", regno);
4183                 return err;
4184         }
4185
4186         /* __check_mem_access has made sure "off + size - 1" is within u16.
4187          * reg->umax_value can't be bigger than MAX_PACKET_OFF which is 0xffff,
4188          * otherwise find_good_pkt_pointers would have refused to set range info
4189          * that __check_mem_access would have rejected this pkt access.
4190          * Therefore, "off + reg->umax_value + size - 1" won't overflow u32.
4191          */
4192         env->prog->aux->max_pkt_offset =
4193                 max_t(u32, env->prog->aux->max_pkt_offset,
4194                       off + reg->umax_value + size - 1);
4195
4196         return err;
4197 }
4198
4199 /* check access to 'struct bpf_context' fields.  Supports fixed offsets only */
4200 static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, int size,
4201                             enum bpf_access_type t, enum bpf_reg_type *reg_type,
4202                             struct btf **btf, u32 *btf_id)
4203 {
4204         struct bpf_insn_access_aux info = {
4205                 .reg_type = *reg_type,
4206                 .log = &env->log,
4207         };
4208
4209         if (env->ops->is_valid_access &&
4210             env->ops->is_valid_access(off, size, t, env->prog, &info)) {
4211                 /* A non zero info.ctx_field_size indicates that this field is a
4212                  * candidate for later verifier transformation to load the whole
4213                  * field and then apply a mask when accessed with a narrower
4214                  * access than actual ctx access size. A zero info.ctx_field_size
4215                  * will only allow for whole field access and rejects any other
4216                  * type of narrower access.
4217                  */
4218                 *reg_type = info.reg_type;
4219
4220                 if (base_type(*reg_type) == PTR_TO_BTF_ID) {
4221                         *btf = info.btf;
4222                         *btf_id = info.btf_id;
4223                 } else {
4224                         env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size;
4225                 }
4226                 /* remember the offset of last byte accessed in ctx */
4227                 if (env->prog->aux->max_ctx_offset < off + size)
4228                         env->prog->aux->max_ctx_offset = off + size;
4229                 return 0;
4230         }
4231
4232         verbose(env, "invalid bpf_context access off=%d size=%d\n", off, size);
4233         return -EACCES;
4234 }
4235
4236 static int check_flow_keys_access(struct bpf_verifier_env *env, int off,
4237                                   int size)
4238 {
4239         if (size < 0 || off < 0 ||
4240             (u64)off + size > sizeof(struct bpf_flow_keys)) {
4241                 verbose(env, "invalid access to flow keys off=%d size=%d\n",
4242                         off, size);
4243                 return -EACCES;
4244         }
4245         return 0;
4246 }
4247
4248 static int check_sock_access(struct bpf_verifier_env *env, int insn_idx,
4249                              u32 regno, int off, int size,
4250                              enum bpf_access_type t)
4251 {
4252         struct bpf_reg_state *regs = cur_regs(env);
4253         struct bpf_reg_state *reg = &regs[regno];
4254         struct bpf_insn_access_aux info = {};
4255         bool valid;
4256
4257         if (reg->smin_value < 0) {
4258                 verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
4259                         regno);
4260                 return -EACCES;
4261         }
4262
4263         switch (reg->type) {
4264         case PTR_TO_SOCK_COMMON:
4265                 valid = bpf_sock_common_is_valid_access(off, size, t, &info);
4266                 break;
4267         case PTR_TO_SOCKET:
4268                 valid = bpf_sock_is_valid_access(off, size, t, &info);
4269                 break;
4270         case PTR_TO_TCP_SOCK:
4271                 valid = bpf_tcp_sock_is_valid_access(off, size, t, &info);
4272                 break;
4273         case PTR_TO_XDP_SOCK:
4274                 valid = bpf_xdp_sock_is_valid_access(off, size, t, &info);
4275                 break;
4276         default:
4277                 valid = false;
4278         }
4279
4280
4281         if (valid) {
4282                 env->insn_aux_data[insn_idx].ctx_field_size =
4283                         info.ctx_field_size;
4284                 return 0;
4285         }
4286
4287         verbose(env, "R%d invalid %s access off=%d size=%d\n",
4288                 regno, reg_type_str(env, reg->type), off, size);
4289
4290         return -EACCES;
4291 }
4292
4293 static bool is_pointer_value(struct bpf_verifier_env *env, int regno)
4294 {
4295         return __is_pointer_value(env->allow_ptr_leaks, reg_state(env, regno));
4296 }
4297
4298 static bool is_ctx_reg(struct bpf_verifier_env *env, int regno)
4299 {
4300         const struct bpf_reg_state *reg = reg_state(env, regno);
4301
4302         return reg->type == PTR_TO_CTX;
4303 }
4304
4305 static bool is_sk_reg(struct bpf_verifier_env *env, int regno)
4306 {
4307         const struct bpf_reg_state *reg = reg_state(env, regno);
4308
4309         return type_is_sk_pointer(reg->type);
4310 }
4311
4312 static bool is_pkt_reg(struct bpf_verifier_env *env, int regno)
4313 {
4314         const struct bpf_reg_state *reg = reg_state(env, regno);
4315
4316         return type_is_pkt_pointer(reg->type);
4317 }
4318
4319 static bool is_flow_key_reg(struct bpf_verifier_env *env, int regno)
4320 {
4321         const struct bpf_reg_state *reg = reg_state(env, regno);
4322
4323         /* Separate to is_ctx_reg() since we still want to allow BPF_ST here. */
4324         return reg->type == PTR_TO_FLOW_KEYS;
4325 }
4326
4327 static bool is_trusted_reg(const struct bpf_reg_state *reg)
4328 {
4329         /* A referenced register is always trusted. */
4330         if (reg->ref_obj_id)
4331                 return true;
4332
4333         /* If a register is not referenced, it is trusted if it has the
4334          * MEM_ALLOC or PTR_TRUSTED type modifiers, and no others. Some of the
4335          * other type modifiers may be safe, but we elect to take an opt-in
4336          * approach here as some (e.g. PTR_UNTRUSTED and PTR_MAYBE_NULL) are
4337          * not.
4338          *
4339          * Eventually, we should make PTR_TRUSTED the single source of truth
4340          * for whether a register is trusted.
4341          */
4342         return type_flag(reg->type) & BPF_REG_TRUSTED_MODIFIERS &&
4343                !bpf_type_has_unsafe_modifiers(reg->type);
4344 }
4345
4346 static bool is_rcu_reg(const struct bpf_reg_state *reg)
4347 {
4348         return reg->type & MEM_RCU;
4349 }
4350
4351 static int check_pkt_ptr_alignment(struct bpf_verifier_env *env,
4352                                    const struct bpf_reg_state *reg,
4353                                    int off, int size, bool strict)
4354 {
4355         struct tnum reg_off;
4356         int ip_align;
4357
4358         /* Byte size accesses are always allowed. */
4359         if (!strict || size == 1)
4360                 return 0;
4361
4362         /* For platforms that do not have a Kconfig enabling
4363          * CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS the value of
4364          * NET_IP_ALIGN is universally set to '2'.  And on platforms
4365          * that do set CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS, we get
4366          * to this code only in strict mode where we want to emulate
4367          * the NET_IP_ALIGN==2 checking.  Therefore use an
4368          * unconditional IP align value of '2'.
4369          */
4370         ip_align = 2;
4371
4372         reg_off = tnum_add(reg->var_off, tnum_const(ip_align + reg->off + off));
4373         if (!tnum_is_aligned(reg_off, size)) {
4374                 char tn_buf[48];
4375
4376                 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
4377                 verbose(env,
4378                         "misaligned packet access off %d+%s+%d+%d size %d\n",
4379                         ip_align, tn_buf, reg->off, off, size);
4380                 return -EACCES;
4381         }
4382
4383         return 0;
4384 }
4385
4386 static int check_generic_ptr_alignment(struct bpf_verifier_env *env,
4387                                        const struct bpf_reg_state *reg,
4388                                        const char *pointer_desc,
4389                                        int off, int size, bool strict)
4390 {
4391         struct tnum reg_off;
4392
4393         /* Byte size accesses are always allowed. */
4394         if (!strict || size == 1)
4395                 return 0;
4396
4397         reg_off = tnum_add(reg->var_off, tnum_const(reg->off + off));
4398         if (!tnum_is_aligned(reg_off, size)) {
4399                 char tn_buf[48];
4400
4401                 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
4402                 verbose(env, "misaligned %saccess off %s+%d+%d size %d\n",
4403                         pointer_desc, tn_buf, reg->off, off, size);
4404                 return -EACCES;
4405         }
4406
4407         return 0;
4408 }
4409
4410 static int check_ptr_alignment(struct bpf_verifier_env *env,
4411                                const struct bpf_reg_state *reg, int off,
4412                                int size, bool strict_alignment_once)
4413 {
4414         bool strict = env->strict_alignment || strict_alignment_once;
4415         const char *pointer_desc = "";
4416
4417         switch (reg->type) {
4418         case PTR_TO_PACKET:
4419         case PTR_TO_PACKET_META:
4420                 /* Special case, because of NET_IP_ALIGN. Given metadata sits
4421                  * right in front, treat it the very same way.
4422                  */
4423                 return check_pkt_ptr_alignment(env, reg, off, size, strict);
4424         case PTR_TO_FLOW_KEYS:
4425                 pointer_desc = "flow keys ";
4426                 break;
4427         case PTR_TO_MAP_KEY:
4428                 pointer_desc = "key ";
4429                 break;
4430         case PTR_TO_MAP_VALUE:
4431                 pointer_desc = "value ";
4432                 break;
4433         case PTR_TO_CTX:
4434                 pointer_desc = "context ";
4435                 break;
4436         case PTR_TO_STACK:
4437                 pointer_desc = "stack ";
4438                 /* The stack spill tracking logic in check_stack_write_fixed_off()
4439                  * and check_stack_read_fixed_off() relies on stack accesses being
4440                  * aligned.
4441                  */
4442                 strict = true;
4443                 break;
4444         case PTR_TO_SOCKET:
4445                 pointer_desc = "sock ";
4446                 break;
4447         case PTR_TO_SOCK_COMMON:
4448                 pointer_desc = "sock_common ";
4449                 break;
4450         case PTR_TO_TCP_SOCK:
4451                 pointer_desc = "tcp_sock ";
4452                 break;
4453         case PTR_TO_XDP_SOCK:
4454                 pointer_desc = "xdp_sock ";
4455                 break;
4456         default:
4457                 break;
4458         }
4459         return check_generic_ptr_alignment(env, reg, pointer_desc, off, size,
4460                                            strict);
4461 }
4462
4463 static int update_stack_depth(struct bpf_verifier_env *env,
4464                               const struct bpf_func_state *func,
4465                               int off)
4466 {
4467         u16 stack = env->subprog_info[func->subprogno].stack_depth;
4468
4469         if (stack >= -off)
4470                 return 0;
4471
4472         /* update known max for given subprogram */
4473         env->subprog_info[func->subprogno].stack_depth = -off;
4474         return 0;
4475 }
4476
4477 /* starting from main bpf function walk all instructions of the function
4478  * and recursively walk all callees that given function can call.
4479  * Ignore jump and exit insns.
4480  * Since recursion is prevented by check_cfg() this algorithm
4481  * only needs a local stack of MAX_CALL_FRAMES to remember callsites
4482  */
4483 static int check_max_stack_depth(struct bpf_verifier_env *env)
4484 {
4485         int depth = 0, frame = 0, idx = 0, i = 0, subprog_end;
4486         struct bpf_subprog_info *subprog = env->subprog_info;
4487         struct bpf_insn *insn = env->prog->insnsi;
4488         bool tail_call_reachable = false;
4489         int ret_insn[MAX_CALL_FRAMES];
4490         int ret_prog[MAX_CALL_FRAMES];
4491         int j;
4492
4493 process_func:
4494         /* protect against potential stack overflow that might happen when
4495          * bpf2bpf calls get combined with tailcalls. Limit the caller's stack
4496          * depth for such case down to 256 so that the worst case scenario
4497          * would result in 8k stack size (32 which is tailcall limit * 256 =
4498          * 8k).
4499          *
4500          * To get the idea what might happen, see an example:
4501          * func1 -> sub rsp, 128
4502          *  subfunc1 -> sub rsp, 256
4503          *  tailcall1 -> add rsp, 256
4504          *   func2 -> sub rsp, 192 (total stack size = 128 + 192 = 320)
4505          *   subfunc2 -> sub rsp, 64
4506          *   subfunc22 -> sub rsp, 128
4507          *   tailcall2 -> add rsp, 128
4508          *    func3 -> sub rsp, 32 (total stack size 128 + 192 + 64 + 32 = 416)
4509          *
4510          * tailcall will unwind the current stack frame but it will not get rid
4511          * of caller's stack as shown on the example above.
4512          */
4513         if (idx && subprog[idx].has_tail_call && depth >= 256) {
4514                 verbose(env,
4515                         "tail_calls are not allowed when call stack of previous frames is %d bytes. Too large\n",
4516                         depth);
4517                 return -EACCES;
4518         }
4519         /* round up to 32-bytes, since this is granularity
4520          * of interpreter stack size
4521          */
4522         depth += round_up(max_t(u32, subprog[idx].stack_depth, 1), 32);
4523         if (depth > MAX_BPF_STACK) {
4524                 verbose(env, "combined stack size of %d calls is %d. Too large\n",
4525                         frame + 1, depth);
4526                 return -EACCES;
4527         }
4528 continue_func:
4529         subprog_end = subprog[idx + 1].start;
4530         for (; i < subprog_end; i++) {
4531                 int next_insn;
4532
4533                 if (!bpf_pseudo_call(insn + i) && !bpf_pseudo_func(insn + i))
4534                         continue;
4535                 /* remember insn and function to return to */
4536                 ret_insn[frame] = i + 1;
4537                 ret_prog[frame] = idx;
4538
4539                 /* find the callee */
4540                 next_insn = i + insn[i].imm + 1;
4541                 idx = find_subprog(env, next_insn);
4542                 if (idx < 0) {
4543                         WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
4544                                   next_insn);
4545                         return -EFAULT;
4546                 }
4547                 if (subprog[idx].is_async_cb) {
4548                         if (subprog[idx].has_tail_call) {
4549                                 verbose(env, "verifier bug. subprog has tail_call and async cb\n");
4550                                 return -EFAULT;
4551                         }
4552                          /* async callbacks don't increase bpf prog stack size */
4553                         continue;
4554                 }
4555                 i = next_insn;
4556
4557                 if (subprog[idx].has_tail_call)
4558                         tail_call_reachable = true;
4559
4560                 frame++;
4561                 if (frame >= MAX_CALL_FRAMES) {
4562                         verbose(env, "the call stack of %d frames is too deep !\n",
4563                                 frame);
4564                         return -E2BIG;
4565                 }
4566                 goto process_func;
4567         }
4568         /* if tail call got detected across bpf2bpf calls then mark each of the
4569          * currently present subprog frames as tail call reachable subprogs;
4570          * this info will be utilized by JIT so that we will be preserving the
4571          * tail call counter throughout bpf2bpf calls combined with tailcalls
4572          */
4573         if (tail_call_reachable)
4574                 for (j = 0; j < frame; j++)
4575                         subprog[ret_prog[j]].tail_call_reachable = true;
4576         if (subprog[0].tail_call_reachable)
4577                 env->prog->aux->tail_call_reachable = true;
4578
4579         /* end of for() loop means the last insn of the 'subprog'
4580          * was reached. Doesn't matter whether it was JA or EXIT
4581          */
4582         if (frame == 0)
4583                 return 0;
4584         depth -= round_up(max_t(u32, subprog[idx].stack_depth, 1), 32);
4585         frame--;
4586         i = ret_insn[frame];
4587         idx = ret_prog[frame];
4588         goto continue_func;
4589 }
4590
4591 #ifndef CONFIG_BPF_JIT_ALWAYS_ON
4592 static int get_callee_stack_depth(struct bpf_verifier_env *env,
4593                                   const struct bpf_insn *insn, int idx)
4594 {
4595         int start = idx + insn->imm + 1, subprog;
4596
4597         subprog = find_subprog(env, start);
4598         if (subprog < 0) {
4599                 WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
4600                           start);
4601                 return -EFAULT;
4602         }
4603         return env->subprog_info[subprog].stack_depth;
4604 }
4605 #endif
4606
4607 static int __check_buffer_access(struct bpf_verifier_env *env,
4608                                  const char *buf_info,
4609                                  const struct bpf_reg_state *reg,
4610                                  int regno, int off, int size)
4611 {
4612         if (off < 0) {
4613                 verbose(env,
4614                         "R%d invalid %s buffer access: off=%d, size=%d\n",
4615                         regno, buf_info, off, size);
4616                 return -EACCES;
4617         }
4618         if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
4619                 char tn_buf[48];
4620
4621                 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
4622                 verbose(env,
4623                         "R%d invalid variable buffer offset: off=%d, var_off=%s\n",
4624                         regno, off, tn_buf);
4625                 return -EACCES;
4626         }
4627
4628         return 0;
4629 }
4630
4631 static int check_tp_buffer_access(struct bpf_verifier_env *env,
4632                                   const struct bpf_reg_state *reg,
4633                                   int regno, int off, int size)
4634 {
4635         int err;
4636
4637         err = __check_buffer_access(env, "tracepoint", reg, regno, off, size);
4638         if (err)
4639                 return err;
4640
4641         if (off + size > env->prog->aux->max_tp_access)
4642                 env->prog->aux->max_tp_access = off + size;
4643
4644         return 0;
4645 }
4646
4647 static int check_buffer_access(struct bpf_verifier_env *env,
4648                                const struct bpf_reg_state *reg,
4649                                int regno, int off, int size,
4650                                bool zero_size_allowed,
4651                                u32 *max_access)
4652 {
4653         const char *buf_info = type_is_rdonly_mem(reg->type) ? "rdonly" : "rdwr";
4654         int err;
4655
4656         err = __check_buffer_access(env, buf_info, reg, regno, off, size);
4657         if (err)
4658                 return err;
4659
4660         if (off + size > *max_access)
4661                 *max_access = off + size;
4662
4663         return 0;
4664 }
4665
4666 /* BPF architecture zero extends alu32 ops into 64-bit registesr */
4667 static void zext_32_to_64(struct bpf_reg_state *reg)
4668 {
4669         reg->var_off = tnum_subreg(reg->var_off);
4670         __reg_assign_32_into_64(reg);
4671 }
4672
4673 /* truncate register to smaller size (in bytes)
4674  * must be called with size < BPF_REG_SIZE
4675  */
4676 static void coerce_reg_to_size(struct bpf_reg_state *reg, int size)
4677 {
4678         u64 mask;
4679
4680         /* clear high bits in bit representation */
4681         reg->var_off = tnum_cast(reg->var_off, size);
4682
4683         /* fix arithmetic bounds */
4684         mask = ((u64)1 << (size * 8)) - 1;
4685         if ((reg->umin_value & ~mask) == (reg->umax_value & ~mask)) {
4686                 reg->umin_value &= mask;
4687                 reg->umax_value &= mask;
4688         } else {
4689                 reg->umin_value = 0;
4690                 reg->umax_value = mask;
4691         }
4692         reg->smin_value = reg->umin_value;
4693         reg->smax_value = reg->umax_value;
4694
4695         /* If size is smaller than 32bit register the 32bit register
4696          * values are also truncated so we push 64-bit bounds into
4697          * 32-bit bounds. Above were truncated < 32-bits already.
4698          */
4699         if (size >= 4)
4700                 return;
4701         __reg_combine_64_into_32(reg);
4702 }
4703
4704 static bool bpf_map_is_rdonly(const struct bpf_map *map)
4705 {
4706         /* A map is considered read-only if the following condition are true:
4707          *
4708          * 1) BPF program side cannot change any of the map content. The
4709          *    BPF_F_RDONLY_PROG flag is throughout the lifetime of a map
4710          *    and was set at map creation time.
4711          * 2) The map value(s) have been initialized from user space by a
4712          *    loader and then "frozen", such that no new map update/delete
4713          *    operations from syscall side are possible for the rest of
4714          *    the map's lifetime from that point onwards.
4715          * 3) Any parallel/pending map update/delete operations from syscall
4716          *    side have been completed. Only after that point, it's safe to
4717          *    assume that map value(s) are immutable.
4718          */
4719         return (map->map_flags & BPF_F_RDONLY_PROG) &&
4720                READ_ONCE(map->frozen) &&
4721                !bpf_map_write_active(map);
4722 }
4723
4724 static int bpf_map_direct_read(struct bpf_map *map, int off, int size, u64 *val)
4725 {
4726         void *ptr;
4727         u64 addr;
4728         int err;
4729
4730         err = map->ops->map_direct_value_addr(map, &addr, off);
4731         if (err)
4732                 return err;
4733         ptr = (void *)(long)addr + off;
4734
4735         switch (size) {
4736         case sizeof(u8):
4737                 *val = (u64)*(u8 *)ptr;
4738                 break;
4739         case sizeof(u16):
4740                 *val = (u64)*(u16 *)ptr;
4741                 break;
4742         case sizeof(u32):
4743                 *val = (u64)*(u32 *)ptr;
4744                 break;
4745         case sizeof(u64):
4746                 *val = *(u64 *)ptr;
4747                 break;
4748         default:
4749                 return -EINVAL;
4750         }
4751         return 0;
4752 }
4753
4754 static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
4755                                    struct bpf_reg_state *regs,
4756                                    int regno, int off, int size,
4757                                    enum bpf_access_type atype,
4758                                    int value_regno)
4759 {
4760         struct bpf_reg_state *reg = regs + regno;
4761         const struct btf_type *t = btf_type_by_id(reg->btf, reg->btf_id);
4762         const char *tname = btf_name_by_offset(reg->btf, t->name_off);
4763         enum bpf_type_flag flag = 0;
4764         u32 btf_id;
4765         int ret;
4766
4767         if (!env->allow_ptr_leaks) {
4768                 verbose(env,
4769                         "'struct %s' access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN\n",
4770                         tname);
4771                 return -EPERM;
4772         }
4773         if (!env->prog->gpl_compatible && btf_is_kernel(reg->btf)) {
4774                 verbose(env,
4775                         "Cannot access kernel 'struct %s' from non-GPL compatible program\n",
4776                         tname);
4777                 return -EINVAL;
4778         }
4779         if (off < 0) {
4780                 verbose(env,
4781                         "R%d is ptr_%s invalid negative access: off=%d\n",
4782                         regno, tname, off);
4783                 return -EACCES;
4784         }
4785         if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
4786                 char tn_buf[48];
4787
4788                 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
4789                 verbose(env,
4790                         "R%d is ptr_%s invalid variable offset: off=%d, var_off=%s\n",
4791                         regno, tname, off, tn_buf);
4792                 return -EACCES;
4793         }
4794
4795         if (reg->type & MEM_USER) {
4796                 verbose(env,
4797                         "R%d is ptr_%s access user memory: off=%d\n",
4798                         regno, tname, off);
4799                 return -EACCES;
4800         }
4801
4802         if (reg->type & MEM_PERCPU) {
4803                 verbose(env,
4804                         "R%d is ptr_%s access percpu memory: off=%d\n",
4805                         regno, tname, off);
4806                 return -EACCES;
4807         }
4808
4809         if (env->ops->btf_struct_access && !type_is_alloc(reg->type)) {
4810                 if (!btf_is_kernel(reg->btf)) {
4811                         verbose(env, "verifier internal error: reg->btf must be kernel btf\n");
4812                         return -EFAULT;
4813                 }
4814                 ret = env->ops->btf_struct_access(&env->log, reg, off, size, atype, &btf_id, &flag);
4815         } else {
4816                 /* Writes are permitted with default btf_struct_access for
4817                  * program allocated objects (which always have ref_obj_id > 0),
4818                  * but not for untrusted PTR_TO_BTF_ID | MEM_ALLOC.
4819                  */
4820                 if (atype != BPF_READ && reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
4821                         verbose(env, "only read is supported\n");
4822                         return -EACCES;
4823                 }
4824
4825                 if (type_is_alloc(reg->type) && !reg->ref_obj_id) {
4826                         verbose(env, "verifier internal error: ref_obj_id for allocated object must be non-zero\n");
4827                         return -EFAULT;
4828                 }
4829
4830                 ret = btf_struct_access(&env->log, reg, off, size, atype, &btf_id, &flag);
4831         }
4832
4833         if (ret < 0)
4834                 return ret;
4835
4836         /* If this is an untrusted pointer, all pointers formed by walking it
4837          * also inherit the untrusted flag.
4838          */
4839         if (type_flag(reg->type) & PTR_UNTRUSTED)
4840                 flag |= PTR_UNTRUSTED;
4841
4842         /* By default any pointer obtained from walking a trusted pointer is
4843          * no longer trusted except the rcu case below.
4844          */
4845         flag &= ~PTR_TRUSTED;
4846
4847         if (flag & MEM_RCU) {
4848                 /* Mark value register as MEM_RCU only if it is protected by
4849                  * bpf_rcu_read_lock() and the ptr reg is rcu or trusted. MEM_RCU
4850                  * itself can already indicate trustedness inside the rcu
4851                  * read lock region. Also mark rcu pointer as PTR_MAYBE_NULL since
4852                  * it could be null in some cases.
4853                  */
4854                 if (!env->cur_state->active_rcu_lock ||
4855                     !(is_trusted_reg(reg) || is_rcu_reg(reg)))
4856                         flag &= ~MEM_RCU;
4857                 else
4858                         flag |= PTR_MAYBE_NULL;
4859         } else if (reg->type & MEM_RCU) {
4860                 /* ptr (reg) is marked as MEM_RCU, but the struct field is not tagged
4861                  * with __rcu. Mark the flag as PTR_UNTRUSTED conservatively.
4862                  */
4863                 flag |= PTR_UNTRUSTED;
4864         }
4865
4866         if (atype == BPF_READ && value_regno >= 0)
4867                 mark_btf_ld_reg(env, regs, value_regno, ret, reg->btf, btf_id, flag);
4868
4869         return 0;
4870 }
4871
4872 static int check_ptr_to_map_access(struct bpf_verifier_env *env,
4873                                    struct bpf_reg_state *regs,
4874                                    int regno, int off, int size,
4875                                    enum bpf_access_type atype,
4876                                    int value_regno)
4877 {
4878         struct bpf_reg_state *reg = regs + regno;
4879         struct bpf_map *map = reg->map_ptr;
4880         struct bpf_reg_state map_reg;
4881         enum bpf_type_flag flag = 0;
4882         const struct btf_type *t;
4883         const char *tname;
4884         u32 btf_id;
4885         int ret;
4886
4887         if (!btf_vmlinux) {
4888                 verbose(env, "map_ptr access not supported without CONFIG_DEBUG_INFO_BTF\n");
4889                 return -ENOTSUPP;
4890         }
4891
4892         if (!map->ops->map_btf_id || !*map->ops->map_btf_id) {
4893                 verbose(env, "map_ptr access not supported for map type %d\n",
4894                         map->map_type);
4895                 return -ENOTSUPP;
4896         }
4897
4898         t = btf_type_by_id(btf_vmlinux, *map->ops->map_btf_id);
4899         tname = btf_name_by_offset(btf_vmlinux, t->name_off);
4900
4901         if (!env->allow_ptr_leaks) {
4902                 verbose(env,
4903                         "'struct %s' access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN\n",
4904                         tname);
4905                 return -EPERM;
4906         }
4907
4908         if (off < 0) {
4909                 verbose(env, "R%d is %s invalid negative access: off=%d\n",
4910                         regno, tname, off);
4911                 return -EACCES;
4912         }
4913
4914         if (atype != BPF_READ) {
4915                 verbose(env, "only read from %s is supported\n", tname);
4916                 return -EACCES;
4917         }
4918
4919         /* Simulate access to a PTR_TO_BTF_ID */
4920         memset(&map_reg, 0, sizeof(map_reg));
4921         mark_btf_ld_reg(env, &map_reg, 0, PTR_TO_BTF_ID, btf_vmlinux, *map->ops->map_btf_id, 0);
4922         ret = btf_struct_access(&env->log, &map_reg, off, size, atype, &btf_id, &flag);
4923         if (ret < 0)
4924                 return ret;
4925
4926         if (value_regno >= 0)
4927                 mark_btf_ld_reg(env, regs, value_regno, ret, btf_vmlinux, btf_id, flag);
4928
4929         return 0;
4930 }
4931
4932 /* Check that the stack access at the given offset is within bounds. The
4933  * maximum valid offset is -1.
4934  *
4935  * The minimum valid offset is -MAX_BPF_STACK for writes, and
4936  * -state->allocated_stack for reads.
4937  */
4938 static int check_stack_slot_within_bounds(int off,
4939                                           struct bpf_func_state *state,
4940                                           enum bpf_access_type t)
4941 {
4942         int min_valid_off;
4943
4944         if (t == BPF_WRITE)
4945                 min_valid_off = -MAX_BPF_STACK;
4946         else
4947                 min_valid_off = -state->allocated_stack;
4948
4949         if (off < min_valid_off || off > -1)
4950                 return -EACCES;
4951         return 0;
4952 }
4953
4954 /* Check that the stack access at 'regno + off' falls within the maximum stack
4955  * bounds.
4956  *
4957  * 'off' includes `regno->offset`, but not its dynamic part (if any).
4958  */
4959 static int check_stack_access_within_bounds(
4960                 struct bpf_verifier_env *env,
4961                 int regno, int off, int access_size,
4962                 enum bpf_access_src src, enum bpf_access_type type)
4963 {
4964         struct bpf_reg_state *regs = cur_regs(env);
4965         struct bpf_reg_state *reg = regs + regno;
4966         struct bpf_func_state *state = func(env, reg);
4967         int min_off, max_off;
4968         int err;
4969         char *err_extra;
4970
4971         if (src == ACCESS_HELPER)
4972                 /* We don't know if helpers are reading or writing (or both). */
4973                 err_extra = " indirect access to";
4974         else if (type == BPF_READ)
4975                 err_extra = " read from";
4976         else
4977                 err_extra = " write to";
4978
4979         if (tnum_is_const(reg->var_off)) {
4980                 min_off = reg->var_off.value + off;
4981                 if (access_size > 0)
4982                         max_off = min_off + access_size - 1;
4983                 else
4984                         max_off = min_off;
4985         } else {
4986                 if (reg->smax_value >= BPF_MAX_VAR_OFF ||
4987                     reg->smin_value <= -BPF_MAX_VAR_OFF) {
4988                         verbose(env, "invalid unbounded variable-offset%s stack R%d\n",
4989                                 err_extra, regno);
4990                         return -EACCES;
4991                 }
4992                 min_off = reg->smin_value + off;
4993                 if (access_size > 0)
4994                         max_off = reg->smax_value + off + access_size - 1;
4995                 else
4996                         max_off = min_off;
4997         }
4998
4999         err = check_stack_slot_within_bounds(min_off, state, type);
5000         if (!err)
5001                 err = check_stack_slot_within_bounds(max_off, state, type);
5002
5003         if (err) {
5004                 if (tnum_is_const(reg->var_off)) {
5005                         verbose(env, "invalid%s stack R%d off=%d size=%d\n",
5006                                 err_extra, regno, off, access_size);
5007                 } else {
5008                         char tn_buf[48];
5009
5010                         tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
5011                         verbose(env, "invalid variable-offset%s stack R%d var_off=%s size=%d\n",
5012                                 err_extra, regno, tn_buf, access_size);
5013                 }
5014         }
5015         return err;
5016 }
5017
5018 /* check whether memory at (regno + off) is accessible for t = (read | write)
5019  * if t==write, value_regno is a register which value is stored into memory
5020  * if t==read, value_regno is a register which will receive the value from memory
5021  * if t==write && value_regno==-1, some unknown value is stored into memory
5022  * if t==read && value_regno==-1, don't care what we read from memory
5023  */
5024 static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regno,
5025                             int off, int bpf_size, enum bpf_access_type t,
5026                             int value_regno, bool strict_alignment_once)
5027 {
5028         struct bpf_reg_state *regs = cur_regs(env);
5029         struct bpf_reg_state *reg = regs + regno;
5030         struct bpf_func_state *state;
5031         int size, err = 0;
5032
5033         size = bpf_size_to_bytes(bpf_size);
5034         if (size < 0)
5035                 return size;
5036
5037         /* alignment checks will add in reg->off themselves */
5038         err = check_ptr_alignment(env, reg, off, size, strict_alignment_once);
5039         if (err)
5040                 return err;
5041
5042         /* for access checks, reg->off is just part of off */
5043         off += reg->off;
5044
5045         if (reg->type == PTR_TO_MAP_KEY) {
5046                 if (t == BPF_WRITE) {
5047                         verbose(env, "write to change key R%d not allowed\n", regno);
5048                         return -EACCES;
5049                 }
5050
5051                 err = check_mem_region_access(env, regno, off, size,
5052                                               reg->map_ptr->key_size, false);
5053                 if (err)
5054                         return err;
5055                 if (value_regno >= 0)
5056                         mark_reg_unknown(env, regs, value_regno);
5057         } else if (reg->type == PTR_TO_MAP_VALUE) {
5058                 struct btf_field *kptr_field = NULL;
5059
5060                 if (t == BPF_WRITE && value_regno >= 0 &&
5061                     is_pointer_value(env, value_regno)) {
5062                         verbose(env, "R%d leaks addr into map\n", value_regno);
5063                         return -EACCES;
5064                 }
5065                 err = check_map_access_type(env, regno, off, size, t);
5066                 if (err)
5067                         return err;
5068                 err = check_map_access(env, regno, off, size, false, ACCESS_DIRECT);
5069                 if (err)
5070                         return err;
5071                 if (tnum_is_const(reg->var_off))
5072                         kptr_field = btf_record_find(reg->map_ptr->record,
5073                                                      off + reg->var_off.value, BPF_KPTR);
5074                 if (kptr_field) {
5075                         err = check_map_kptr_access(env, regno, value_regno, insn_idx, kptr_field);
5076                 } else if (t == BPF_READ && value_regno >= 0) {
5077                         struct bpf_map *map = reg->map_ptr;
5078
5079                         /* if map is read-only, track its contents as scalars */
5080                         if (tnum_is_const(reg->var_off) &&
5081                             bpf_map_is_rdonly(map) &&
5082                             map->ops->map_direct_value_addr) {
5083                                 int map_off = off + reg->var_off.value;
5084                                 u64 val = 0;
5085
5086                                 err = bpf_map_direct_read(map, map_off, size,
5087                                                           &val);
5088                                 if (err)
5089                                         return err;
5090
5091                                 regs[value_regno].type = SCALAR_VALUE;
5092                                 __mark_reg_known(&regs[value_regno], val);
5093                         } else {
5094                                 mark_reg_unknown(env, regs, value_regno);
5095                         }
5096                 }
5097         } else if (base_type(reg->type) == PTR_TO_MEM) {
5098                 bool rdonly_mem = type_is_rdonly_mem(reg->type);
5099
5100                 if (type_may_be_null(reg->type)) {
5101                         verbose(env, "R%d invalid mem access '%s'\n", regno,
5102                                 reg_type_str(env, reg->type));
5103                         return -EACCES;
5104                 }
5105
5106                 if (t == BPF_WRITE && rdonly_mem) {
5107                         verbose(env, "R%d cannot write into %s\n",
5108                                 regno, reg_type_str(env, reg->type));
5109                         return -EACCES;
5110                 }
5111
5112                 if (t == BPF_WRITE && value_regno >= 0 &&
5113                     is_pointer_value(env, value_regno)) {
5114                         verbose(env, "R%d leaks addr into mem\n", value_regno);
5115                         return -EACCES;
5116                 }
5117
5118                 err = check_mem_region_access(env, regno, off, size,
5119                                               reg->mem_size, false);
5120                 if (!err && value_regno >= 0 && (t == BPF_READ || rdonly_mem))
5121                         mark_reg_unknown(env, regs, value_regno);
5122         } else if (reg->type == PTR_TO_CTX) {
5123                 enum bpf_reg_type reg_type = SCALAR_VALUE;
5124                 struct btf *btf = NULL;
5125                 u32 btf_id = 0;
5126
5127                 if (t == BPF_WRITE && value_regno >= 0 &&
5128                     is_pointer_value(env, value_regno)) {
5129                         verbose(env, "R%d leaks addr into ctx\n", value_regno);
5130                         return -EACCES;
5131                 }
5132
5133                 err = check_ptr_off_reg(env, reg, regno);
5134                 if (err < 0)
5135                         return err;
5136
5137                 err = check_ctx_access(env, insn_idx, off, size, t, &reg_type, &btf,
5138                                        &btf_id);
5139                 if (err)
5140                         verbose_linfo(env, insn_idx, "; ");
5141                 if (!err && t == BPF_READ && value_regno >= 0) {
5142                         /* ctx access returns either a scalar, or a
5143                          * PTR_TO_PACKET[_META,_END]. In the latter
5144                          * case, we know the offset is zero.
5145                          */
5146                         if (reg_type == SCALAR_VALUE) {
5147                                 mark_reg_unknown(env, regs, value_regno);
5148                         } else {
5149                                 mark_reg_known_zero(env, regs,
5150                                                     value_regno);
5151                                 if (type_may_be_null(reg_type))
5152                                         regs[value_regno].id = ++env->id_gen;
5153                                 /* A load of ctx field could have different
5154                                  * actual load size with the one encoded in the
5155                                  * insn. When the dst is PTR, it is for sure not
5156                                  * a sub-register.
5157                                  */
5158                                 regs[value_regno].subreg_def = DEF_NOT_SUBREG;
5159                                 if (base_type(reg_type) == PTR_TO_BTF_ID) {
5160                                         regs[value_regno].btf = btf;
5161                                         regs[value_regno].btf_id = btf_id;
5162                                 }
5163                         }
5164                         regs[value_regno].type = reg_type;
5165                 }
5166
5167         } else if (reg->type == PTR_TO_STACK) {
5168                 /* Basic bounds checks. */
5169                 err = check_stack_access_within_bounds(env, regno, off, size, ACCESS_DIRECT, t);
5170                 if (err)
5171                         return err;
5172
5173                 state = func(env, reg);
5174                 err = update_stack_depth(env, state, off);
5175                 if (err)
5176                         return err;
5177
5178                 if (t == BPF_READ)
5179                         err = check_stack_read(env, regno, off, size,
5180                                                value_regno);
5181                 else
5182                         err = check_stack_write(env, regno, off, size,
5183                                                 value_regno, insn_idx);
5184         } else if (reg_is_pkt_pointer(reg)) {
5185                 if (t == BPF_WRITE && !may_access_direct_pkt_data(env, NULL, t)) {
5186                         verbose(env, "cannot write into packet\n");
5187                         return -EACCES;
5188                 }
5189                 if (t == BPF_WRITE && value_regno >= 0 &&
5190                     is_pointer_value(env, value_regno)) {
5191                         verbose(env, "R%d leaks addr into packet\n",
5192                                 value_regno);
5193                         return -EACCES;
5194                 }
5195                 err = check_packet_access(env, regno, off, size, false);
5196                 if (!err && t == BPF_READ && value_regno >= 0)
5197                         mark_reg_unknown(env, regs, value_regno);
5198         } else if (reg->type == PTR_TO_FLOW_KEYS) {
5199                 if (t == BPF_WRITE && value_regno >= 0 &&
5200                     is_pointer_value(env, value_regno)) {
5201                         verbose(env, "R%d leaks addr into flow keys\n",
5202                                 value_regno);
5203                         return -EACCES;
5204                 }
5205
5206                 err = check_flow_keys_access(env, off, size);
5207                 if (!err && t == BPF_READ && value_regno >= 0)
5208                         mark_reg_unknown(env, regs, value_regno);
5209         } else if (type_is_sk_pointer(reg->type)) {
5210                 if (t == BPF_WRITE) {
5211                         verbose(env, "R%d cannot write into %s\n",
5212                                 regno, reg_type_str(env, reg->type));
5213                         return -EACCES;
5214                 }
5215                 err = check_sock_access(env, insn_idx, regno, off, size, t);
5216                 if (!err && value_regno >= 0)
5217                         mark_reg_unknown(env, regs, value_regno);
5218         } else if (reg->type == PTR_TO_TP_BUFFER) {
5219                 err = check_tp_buffer_access(env, reg, regno, off, size);
5220                 if (!err && t == BPF_READ && value_regno >= 0)
5221                         mark_reg_unknown(env, regs, value_regno);
5222         } else if (base_type(reg->type) == PTR_TO_BTF_ID &&
5223                    !type_may_be_null(reg->type)) {
5224                 err = check_ptr_to_btf_access(env, regs, regno, off, size, t,
5225                                               value_regno);
5226         } else if (reg->type == CONST_PTR_TO_MAP) {
5227                 err = check_ptr_to_map_access(env, regs, regno, off, size, t,
5228                                               value_regno);
5229         } else if (base_type(reg->type) == PTR_TO_BUF) {
5230                 bool rdonly_mem = type_is_rdonly_mem(reg->type);
5231                 u32 *max_access;
5232
5233                 if (rdonly_mem) {
5234                         if (t == BPF_WRITE) {
5235                                 verbose(env, "R%d cannot write into %s\n",
5236                                         regno, reg_type_str(env, reg->type));
5237                                 return -EACCES;
5238                         }
5239                         max_access = &env->prog->aux->max_rdonly_access;
5240                 } else {
5241                         max_access = &env->prog->aux->max_rdwr_access;
5242                 }
5243
5244                 err = check_buffer_access(env, reg, regno, off, size, false,
5245                                           max_access);
5246
5247                 if (!err && value_regno >= 0 && (rdonly_mem || t == BPF_READ))
5248                         mark_reg_unknown(env, regs, value_regno);
5249         } else {
5250                 verbose(env, "R%d invalid mem access '%s'\n", regno,
5251                         reg_type_str(env, reg->type));
5252                 return -EACCES;
5253         }
5254
5255         if (!err && size < BPF_REG_SIZE && value_regno >= 0 && t == BPF_READ &&
5256             regs[value_regno].type == SCALAR_VALUE) {
5257                 /* b/h/w load zero-extends, mark upper bits as known 0 */
5258                 coerce_reg_to_size(&regs[value_regno], size);
5259         }
5260         return err;
5261 }
5262
5263 static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_insn *insn)
5264 {
5265         int load_reg;
5266         int err;
5267
5268         switch (insn->imm) {
5269         case BPF_ADD:
5270         case BPF_ADD | BPF_FETCH:
5271         case BPF_AND:
5272         case BPF_AND | BPF_FETCH:
5273         case BPF_OR:
5274         case BPF_OR | BPF_FETCH:
5275         case BPF_XOR:
5276         case BPF_XOR | BPF_FETCH:
5277         case BPF_XCHG:
5278         case BPF_CMPXCHG:
5279                 break;
5280         default:
5281                 verbose(env, "BPF_ATOMIC uses invalid atomic opcode %02x\n", insn->imm);
5282                 return -EINVAL;
5283         }
5284
5285         if (BPF_SIZE(insn->code) != BPF_W && BPF_SIZE(insn->code) != BPF_DW) {
5286                 verbose(env, "invalid atomic operand size\n");
5287                 return -EINVAL;
5288         }
5289
5290         /* check src1 operand */
5291         err = check_reg_arg(env, insn->src_reg, SRC_OP);
5292         if (err)
5293                 return err;
5294
5295         /* check src2 operand */
5296         err = check_reg_arg(env, insn->dst_reg, SRC_OP);
5297         if (err)
5298                 return err;
5299
5300         if (insn->imm == BPF_CMPXCHG) {
5301                 /* Check comparison of R0 with memory location */
5302                 const u32 aux_reg = BPF_REG_0;
5303
5304                 err = check_reg_arg(env, aux_reg, SRC_OP);
5305                 if (err)
5306                         return err;
5307
5308                 if (is_pointer_value(env, aux_reg)) {
5309                         verbose(env, "R%d leaks addr into mem\n", aux_reg);
5310                         return -EACCES;
5311                 }
5312         }
5313
5314         if (is_pointer_value(env, insn->src_reg)) {
5315                 verbose(env, "R%d leaks addr into mem\n", insn->src_reg);
5316                 return -EACCES;
5317         }
5318
5319         if (is_ctx_reg(env, insn->dst_reg) ||
5320             is_pkt_reg(env, insn->dst_reg) ||
5321             is_flow_key_reg(env, insn->dst_reg) ||
5322             is_sk_reg(env, insn->dst_reg)) {
5323                 verbose(env, "BPF_ATOMIC stores into R%d %s is not allowed\n",
5324                         insn->dst_reg,
5325                         reg_type_str(env, reg_state(env, insn->dst_reg)->type));
5326                 return -EACCES;
5327         }
5328
5329         if (insn->imm & BPF_FETCH) {
5330                 if (insn->imm == BPF_CMPXCHG)
5331                         load_reg = BPF_REG_0;
5332                 else
5333                         load_reg = insn->src_reg;
5334
5335                 /* check and record load of old value */
5336                 err = check_reg_arg(env, load_reg, DST_OP);
5337                 if (err)
5338                         return err;
5339         } else {
5340                 /* This instruction accesses a memory location but doesn't
5341                  * actually load it into a register.
5342                  */
5343                 load_reg = -1;
5344         }
5345
5346         /* Check whether we can read the memory, with second call for fetch
5347          * case to simulate the register fill.
5348          */
5349         err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
5350                                BPF_SIZE(insn->code), BPF_READ, -1, true);
5351         if (!err && load_reg >= 0)
5352                 err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
5353                                        BPF_SIZE(insn->code), BPF_READ, load_reg,
5354                                        true);
5355         if (err)
5356                 return err;
5357
5358         /* Check whether we can write into the same memory. */
5359         err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
5360                                BPF_SIZE(insn->code), BPF_WRITE, -1, true);
5361         if (err)
5362                 return err;
5363
5364         return 0;
5365 }
5366
5367 /* When register 'regno' is used to read the stack (either directly or through
5368  * a helper function) make sure that it's within stack boundary and, depending
5369  * on the access type, that all elements of the stack are initialized.
5370  *
5371  * 'off' includes 'regno->off', but not its dynamic part (if any).
5372  *
5373  * All registers that have been spilled on the stack in the slots within the
5374  * read offsets are marked as read.
5375  */
5376 static int check_stack_range_initialized(
5377                 struct bpf_verifier_env *env, int regno, int off,
5378                 int access_size, bool zero_size_allowed,
5379                 enum bpf_access_src type, struct bpf_call_arg_meta *meta)
5380 {
5381         struct bpf_reg_state *reg = reg_state(env, regno);
5382         struct bpf_func_state *state = func(env, reg);
5383         int err, min_off, max_off, i, j, slot, spi;
5384         char *err_extra = type == ACCESS_HELPER ? " indirect" : "";
5385         enum bpf_access_type bounds_check_type;
5386         /* Some accesses can write anything into the stack, others are
5387          * read-only.
5388          */
5389         bool clobber = false;
5390
5391         if (access_size == 0 && !zero_size_allowed) {
5392                 verbose(env, "invalid zero-sized read\n");
5393                 return -EACCES;
5394         }
5395
5396         if (type == ACCESS_HELPER) {
5397                 /* The bounds checks for writes are more permissive than for
5398                  * reads. However, if raw_mode is not set, we'll do extra
5399                  * checks below.
5400                  */
5401                 bounds_check_type = BPF_WRITE;
5402                 clobber = true;
5403         } else {
5404                 bounds_check_type = BPF_READ;
5405         }
5406         err = check_stack_access_within_bounds(env, regno, off, access_size,
5407                                                type, bounds_check_type);
5408         if (err)
5409                 return err;
5410
5411
5412         if (tnum_is_const(reg->var_off)) {
5413                 min_off = max_off = reg->var_off.value + off;
5414         } else {
5415                 /* Variable offset is prohibited for unprivileged mode for
5416                  * simplicity since it requires corresponding support in
5417                  * Spectre masking for stack ALU.
5418                  * See also retrieve_ptr_limit().
5419                  */
5420                 if (!env->bypass_spec_v1) {
5421                         char tn_buf[48];
5422
5423                         tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
5424                         verbose(env, "R%d%s variable offset stack access prohibited for !root, var_off=%s\n",
5425                                 regno, err_extra, tn_buf);
5426                         return -EACCES;
5427                 }
5428                 /* Only initialized buffer on stack is allowed to be accessed
5429                  * with variable offset. With uninitialized buffer it's hard to
5430                  * guarantee that whole memory is marked as initialized on
5431                  * helper return since specific bounds are unknown what may
5432                  * cause uninitialized stack leaking.
5433                  */
5434                 if (meta && meta->raw_mode)
5435                         meta = NULL;
5436
5437                 min_off = reg->smin_value + off;
5438                 max_off = reg->smax_value + off;
5439         }
5440
5441         if (meta && meta->raw_mode) {
5442                 meta->access_size = access_size;
5443                 meta->regno = regno;
5444                 return 0;
5445         }
5446
5447         for (i = min_off; i < max_off + access_size; i++) {
5448                 u8 *stype;
5449
5450                 slot = -i - 1;
5451                 spi = slot / BPF_REG_SIZE;
5452                 if (state->allocated_stack <= slot)
5453                         goto err;
5454                 stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE];
5455                 if (*stype == STACK_MISC)
5456                         goto mark;
5457                 if (*stype == STACK_ZERO) {
5458                         if (clobber) {
5459                                 /* helper can write anything into the stack */
5460                                 *stype = STACK_MISC;
5461                         }
5462                         goto mark;
5463                 }
5464
5465                 if (is_spilled_reg(&state->stack[spi]) &&
5466                     (state->stack[spi].spilled_ptr.type == SCALAR_VALUE ||
5467                      env->allow_ptr_leaks)) {
5468                         if (clobber) {
5469                                 __mark_reg_unknown(env, &state->stack[spi].spilled_ptr);
5470                                 for (j = 0; j < BPF_REG_SIZE; j++)
5471                                         scrub_spilled_slot(&state->stack[spi].slot_type[j]);
5472                         }
5473                         goto mark;
5474                 }
5475
5476 err:
5477                 if (tnum_is_const(reg->var_off)) {
5478                         verbose(env, "invalid%s read from stack R%d off %d+%d size %d\n",
5479                                 err_extra, regno, min_off, i - min_off, access_size);
5480                 } else {
5481                         char tn_buf[48];
5482
5483                         tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
5484                         verbose(env, "invalid%s read from stack R%d var_off %s+%d size %d\n",
5485                                 err_extra, regno, tn_buf, i - min_off, access_size);
5486                 }
5487                 return -EACCES;
5488 mark:
5489                 /* reading any byte out of 8-byte 'spill_slot' will cause
5490                  * the whole slot to be marked as 'read'
5491                  */
5492                 mark_reg_read(env, &state->stack[spi].spilled_ptr,
5493                               state->stack[spi].spilled_ptr.parent,
5494                               REG_LIVE_READ64);
5495                 /* We do not set REG_LIVE_WRITTEN for stack slot, as we can not
5496                  * be sure that whether stack slot is written to or not. Hence,
5497                  * we must still conservatively propagate reads upwards even if
5498                  * helper may write to the entire memory range.
5499                  */
5500         }
5501         return update_stack_depth(env, state, min_off);
5502 }
5503
5504 static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
5505                                    int access_size, bool zero_size_allowed,
5506                                    struct bpf_call_arg_meta *meta)
5507 {
5508         struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
5509         u32 *max_access;
5510
5511         switch (base_type(reg->type)) {
5512         case PTR_TO_PACKET:
5513         case PTR_TO_PACKET_META:
5514                 return check_packet_access(env, regno, reg->off, access_size,
5515                                            zero_size_allowed);
5516         case PTR_TO_MAP_KEY:
5517                 if (meta && meta->raw_mode) {
5518                         verbose(env, "R%d cannot write into %s\n", regno,
5519                                 reg_type_str(env, reg->type));
5520                         return -EACCES;
5521                 }
5522                 return check_mem_region_access(env, regno, reg->off, access_size,
5523                                                reg->map_ptr->key_size, false);
5524         case PTR_TO_MAP_VALUE:
5525                 if (check_map_access_type(env, regno, reg->off, access_size,
5526                                           meta && meta->raw_mode ? BPF_WRITE :
5527                                           BPF_READ))
5528                         return -EACCES;
5529                 return check_map_access(env, regno, reg->off, access_size,
5530                                         zero_size_allowed, ACCESS_HELPER);
5531         case PTR_TO_MEM:
5532                 if (type_is_rdonly_mem(reg->type)) {
5533                         if (meta && meta->raw_mode) {
5534                                 verbose(env, "R%d cannot write into %s\n", regno,
5535                                         reg_type_str(env, reg->type));
5536                                 return -EACCES;
5537                         }
5538                 }
5539                 return check_mem_region_access(env, regno, reg->off,
5540                                                access_size, reg->mem_size,
5541                                                zero_size_allowed);
5542         case PTR_TO_BUF:
5543                 if (type_is_rdonly_mem(reg->type)) {
5544                         if (meta && meta->raw_mode) {
5545                                 verbose(env, "R%d cannot write into %s\n", regno,
5546                                         reg_type_str(env, reg->type));
5547                                 return -EACCES;
5548                         }
5549
5550                         max_access = &env->prog->aux->max_rdonly_access;
5551                 } else {
5552                         max_access = &env->prog->aux->max_rdwr_access;
5553                 }
5554                 return check_buffer_access(env, reg, regno, reg->off,
5555                                            access_size, zero_size_allowed,
5556                                            max_access);
5557         case PTR_TO_STACK:
5558                 return check_stack_range_initialized(
5559                                 env,
5560                                 regno, reg->off, access_size,
5561                                 zero_size_allowed, ACCESS_HELPER, meta);
5562         case PTR_TO_CTX:
5563                 /* in case the function doesn't know how to access the context,
5564                  * (because we are in a program of type SYSCALL for example), we
5565                  * can not statically check its size.
5566                  * Dynamically check it now.
5567                  */
5568                 if (!env->ops->convert_ctx_access) {
5569                         enum bpf_access_type atype = meta && meta->raw_mode ? BPF_WRITE : BPF_READ;
5570                         int offset = access_size - 1;
5571
5572                         /* Allow zero-byte read from PTR_TO_CTX */
5573                         if (access_size == 0)
5574                                 return zero_size_allowed ? 0 : -EACCES;
5575
5576                         return check_mem_access(env, env->insn_idx, regno, offset, BPF_B,
5577                                                 atype, -1, false);
5578                 }
5579
5580                 fallthrough;
5581         default: /* scalar_value or invalid ptr */
5582                 /* Allow zero-byte read from NULL, regardless of pointer type */
5583                 if (zero_size_allowed && access_size == 0 &&
5584                     register_is_null(reg))
5585                         return 0;
5586
5587                 verbose(env, "R%d type=%s ", regno,
5588                         reg_type_str(env, reg->type));
5589                 verbose(env, "expected=%s\n", reg_type_str(env, PTR_TO_STACK));
5590                 return -EACCES;
5591         }
5592 }
5593
5594 static int check_mem_size_reg(struct bpf_verifier_env *env,
5595                               struct bpf_reg_state *reg, u32 regno,
5596                               bool zero_size_allowed,
5597                               struct bpf_call_arg_meta *meta)
5598 {
5599         int err;
5600
5601         /* This is used to refine r0 return value bounds for helpers
5602          * that enforce this value as an upper bound on return values.
5603          * See do_refine_retval_range() for helpers that can refine
5604          * the return value. C type of helper is u32 so we pull register
5605          * bound from umax_value however, if negative verifier errors
5606          * out. Only upper bounds can be learned because retval is an
5607          * int type and negative retvals are allowed.
5608          */
5609         meta->msize_max_value = reg->umax_value;
5610
5611         /* The register is SCALAR_VALUE; the access check
5612          * happens using its boundaries.
5613          */
5614         if (!tnum_is_const(reg->var_off))
5615                 /* For unprivileged variable accesses, disable raw
5616                  * mode so that the program is required to
5617                  * initialize all the memory that the helper could
5618                  * just partially fill up.
5619                  */
5620                 meta = NULL;
5621
5622         if (reg->smin_value < 0) {
5623                 verbose(env, "R%d min value is negative, either use unsigned or 'var &= const'\n",
5624                         regno);
5625                 return -EACCES;
5626         }
5627
5628         if (reg->umin_value == 0) {
5629                 err = check_helper_mem_access(env, regno - 1, 0,
5630                                               zero_size_allowed,
5631                                               meta);
5632                 if (err)
5633                         return err;
5634         }
5635
5636         if (reg->umax_value >= BPF_MAX_VAR_SIZ) {
5637                 verbose(env, "R%d unbounded memory access, use 'var &= const' or 'if (var < const)'\n",
5638                         regno);
5639                 return -EACCES;
5640         }
5641         err = check_helper_mem_access(env, regno - 1,
5642                                       reg->umax_value,
5643                                       zero_size_allowed, meta);
5644         if (!err)
5645                 err = mark_chain_precision(env, regno);
5646         return err;
5647 }
5648
5649 int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
5650                    u32 regno, u32 mem_size)
5651 {
5652         bool may_be_null = type_may_be_null(reg->type);
5653         struct bpf_reg_state saved_reg;
5654         struct bpf_call_arg_meta meta;
5655         int err;
5656
5657         if (register_is_null(reg))
5658                 return 0;
5659
5660         memset(&meta, 0, sizeof(meta));
5661         /* Assuming that the register contains a value check if the memory
5662          * access is safe. Temporarily save and restore the register's state as
5663          * the conversion shouldn't be visible to a caller.
5664          */
5665         if (may_be_null) {
5666                 saved_reg = *reg;
5667                 mark_ptr_not_null_reg(reg);
5668         }
5669
5670         err = check_helper_mem_access(env, regno, mem_size, true, &meta);
5671         /* Check access for BPF_WRITE */
5672         meta.raw_mode = true;
5673         err = err ?: check_helper_mem_access(env, regno, mem_size, true, &meta);
5674
5675         if (may_be_null)
5676                 *reg = saved_reg;
5677
5678         return err;
5679 }
5680
5681 static int check_kfunc_mem_size_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
5682                                     u32 regno)
5683 {
5684         struct bpf_reg_state *mem_reg = &cur_regs(env)[regno - 1];
5685         bool may_be_null = type_may_be_null(mem_reg->type);
5686         struct bpf_reg_state saved_reg;
5687         struct bpf_call_arg_meta meta;
5688         int err;
5689
5690         WARN_ON_ONCE(regno < BPF_REG_2 || regno > BPF_REG_5);
5691
5692         memset(&meta, 0, sizeof(meta));
5693
5694         if (may_be_null) {
5695                 saved_reg = *mem_reg;
5696                 mark_ptr_not_null_reg(mem_reg);
5697         }
5698
5699         err = check_mem_size_reg(env, reg, regno, true, &meta);
5700         /* Check access for BPF_WRITE */
5701         meta.raw_mode = true;
5702         err = err ?: check_mem_size_reg(env, reg, regno, true, &meta);
5703
5704         if (may_be_null)
5705                 *mem_reg = saved_reg;
5706         return err;
5707 }
5708
5709 /* Implementation details:
5710  * bpf_map_lookup returns PTR_TO_MAP_VALUE_OR_NULL.
5711  * bpf_obj_new returns PTR_TO_BTF_ID | MEM_ALLOC | PTR_MAYBE_NULL.
5712  * Two bpf_map_lookups (even with the same key) will have different reg->id.
5713  * Two separate bpf_obj_new will also have different reg->id.
5714  * For traditional PTR_TO_MAP_VALUE or PTR_TO_BTF_ID | MEM_ALLOC, the verifier
5715  * clears reg->id after value_or_null->value transition, since the verifier only
5716  * cares about the range of access to valid map value pointer and doesn't care
5717  * about actual address of the map element.
5718  * For maps with 'struct bpf_spin_lock' inside map value the verifier keeps
5719  * reg->id > 0 after value_or_null->value transition. By doing so
5720  * two bpf_map_lookups will be considered two different pointers that
5721  * point to different bpf_spin_locks. Likewise for pointers to allocated objects
5722  * returned from bpf_obj_new.
5723  * The verifier allows taking only one bpf_spin_lock at a time to avoid
5724  * dead-locks.
5725  * Since only one bpf_spin_lock is allowed the checks are simpler than
5726  * reg_is_refcounted() logic. The verifier needs to remember only
5727  * one spin_lock instead of array of acquired_refs.
5728  * cur_state->active_lock remembers which map value element or allocated
5729  * object got locked and clears it after bpf_spin_unlock.
5730  */
5731 static int process_spin_lock(struct bpf_verifier_env *env, int regno,
5732                              bool is_lock)
5733 {
5734         struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
5735         struct bpf_verifier_state *cur = env->cur_state;
5736         bool is_const = tnum_is_const(reg->var_off);
5737         u64 val = reg->var_off.value;
5738         struct bpf_map *map = NULL;
5739         struct btf *btf = NULL;
5740         struct btf_record *rec;
5741
5742         if (!is_const) {
5743                 verbose(env,
5744                         "R%d doesn't have constant offset. bpf_spin_lock has to be at the constant offset\n",
5745                         regno);
5746                 return -EINVAL;
5747         }
5748         if (reg->type == PTR_TO_MAP_VALUE) {
5749                 map = reg->map_ptr;
5750                 if (!map->btf) {
5751                         verbose(env,
5752                                 "map '%s' has to have BTF in order to use bpf_spin_lock\n",
5753                                 map->name);
5754                         return -EINVAL;
5755                 }
5756         } else {
5757                 btf = reg->btf;
5758         }
5759
5760         rec = reg_btf_record(reg);
5761         if (!btf_record_has_field(rec, BPF_SPIN_LOCK)) {
5762                 verbose(env, "%s '%s' has no valid bpf_spin_lock\n", map ? "map" : "local",
5763                         map ? map->name : "kptr");
5764                 return -EINVAL;
5765         }
5766         if (rec->spin_lock_off != val + reg->off) {
5767                 verbose(env, "off %lld doesn't point to 'struct bpf_spin_lock' that is at %d\n",
5768                         val + reg->off, rec->spin_lock_off);
5769                 return -EINVAL;
5770         }
5771         if (is_lock) {
5772                 if (cur->active_lock.ptr) {
5773                         verbose(env,
5774                                 "Locking two bpf_spin_locks are not allowed\n");
5775                         return -EINVAL;
5776                 }
5777                 if (map)
5778                         cur->active_lock.ptr = map;
5779                 else
5780                         cur->active_lock.ptr = btf;
5781                 cur->active_lock.id = reg->id;
5782         } else {
5783                 struct bpf_func_state *fstate = cur_func(env);
5784                 void *ptr;
5785                 int i;
5786
5787                 if (map)
5788                         ptr = map;
5789                 else
5790                         ptr = btf;
5791
5792                 if (!cur->active_lock.ptr) {
5793                         verbose(env, "bpf_spin_unlock without taking a lock\n");
5794                         return -EINVAL;
5795                 }
5796                 if (cur->active_lock.ptr != ptr ||
5797                     cur->active_lock.id != reg->id) {
5798                         verbose(env, "bpf_spin_unlock of different lock\n");
5799                         return -EINVAL;
5800                 }
5801                 cur->active_lock.ptr = NULL;
5802                 cur->active_lock.id = 0;
5803
5804                 for (i = fstate->acquired_refs - 1; i >= 0; i--) {
5805                         int err;
5806
5807                         /* Complain on error because this reference state cannot
5808                          * be freed before this point, as bpf_spin_lock critical
5809                          * section does not allow functions that release the
5810                          * allocated object immediately.
5811                          */
5812                         if (!fstate->refs[i].release_on_unlock)
5813                                 continue;
5814                         err = release_reference(env, fstate->refs[i].id);
5815                         if (err) {
5816                                 verbose(env, "failed to release release_on_unlock reference");
5817                                 return err;
5818                         }
5819                 }
5820         }
5821         return 0;
5822 }
5823
5824 static int process_timer_func(struct bpf_verifier_env *env, int regno,
5825                               struct bpf_call_arg_meta *meta)
5826 {
5827         struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
5828         bool is_const = tnum_is_const(reg->var_off);
5829         struct bpf_map *map = reg->map_ptr;
5830         u64 val = reg->var_off.value;
5831
5832         if (!is_const) {
5833                 verbose(env,
5834                         "R%d doesn't have constant offset. bpf_timer has to be at the constant offset\n",
5835                         regno);
5836                 return -EINVAL;
5837         }
5838         if (!map->btf) {
5839                 verbose(env, "map '%s' has to have BTF in order to use bpf_timer\n",
5840                         map->name);
5841                 return -EINVAL;
5842         }
5843         if (!btf_record_has_field(map->record, BPF_TIMER)) {
5844                 verbose(env, "map '%s' has no valid bpf_timer\n", map->name);
5845                 return -EINVAL;
5846         }
5847         if (map->record->timer_off != val + reg->off) {
5848                 verbose(env, "off %lld doesn't point to 'struct bpf_timer' that is at %d\n",
5849                         val + reg->off, map->record->timer_off);
5850                 return -EINVAL;
5851         }
5852         if (meta->map_ptr) {
5853                 verbose(env, "verifier bug. Two map pointers in a timer helper\n");
5854                 return -EFAULT;
5855         }
5856         meta->map_uid = reg->map_uid;
5857         meta->map_ptr = map;
5858         return 0;
5859 }
5860
5861 static int process_kptr_func(struct bpf_verifier_env *env, int regno,
5862                              struct bpf_call_arg_meta *meta)
5863 {
5864         struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
5865         struct bpf_map *map_ptr = reg->map_ptr;
5866         struct btf_field *kptr_field;
5867         u32 kptr_off;
5868
5869         if (!tnum_is_const(reg->var_off)) {
5870                 verbose(env,
5871                         "R%d doesn't have constant offset. kptr has to be at the constant offset\n",
5872                         regno);
5873                 return -EINVAL;
5874         }
5875         if (!map_ptr->btf) {
5876                 verbose(env, "map '%s' has to have BTF in order to use bpf_kptr_xchg\n",
5877                         map_ptr->name);
5878                 return -EINVAL;
5879         }
5880         if (!btf_record_has_field(map_ptr->record, BPF_KPTR)) {
5881                 verbose(env, "map '%s' has no valid kptr\n", map_ptr->name);
5882                 return -EINVAL;
5883         }
5884
5885         meta->map_ptr = map_ptr;
5886         kptr_off = reg->off + reg->var_off.value;
5887         kptr_field = btf_record_find(map_ptr->record, kptr_off, BPF_KPTR);
5888         if (!kptr_field) {
5889                 verbose(env, "off=%d doesn't point to kptr\n", kptr_off);
5890                 return -EACCES;
5891         }
5892         if (kptr_field->type != BPF_KPTR_REF) {
5893                 verbose(env, "off=%d kptr isn't referenced kptr\n", kptr_off);
5894                 return -EACCES;
5895         }
5896         meta->kptr_field = kptr_field;
5897         return 0;
5898 }
5899
5900 /* There are two register types representing a bpf_dynptr, one is PTR_TO_STACK
5901  * which points to a stack slot, and the other is CONST_PTR_TO_DYNPTR.
5902  *
5903  * In both cases we deal with the first 8 bytes, but need to mark the next 8
5904  * bytes as STACK_DYNPTR in case of PTR_TO_STACK. In case of
5905  * CONST_PTR_TO_DYNPTR, we are guaranteed to get the beginning of the object.
5906  *
5907  * Mutability of bpf_dynptr is at two levels, one is at the level of struct
5908  * bpf_dynptr itself, i.e. whether the helper is receiving a pointer to struct
5909  * bpf_dynptr or pointer to const struct bpf_dynptr. In the former case, it can
5910  * mutate the view of the dynptr and also possibly destroy it. In the latter
5911  * case, it cannot mutate the bpf_dynptr itself but it can still mutate the
5912  * memory that dynptr points to.
5913  *
5914  * The verifier will keep track both levels of mutation (bpf_dynptr's in
5915  * reg->type and the memory's in reg->dynptr.type), but there is no support for
5916  * readonly dynptr view yet, hence only the first case is tracked and checked.
5917  *
5918  * This is consistent with how C applies the const modifier to a struct object,
5919  * where the pointer itself inside bpf_dynptr becomes const but not what it
5920  * points to.
5921  *
5922  * Helpers which do not mutate the bpf_dynptr set MEM_RDONLY in their argument
5923  * type, and declare it as 'const struct bpf_dynptr *' in their prototype.
5924  */
5925 int process_dynptr_func(struct bpf_verifier_env *env, int regno,
5926                         enum bpf_arg_type arg_type, struct bpf_call_arg_meta *meta)
5927 {
5928         struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
5929
5930         /* MEM_UNINIT and MEM_RDONLY are exclusive, when applied to an
5931          * ARG_PTR_TO_DYNPTR (or ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_*):
5932          */
5933         if ((arg_type & (MEM_UNINIT | MEM_RDONLY)) == (MEM_UNINIT | MEM_RDONLY)) {
5934                 verbose(env, "verifier internal error: misconfigured dynptr helper type flags\n");
5935                 return -EFAULT;
5936         }
5937         /* CONST_PTR_TO_DYNPTR already has fixed and var_off as 0 due to
5938          * check_func_arg_reg_off's logic. We only need to check offset
5939          * alignment for PTR_TO_STACK.
5940          */
5941         if (reg->type == PTR_TO_STACK && (reg->off % BPF_REG_SIZE)) {
5942                 verbose(env, "cannot pass in dynptr at an offset=%d\n", reg->off);
5943                 return -EINVAL;
5944         }
5945         /*  MEM_UNINIT - Points to memory that is an appropriate candidate for
5946          *               constructing a mutable bpf_dynptr object.
5947          *
5948          *               Currently, this is only possible with PTR_TO_STACK
5949          *               pointing to a region of at least 16 bytes which doesn't
5950          *               contain an existing bpf_dynptr.
5951          *
5952          *  MEM_RDONLY - Points to a initialized bpf_dynptr that will not be
5953          *               mutated or destroyed. However, the memory it points to
5954          *               may be mutated.
5955          *
5956          *  None       - Points to a initialized dynptr that can be mutated and
5957          *               destroyed, including mutation of the memory it points
5958          *               to.
5959          */
5960         if (arg_type & MEM_UNINIT) {
5961                 if (!is_dynptr_reg_valid_uninit(env, reg)) {
5962                         verbose(env, "Dynptr has to be an uninitialized dynptr\n");
5963                         return -EINVAL;
5964                 }
5965
5966                 /* We only support one dynptr being uninitialized at the moment,
5967                  * which is sufficient for the helper functions we have right now.
5968                  */
5969                 if (meta->uninit_dynptr_regno) {
5970                         verbose(env, "verifier internal error: multiple uninitialized dynptr args\n");
5971                         return -EFAULT;
5972                 }
5973
5974                 meta->uninit_dynptr_regno = regno;
5975         } else /* MEM_RDONLY and None case from above */ {
5976                 /* For the reg->type == PTR_TO_STACK case, bpf_dynptr is never const */
5977                 if (reg->type == CONST_PTR_TO_DYNPTR && !(arg_type & MEM_RDONLY)) {
5978                         verbose(env, "cannot pass pointer to const bpf_dynptr, the helper mutates it\n");
5979                         return -EINVAL;
5980                 }
5981
5982                 if (!is_dynptr_reg_valid_init(env, reg)) {
5983                         verbose(env,
5984                                 "Expected an initialized dynptr as arg #%d\n",
5985                                 regno);
5986                         return -EINVAL;
5987                 }
5988
5989                 /* Fold modifiers (in this case, MEM_RDONLY) when checking expected type */
5990                 if (!is_dynptr_type_expected(env, reg, arg_type & ~MEM_RDONLY)) {
5991                         const char *err_extra = "";
5992
5993                         switch (arg_type & DYNPTR_TYPE_FLAG_MASK) {
5994                         case DYNPTR_TYPE_LOCAL:
5995                                 err_extra = "local";
5996                                 break;
5997                         case DYNPTR_TYPE_RINGBUF:
5998                                 err_extra = "ringbuf";
5999                                 break;
6000                         default:
6001                                 err_extra = "<unknown>";
6002                                 break;
6003                         }
6004                         verbose(env,
6005                                 "Expected a dynptr of type %s as arg #%d\n",
6006                                 err_extra, regno);
6007                         return -EINVAL;
6008                 }
6009         }
6010         return 0;
6011 }
6012
6013 static bool arg_type_is_mem_size(enum bpf_arg_type type)
6014 {
6015         return type == ARG_CONST_SIZE ||
6016                type == ARG_CONST_SIZE_OR_ZERO;
6017 }
6018
6019 static bool arg_type_is_release(enum bpf_arg_type type)
6020 {
6021         return type & OBJ_RELEASE;
6022 }
6023
6024 static bool arg_type_is_dynptr(enum bpf_arg_type type)
6025 {
6026         return base_type(type) == ARG_PTR_TO_DYNPTR;
6027 }
6028
6029 static int int_ptr_type_to_size(enum bpf_arg_type type)
6030 {
6031         if (type == ARG_PTR_TO_INT)
6032                 return sizeof(u32);
6033         else if (type == ARG_PTR_TO_LONG)
6034                 return sizeof(u64);
6035
6036         return -EINVAL;
6037 }
6038
6039 static int resolve_map_arg_type(struct bpf_verifier_env *env,
6040                                  const struct bpf_call_arg_meta *meta,
6041                                  enum bpf_arg_type *arg_type)
6042 {
6043         if (!meta->map_ptr) {
6044                 /* kernel subsystem misconfigured verifier */
6045                 verbose(env, "invalid map_ptr to access map->type\n");
6046                 return -EACCES;
6047         }
6048
6049         switch (meta->map_ptr->map_type) {
6050         case BPF_MAP_TYPE_SOCKMAP:
6051         case BPF_MAP_TYPE_SOCKHASH:
6052                 if (*arg_type == ARG_PTR_TO_MAP_VALUE) {
6053                         *arg_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON;
6054                 } else {
6055                         verbose(env, "invalid arg_type for sockmap/sockhash\n");
6056                         return -EINVAL;
6057                 }
6058                 break;
6059         case BPF_MAP_TYPE_BLOOM_FILTER:
6060                 if (meta->func_id == BPF_FUNC_map_peek_elem)
6061                         *arg_type = ARG_PTR_TO_MAP_VALUE;
6062                 break;
6063         default:
6064                 break;
6065         }
6066         return 0;
6067 }
6068
6069 struct bpf_reg_types {
6070         const enum bpf_reg_type types[10];
6071         u32 *btf_id;
6072 };
6073
6074 static const struct bpf_reg_types sock_types = {
6075         .types = {
6076                 PTR_TO_SOCK_COMMON,
6077                 PTR_TO_SOCKET,
6078                 PTR_TO_TCP_SOCK,
6079                 PTR_TO_XDP_SOCK,
6080         },
6081 };
6082
6083 #ifdef CONFIG_NET
6084 static const struct bpf_reg_types btf_id_sock_common_types = {
6085         .types = {
6086                 PTR_TO_SOCK_COMMON,
6087                 PTR_TO_SOCKET,
6088                 PTR_TO_TCP_SOCK,
6089                 PTR_TO_XDP_SOCK,
6090                 PTR_TO_BTF_ID,
6091                 PTR_TO_BTF_ID | PTR_TRUSTED,
6092         },
6093         .btf_id = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
6094 };
6095 #endif
6096
6097 static const struct bpf_reg_types mem_types = {
6098         .types = {
6099                 PTR_TO_STACK,
6100                 PTR_TO_PACKET,
6101                 PTR_TO_PACKET_META,
6102                 PTR_TO_MAP_KEY,
6103                 PTR_TO_MAP_VALUE,
6104                 PTR_TO_MEM,
6105                 PTR_TO_MEM | MEM_RINGBUF,
6106                 PTR_TO_BUF,
6107         },
6108 };
6109
6110 static const struct bpf_reg_types int_ptr_types = {
6111         .types = {
6112                 PTR_TO_STACK,
6113                 PTR_TO_PACKET,
6114                 PTR_TO_PACKET_META,
6115                 PTR_TO_MAP_KEY,
6116                 PTR_TO_MAP_VALUE,
6117         },
6118 };
6119
6120 static const struct bpf_reg_types spin_lock_types = {
6121         .types = {
6122                 PTR_TO_MAP_VALUE,
6123                 PTR_TO_BTF_ID | MEM_ALLOC,
6124         }
6125 };
6126
6127 static const struct bpf_reg_types fullsock_types = { .types = { PTR_TO_SOCKET } };
6128 static const struct bpf_reg_types scalar_types = { .types = { SCALAR_VALUE } };
6129 static const struct bpf_reg_types context_types = { .types = { PTR_TO_CTX } };
6130 static const struct bpf_reg_types ringbuf_mem_types = { .types = { PTR_TO_MEM | MEM_RINGBUF } };
6131 static const struct bpf_reg_types const_map_ptr_types = { .types = { CONST_PTR_TO_MAP } };
6132 static const struct bpf_reg_types btf_ptr_types = {
6133         .types = {
6134                 PTR_TO_BTF_ID,
6135                 PTR_TO_BTF_ID | PTR_TRUSTED,
6136                 PTR_TO_BTF_ID | MEM_RCU,
6137         },
6138 };
6139 static const struct bpf_reg_types percpu_btf_ptr_types = {
6140         .types = {
6141                 PTR_TO_BTF_ID | MEM_PERCPU,
6142                 PTR_TO_BTF_ID | MEM_PERCPU | PTR_TRUSTED,
6143         }
6144 };
6145 static const struct bpf_reg_types func_ptr_types = { .types = { PTR_TO_FUNC } };
6146 static const struct bpf_reg_types stack_ptr_types = { .types = { PTR_TO_STACK } };
6147 static const struct bpf_reg_types const_str_ptr_types = { .types = { PTR_TO_MAP_VALUE } };
6148 static const struct bpf_reg_types timer_types = { .types = { PTR_TO_MAP_VALUE } };
6149 static const struct bpf_reg_types kptr_types = { .types = { PTR_TO_MAP_VALUE } };
6150 static const struct bpf_reg_types dynptr_types = {
6151         .types = {
6152                 PTR_TO_STACK,
6153                 CONST_PTR_TO_DYNPTR,
6154         }
6155 };
6156
6157 static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = {
6158         [ARG_PTR_TO_MAP_KEY]            = &mem_types,
6159         [ARG_PTR_TO_MAP_VALUE]          = &mem_types,
6160         [ARG_CONST_SIZE]                = &scalar_types,
6161         [ARG_CONST_SIZE_OR_ZERO]        = &scalar_types,
6162         [ARG_CONST_ALLOC_SIZE_OR_ZERO]  = &scalar_types,
6163         [ARG_CONST_MAP_PTR]             = &const_map_ptr_types,
6164         [ARG_PTR_TO_CTX]                = &context_types,
6165         [ARG_PTR_TO_SOCK_COMMON]        = &sock_types,
6166 #ifdef CONFIG_NET
6167         [ARG_PTR_TO_BTF_ID_SOCK_COMMON] = &btf_id_sock_common_types,
6168 #endif
6169         [ARG_PTR_TO_SOCKET]             = &fullsock_types,
6170         [ARG_PTR_TO_BTF_ID]             = &btf_ptr_types,
6171         [ARG_PTR_TO_SPIN_LOCK]          = &spin_lock_types,
6172         [ARG_PTR_TO_MEM]                = &mem_types,
6173         [ARG_PTR_TO_RINGBUF_MEM]        = &ringbuf_mem_types,
6174         [ARG_PTR_TO_INT]                = &int_ptr_types,
6175         [ARG_PTR_TO_LONG]               = &int_ptr_types,
6176         [ARG_PTR_TO_PERCPU_BTF_ID]      = &percpu_btf_ptr_types,
6177         [ARG_PTR_TO_FUNC]               = &func_ptr_types,
6178         [ARG_PTR_TO_STACK]              = &stack_ptr_types,
6179         [ARG_PTR_TO_CONST_STR]          = &const_str_ptr_types,
6180         [ARG_PTR_TO_TIMER]              = &timer_types,
6181         [ARG_PTR_TO_KPTR]               = &kptr_types,
6182         [ARG_PTR_TO_DYNPTR]             = &dynptr_types,
6183 };
6184
6185 static int check_reg_type(struct bpf_verifier_env *env, u32 regno,
6186                           enum bpf_arg_type arg_type,
6187                           const u32 *arg_btf_id,
6188                           struct bpf_call_arg_meta *meta)
6189 {
6190         struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
6191         enum bpf_reg_type expected, type = reg->type;
6192         const struct bpf_reg_types *compatible;
6193         int i, j;
6194
6195         compatible = compatible_reg_types[base_type(arg_type)];
6196         if (!compatible) {
6197                 verbose(env, "verifier internal error: unsupported arg type %d\n", arg_type);
6198                 return -EFAULT;
6199         }
6200
6201         /* ARG_PTR_TO_MEM + RDONLY is compatible with PTR_TO_MEM and PTR_TO_MEM + RDONLY,
6202          * but ARG_PTR_TO_MEM is compatible only with PTR_TO_MEM and NOT with PTR_TO_MEM + RDONLY
6203          *
6204          * Same for MAYBE_NULL:
6205          *
6206          * ARG_PTR_TO_MEM + MAYBE_NULL is compatible with PTR_TO_MEM and PTR_TO_MEM + MAYBE_NULL,
6207          * but ARG_PTR_TO_MEM is compatible only with PTR_TO_MEM but NOT with PTR_TO_MEM + MAYBE_NULL
6208          *
6209          * Therefore we fold these flags depending on the arg_type before comparison.
6210          */
6211         if (arg_type & MEM_RDONLY)
6212                 type &= ~MEM_RDONLY;
6213         if (arg_type & PTR_MAYBE_NULL)
6214                 type &= ~PTR_MAYBE_NULL;
6215
6216         for (i = 0; i < ARRAY_SIZE(compatible->types); i++) {
6217                 expected = compatible->types[i];
6218                 if (expected == NOT_INIT)
6219                         break;
6220
6221                 if (type == expected)
6222                         goto found;
6223         }
6224
6225         verbose(env, "R%d type=%s expected=", regno, reg_type_str(env, reg->type));
6226         for (j = 0; j + 1 < i; j++)
6227                 verbose(env, "%s, ", reg_type_str(env, compatible->types[j]));
6228         verbose(env, "%s\n", reg_type_str(env, compatible->types[j]));
6229         return -EACCES;
6230
6231 found:
6232         if (reg->type == PTR_TO_BTF_ID || reg->type & PTR_TRUSTED) {
6233                 /* For bpf_sk_release, it needs to match against first member
6234                  * 'struct sock_common', hence make an exception for it. This
6235                  * allows bpf_sk_release to work for multiple socket types.
6236                  */
6237                 bool strict_type_match = arg_type_is_release(arg_type) &&
6238                                          meta->func_id != BPF_FUNC_sk_release;
6239
6240                 if (!arg_btf_id) {
6241                         if (!compatible->btf_id) {
6242                                 verbose(env, "verifier internal error: missing arg compatible BTF ID\n");
6243                                 return -EFAULT;
6244                         }
6245                         arg_btf_id = compatible->btf_id;
6246                 }
6247
6248                 if (meta->func_id == BPF_FUNC_kptr_xchg) {
6249                         if (map_kptr_match_type(env, meta->kptr_field, reg, regno))
6250                                 return -EACCES;
6251                 } else {
6252                         if (arg_btf_id == BPF_PTR_POISON) {
6253                                 verbose(env, "verifier internal error:");
6254                                 verbose(env, "R%d has non-overwritten BPF_PTR_POISON type\n",
6255                                         regno);
6256                                 return -EACCES;
6257                         }
6258
6259                         if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id, reg->off,
6260                                                   btf_vmlinux, *arg_btf_id,
6261                                                   strict_type_match)) {
6262                                 verbose(env, "R%d is of type %s but %s is expected\n",
6263                                         regno, kernel_type_name(reg->btf, reg->btf_id),
6264                                         kernel_type_name(btf_vmlinux, *arg_btf_id));
6265                                 return -EACCES;
6266                         }
6267                 }
6268         } else if (type_is_alloc(reg->type)) {
6269                 if (meta->func_id != BPF_FUNC_spin_lock && meta->func_id != BPF_FUNC_spin_unlock) {
6270                         verbose(env, "verifier internal error: unimplemented handling of MEM_ALLOC\n");
6271                         return -EFAULT;
6272                 }
6273         }
6274
6275         return 0;
6276 }
6277
6278 int check_func_arg_reg_off(struct bpf_verifier_env *env,
6279                            const struct bpf_reg_state *reg, int regno,
6280                            enum bpf_arg_type arg_type)
6281 {
6282         u32 type = reg->type;
6283
6284         /* When referenced register is passed to release function, its fixed
6285          * offset must be 0.
6286          *
6287          * We will check arg_type_is_release reg has ref_obj_id when storing
6288          * meta->release_regno.
6289          */
6290         if (arg_type_is_release(arg_type)) {
6291                 /* ARG_PTR_TO_DYNPTR with OBJ_RELEASE is a bit special, as it
6292                  * may not directly point to the object being released, but to
6293                  * dynptr pointing to such object, which might be at some offset
6294                  * on the stack. In that case, we simply to fallback to the
6295                  * default handling.
6296                  */
6297                 if (arg_type_is_dynptr(arg_type) && type == PTR_TO_STACK)
6298                         return 0;
6299                 /* Doing check_ptr_off_reg check for the offset will catch this
6300                  * because fixed_off_ok is false, but checking here allows us
6301                  * to give the user a better error message.
6302                  */
6303                 if (reg->off) {
6304                         verbose(env, "R%d must have zero offset when passed to release func or trusted arg to kfunc\n",
6305                                 regno);
6306                         return -EINVAL;
6307                 }
6308                 return __check_ptr_off_reg(env, reg, regno, false);
6309         }
6310
6311         switch (type) {
6312         /* Pointer types where both fixed and variable offset is explicitly allowed: */
6313         case PTR_TO_STACK:
6314         case PTR_TO_PACKET:
6315         case PTR_TO_PACKET_META:
6316         case PTR_TO_MAP_KEY:
6317         case PTR_TO_MAP_VALUE:
6318         case PTR_TO_MEM:
6319         case PTR_TO_MEM | MEM_RDONLY:
6320         case PTR_TO_MEM | MEM_RINGBUF:
6321         case PTR_TO_BUF:
6322         case PTR_TO_BUF | MEM_RDONLY:
6323         case SCALAR_VALUE:
6324                 return 0;
6325         /* All the rest must be rejected, except PTR_TO_BTF_ID which allows
6326          * fixed offset.
6327          */
6328         case PTR_TO_BTF_ID:
6329         case PTR_TO_BTF_ID | MEM_ALLOC:
6330         case PTR_TO_BTF_ID | PTR_TRUSTED:
6331         case PTR_TO_BTF_ID | MEM_RCU:
6332         case PTR_TO_BTF_ID | MEM_ALLOC | PTR_TRUSTED:
6333                 /* When referenced PTR_TO_BTF_ID is passed to release function,
6334                  * its fixed offset must be 0. In the other cases, fixed offset
6335                  * can be non-zero. This was already checked above. So pass
6336                  * fixed_off_ok as true to allow fixed offset for all other
6337                  * cases. var_off always must be 0 for PTR_TO_BTF_ID, hence we
6338                  * still need to do checks instead of returning.
6339                  */
6340                 return __check_ptr_off_reg(env, reg, regno, true);
6341         default:
6342                 return __check_ptr_off_reg(env, reg, regno, false);
6343         }
6344 }
6345
6346 static u32 dynptr_ref_obj_id(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
6347 {
6348         struct bpf_func_state *state = func(env, reg);
6349         int spi;
6350
6351         if (reg->type == CONST_PTR_TO_DYNPTR)
6352                 return reg->ref_obj_id;
6353
6354         spi = get_spi(reg->off);
6355         return state->stack[spi].spilled_ptr.ref_obj_id;
6356 }
6357
6358 static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
6359                           struct bpf_call_arg_meta *meta,
6360                           const struct bpf_func_proto *fn)
6361 {
6362         u32 regno = BPF_REG_1 + arg;
6363         struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
6364         enum bpf_arg_type arg_type = fn->arg_type[arg];
6365         enum bpf_reg_type type = reg->type;
6366         u32 *arg_btf_id = NULL;
6367         int err = 0;
6368
6369         if (arg_type == ARG_DONTCARE)
6370                 return 0;
6371
6372         err = check_reg_arg(env, regno, SRC_OP);
6373         if (err)
6374                 return err;
6375
6376         if (arg_type == ARG_ANYTHING) {
6377                 if (is_pointer_value(env, regno)) {
6378                         verbose(env, "R%d leaks addr into helper function\n",
6379                                 regno);
6380                         return -EACCES;
6381                 }
6382                 return 0;
6383         }
6384
6385         if (type_is_pkt_pointer(type) &&
6386             !may_access_direct_pkt_data(env, meta, BPF_READ)) {
6387                 verbose(env, "helper access to the packet is not allowed\n");
6388                 return -EACCES;
6389         }
6390
6391         if (base_type(arg_type) == ARG_PTR_TO_MAP_VALUE) {
6392                 err = resolve_map_arg_type(env, meta, &arg_type);
6393                 if (err)
6394                         return err;
6395         }
6396
6397         if (register_is_null(reg) && type_may_be_null(arg_type))
6398                 /* A NULL register has a SCALAR_VALUE type, so skip
6399                  * type checking.
6400                  */
6401                 goto skip_type_check;
6402
6403         /* arg_btf_id and arg_size are in a union. */
6404         if (base_type(arg_type) == ARG_PTR_TO_BTF_ID ||
6405             base_type(arg_type) == ARG_PTR_TO_SPIN_LOCK)
6406                 arg_btf_id = fn->arg_btf_id[arg];
6407
6408         err = check_reg_type(env, regno, arg_type, arg_btf_id, meta);
6409         if (err)
6410                 return err;
6411
6412         err = check_func_arg_reg_off(env, reg, regno, arg_type);
6413         if (err)
6414                 return err;
6415
6416 skip_type_check:
6417         if (arg_type_is_release(arg_type)) {
6418                 if (arg_type_is_dynptr(arg_type)) {
6419                         struct bpf_func_state *state = func(env, reg);
6420                         int spi;
6421
6422                         /* Only dynptr created on stack can be released, thus
6423                          * the get_spi and stack state checks for spilled_ptr
6424                          * should only be done before process_dynptr_func for
6425                          * PTR_TO_STACK.
6426                          */
6427                         if (reg->type == PTR_TO_STACK) {
6428                                 spi = get_spi(reg->off);
6429                                 if (!is_spi_bounds_valid(state, spi, BPF_DYNPTR_NR_SLOTS) ||
6430                                     !state->stack[spi].spilled_ptr.ref_obj_id) {
6431                                         verbose(env, "arg %d is an unacquired reference\n", regno);
6432                                         return -EINVAL;
6433                                 }
6434                         } else {
6435                                 verbose(env, "cannot release unowned const bpf_dynptr\n");
6436                                 return -EINVAL;
6437                         }
6438                 } else if (!reg->ref_obj_id && !register_is_null(reg)) {
6439                         verbose(env, "R%d must be referenced when passed to release function\n",
6440                                 regno);
6441                         return -EINVAL;
6442                 }
6443                 if (meta->release_regno) {
6444                         verbose(env, "verifier internal error: more than one release argument\n");
6445                         return -EFAULT;
6446                 }
6447                 meta->release_regno = regno;
6448         }
6449
6450         if (reg->ref_obj_id) {
6451                 if (meta->ref_obj_id) {
6452                         verbose(env, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n",
6453                                 regno, reg->ref_obj_id,
6454                                 meta->ref_obj_id);
6455                         return -EFAULT;
6456                 }
6457                 meta->ref_obj_id = reg->ref_obj_id;
6458         }
6459
6460         switch (base_type(arg_type)) {
6461         case ARG_CONST_MAP_PTR:
6462                 /* bpf_map_xxx(map_ptr) call: remember that map_ptr */
6463                 if (meta->map_ptr) {
6464                         /* Use map_uid (which is unique id of inner map) to reject:
6465                          * inner_map1 = bpf_map_lookup_elem(outer_map, key1)
6466                          * inner_map2 = bpf_map_lookup_elem(outer_map, key2)
6467                          * if (inner_map1 && inner_map2) {
6468                          *     timer = bpf_map_lookup_elem(inner_map1);
6469                          *     if (timer)
6470                          *         // mismatch would have been allowed
6471                          *         bpf_timer_init(timer, inner_map2);
6472                          * }
6473                          *
6474                          * Comparing map_ptr is enough to distinguish normal and outer maps.
6475                          */
6476                         if (meta->map_ptr != reg->map_ptr ||
6477                             meta->map_uid != reg->map_uid) {
6478                                 verbose(env,
6479                                         "timer pointer in R1 map_uid=%d doesn't match map pointer in R2 map_uid=%d\n",
6480                                         meta->map_uid, reg->map_uid);
6481                                 return -EINVAL;
6482                         }
6483                 }
6484                 meta->map_ptr = reg->map_ptr;
6485                 meta->map_uid = reg->map_uid;
6486                 break;
6487         case ARG_PTR_TO_MAP_KEY:
6488                 /* bpf_map_xxx(..., map_ptr, ..., key) call:
6489                  * check that [key, key + map->key_size) are within
6490                  * stack limits and initialized
6491                  */
6492                 if (!meta->map_ptr) {
6493                         /* in function declaration map_ptr must come before
6494                          * map_key, so that it's verified and known before
6495                          * we have to check map_key here. Otherwise it means
6496                          * that kernel subsystem misconfigured verifier
6497                          */
6498                         verbose(env, "invalid map_ptr to access map->key\n");
6499                         return -EACCES;
6500                 }
6501                 err = check_helper_mem_access(env, regno,
6502                                               meta->map_ptr->key_size, false,
6503                                               NULL);
6504                 break;
6505         case ARG_PTR_TO_MAP_VALUE:
6506                 if (type_may_be_null(arg_type) && register_is_null(reg))
6507                         return 0;
6508
6509                 /* bpf_map_xxx(..., map_ptr, ..., value) call:
6510                  * check [value, value + map->value_size) validity
6511                  */
6512                 if (!meta->map_ptr) {
6513                         /* kernel subsystem misconfigured verifier */
6514                         verbose(env, "invalid map_ptr to access map->value\n");
6515                         return -EACCES;
6516                 }
6517                 meta->raw_mode = arg_type & MEM_UNINIT;
6518                 err = check_helper_mem_access(env, regno,
6519                                               meta->map_ptr->value_size, false,
6520                                               meta);
6521                 break;
6522         case ARG_PTR_TO_PERCPU_BTF_ID:
6523                 if (!reg->btf_id) {
6524                         verbose(env, "Helper has invalid btf_id in R%d\n", regno);
6525                         return -EACCES;
6526                 }
6527                 meta->ret_btf = reg->btf;
6528                 meta->ret_btf_id = reg->btf_id;
6529                 break;
6530         case ARG_PTR_TO_SPIN_LOCK:
6531                 if (meta->func_id == BPF_FUNC_spin_lock) {
6532                         err = process_spin_lock(env, regno, true);
6533                         if (err)
6534                                 return err;
6535                 } else if (meta->func_id == BPF_FUNC_spin_unlock) {
6536                         err = process_spin_lock(env, regno, false);
6537                         if (err)
6538                                 return err;
6539                 } else {
6540                         verbose(env, "verifier internal error\n");
6541                         return -EFAULT;
6542                 }
6543                 break;
6544         case ARG_PTR_TO_TIMER:
6545                 err = process_timer_func(env, regno, meta);
6546                 if (err)
6547                         return err;
6548                 break;
6549         case ARG_PTR_TO_FUNC:
6550                 meta->subprogno = reg->subprogno;
6551                 break;
6552         case ARG_PTR_TO_MEM:
6553                 /* The access to this pointer is only checked when we hit the
6554                  * next is_mem_size argument below.
6555                  */
6556                 meta->raw_mode = arg_type & MEM_UNINIT;
6557                 if (arg_type & MEM_FIXED_SIZE) {
6558                         err = check_helper_mem_access(env, regno,
6559                                                       fn->arg_size[arg], false,
6560                                                       meta);
6561                 }
6562                 break;
6563         case ARG_CONST_SIZE:
6564                 err = check_mem_size_reg(env, reg, regno, false, meta);
6565                 break;
6566         case ARG_CONST_SIZE_OR_ZERO:
6567                 err = check_mem_size_reg(env, reg, regno, true, meta);
6568                 break;
6569         case ARG_PTR_TO_DYNPTR:
6570                 err = process_dynptr_func(env, regno, arg_type, meta);
6571                 if (err)
6572                         return err;
6573                 break;
6574         case ARG_CONST_ALLOC_SIZE_OR_ZERO:
6575                 if (!tnum_is_const(reg->var_off)) {
6576                         verbose(env, "R%d is not a known constant'\n",
6577                                 regno);
6578                         return -EACCES;
6579                 }
6580                 meta->mem_size = reg->var_off.value;
6581                 err = mark_chain_precision(env, regno);
6582                 if (err)
6583                         return err;
6584                 break;
6585         case ARG_PTR_TO_INT:
6586         case ARG_PTR_TO_LONG:
6587         {
6588                 int size = int_ptr_type_to_size(arg_type);
6589
6590                 err = check_helper_mem_access(env, regno, size, false, meta);
6591                 if (err)
6592                         return err;
6593                 err = check_ptr_alignment(env, reg, 0, size, true);
6594                 break;
6595         }
6596         case ARG_PTR_TO_CONST_STR:
6597         {
6598                 struct bpf_map *map = reg->map_ptr;
6599                 int map_off;
6600                 u64 map_addr;
6601                 char *str_ptr;
6602
6603                 if (!bpf_map_is_rdonly(map)) {
6604                         verbose(env, "R%d does not point to a readonly map'\n", regno);
6605                         return -EACCES;
6606                 }
6607
6608                 if (!tnum_is_const(reg->var_off)) {
6609                         verbose(env, "R%d is not a constant address'\n", regno);
6610                         return -EACCES;
6611                 }
6612
6613                 if (!map->ops->map_direct_value_addr) {
6614                         verbose(env, "no direct value access support for this map type\n");
6615                         return -EACCES;
6616                 }
6617
6618                 err = check_map_access(env, regno, reg->off,
6619                                        map->value_size - reg->off, false,
6620                                        ACCESS_HELPER);
6621                 if (err)
6622                         return err;
6623
6624                 map_off = reg->off + reg->var_off.value;
6625                 err = map->ops->map_direct_value_addr(map, &map_addr, map_off);
6626                 if (err) {
6627                         verbose(env, "direct value access on string failed\n");
6628                         return err;
6629                 }
6630
6631                 str_ptr = (char *)(long)(map_addr);
6632                 if (!strnchr(str_ptr + map_off, map->value_size - map_off, 0)) {
6633                         verbose(env, "string is not zero-terminated\n");
6634                         return -EINVAL;
6635                 }
6636                 break;
6637         }
6638         case ARG_PTR_TO_KPTR:
6639                 err = process_kptr_func(env, regno, meta);
6640                 if (err)
6641                         return err;
6642                 break;
6643         }
6644
6645         return err;
6646 }
6647
6648 static bool may_update_sockmap(struct bpf_verifier_env *env, int func_id)
6649 {
6650         enum bpf_attach_type eatype = env->prog->expected_attach_type;
6651         enum bpf_prog_type type = resolve_prog_type(env->prog);
6652
6653         if (func_id != BPF_FUNC_map_update_elem)
6654                 return false;
6655
6656         /* It's not possible to get access to a locked struct sock in these
6657          * contexts, so updating is safe.
6658          */
6659         switch (type) {
6660         case BPF_PROG_TYPE_TRACING:
6661                 if (eatype == BPF_TRACE_ITER)
6662                         return true;
6663                 break;
6664         case BPF_PROG_TYPE_SOCKET_FILTER:
6665         case BPF_PROG_TYPE_SCHED_CLS:
6666         case BPF_PROG_TYPE_SCHED_ACT:
6667         case BPF_PROG_TYPE_XDP:
6668         case BPF_PROG_TYPE_SK_REUSEPORT:
6669         case BPF_PROG_TYPE_FLOW_DISSECTOR:
6670         case BPF_PROG_TYPE_SK_LOOKUP:
6671                 return true;
6672         default:
6673                 break;
6674         }
6675
6676         verbose(env, "cannot update sockmap in this context\n");
6677         return false;
6678 }
6679
6680 static bool allow_tail_call_in_subprogs(struct bpf_verifier_env *env)
6681 {
6682         return env->prog->jit_requested &&
6683                bpf_jit_supports_subprog_tailcalls();
6684 }
6685
6686 static int check_map_func_compatibility(struct bpf_verifier_env *env,
6687                                         struct bpf_map *map, int func_id)
6688 {
6689         if (!map)
6690                 return 0;
6691
6692         /* We need a two way check, first is from map perspective ... */
6693         switch (map->map_type) {
6694         case BPF_MAP_TYPE_PROG_ARRAY:
6695                 if (func_id != BPF_FUNC_tail_call)
6696                         goto error;
6697                 break;
6698         case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
6699                 if (func_id != BPF_FUNC_perf_event_read &&
6700                     func_id != BPF_FUNC_perf_event_output &&
6701                     func_id != BPF_FUNC_skb_output &&
6702                     func_id != BPF_FUNC_perf_event_read_value &&
6703                     func_id != BPF_FUNC_xdp_output)
6704                         goto error;
6705                 break;
6706         case BPF_MAP_TYPE_RINGBUF:
6707                 if (func_id != BPF_FUNC_ringbuf_output &&
6708                     func_id != BPF_FUNC_ringbuf_reserve &&
6709                     func_id != BPF_FUNC_ringbuf_query &&
6710                     func_id != BPF_FUNC_ringbuf_reserve_dynptr &&
6711                     func_id != BPF_FUNC_ringbuf_submit_dynptr &&
6712                     func_id != BPF_FUNC_ringbuf_discard_dynptr)
6713                         goto error;
6714                 break;
6715         case BPF_MAP_TYPE_USER_RINGBUF:
6716                 if (func_id != BPF_FUNC_user_ringbuf_drain)
6717                         goto error;
6718                 break;
6719         case BPF_MAP_TYPE_STACK_TRACE:
6720                 if (func_id != BPF_FUNC_get_stackid)
6721                         goto error;
6722                 break;
6723         case BPF_MAP_TYPE_CGROUP_ARRAY:
6724                 if (func_id != BPF_FUNC_skb_under_cgroup &&
6725                     func_id != BPF_FUNC_current_task_under_cgroup)
6726                         goto error;
6727                 break;
6728         case BPF_MAP_TYPE_CGROUP_STORAGE:
6729         case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE:
6730                 if (func_id != BPF_FUNC_get_local_storage)
6731                         goto error;
6732                 break;
6733         case BPF_MAP_TYPE_DEVMAP:
6734         case BPF_MAP_TYPE_DEVMAP_HASH:
6735                 if (func_id != BPF_FUNC_redirect_map &&
6736                     func_id != BPF_FUNC_map_lookup_elem)
6737                         goto error;
6738                 break;
6739         /* Restrict bpf side of cpumap and xskmap, open when use-cases
6740          * appear.
6741          */
6742         case BPF_MAP_TYPE_CPUMAP:
6743                 if (func_id != BPF_FUNC_redirect_map)
6744                         goto error;
6745                 break;
6746         case BPF_MAP_TYPE_XSKMAP:
6747                 if (func_id != BPF_FUNC_redirect_map &&
6748                     func_id != BPF_FUNC_map_lookup_elem)
6749                         goto error;
6750                 break;
6751         case BPF_MAP_TYPE_ARRAY_OF_MAPS:
6752         case BPF_MAP_TYPE_HASH_OF_MAPS:
6753                 if (func_id != BPF_FUNC_map_lookup_elem)
6754                         goto error;
6755                 break;
6756         case BPF_MAP_TYPE_SOCKMAP:
6757                 if (func_id != BPF_FUNC_sk_redirect_map &&
6758                     func_id != BPF_FUNC_sock_map_update &&
6759                     func_id != BPF_FUNC_map_delete_elem &&
6760                     func_id != BPF_FUNC_msg_redirect_map &&
6761                     func_id != BPF_FUNC_sk_select_reuseport &&
6762                     func_id != BPF_FUNC_map_lookup_elem &&
6763                     !may_update_sockmap(env, func_id))
6764                         goto error;
6765                 break;
6766         case BPF_MAP_TYPE_SOCKHASH:
6767                 if (func_id != BPF_FUNC_sk_redirect_hash &&
6768                     func_id != BPF_FUNC_sock_hash_update &&
6769                     func_id != BPF_FUNC_map_delete_elem &&
6770                     func_id != BPF_FUNC_msg_redirect_hash &&
6771                     func_id != BPF_FUNC_sk_select_reuseport &&
6772                     func_id != BPF_FUNC_map_lookup_elem &&
6773                     !may_update_sockmap(env, func_id))
6774                         goto error;
6775                 break;
6776         case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY:
6777                 if (func_id != BPF_FUNC_sk_select_reuseport)
6778                         goto error;
6779                 break;
6780         case BPF_MAP_TYPE_QUEUE:
6781         case BPF_MAP_TYPE_STACK:
6782                 if (func_id != BPF_FUNC_map_peek_elem &&
6783                     func_id != BPF_FUNC_map_pop_elem &&
6784                     func_id != BPF_FUNC_map_push_elem)
6785                         goto error;
6786                 break;
6787         case BPF_MAP_TYPE_SK_STORAGE:
6788                 if (func_id != BPF_FUNC_sk_storage_get &&
6789                     func_id != BPF_FUNC_sk_storage_delete)
6790                         goto error;
6791                 break;
6792         case BPF_MAP_TYPE_INODE_STORAGE:
6793                 if (func_id != BPF_FUNC_inode_storage_get &&
6794                     func_id != BPF_FUNC_inode_storage_delete)
6795                         goto error;
6796                 break;
6797         case BPF_MAP_TYPE_TASK_STORAGE:
6798                 if (func_id != BPF_FUNC_task_storage_get &&
6799                     func_id != BPF_FUNC_task_storage_delete)
6800                         goto error;
6801                 break;
6802         case BPF_MAP_TYPE_CGRP_STORAGE:
6803                 if (func_id != BPF_FUNC_cgrp_storage_get &&
6804                     func_id != BPF_FUNC_cgrp_storage_delete)
6805                         goto error;
6806                 break;
6807         case BPF_MAP_TYPE_BLOOM_FILTER:
6808                 if (func_id != BPF_FUNC_map_peek_elem &&
6809                     func_id != BPF_FUNC_map_push_elem)
6810                         goto error;
6811                 break;
6812         default:
6813                 break;
6814         }
6815
6816         /* ... and second from the function itself. */
6817         switch (func_id) {
6818         case BPF_FUNC_tail_call:
6819                 if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY)
6820                         goto error;
6821                 if (env->subprog_cnt > 1 && !allow_tail_call_in_subprogs(env)) {
6822                         verbose(env, "tail_calls are not allowed in non-JITed programs with bpf-to-bpf calls\n");
6823                         return -EINVAL;
6824                 }
6825                 break;
6826         case BPF_FUNC_perf_event_read:
6827         case BPF_FUNC_perf_event_output:
6828         case BPF_FUNC_perf_event_read_value:
6829         case BPF_FUNC_skb_output:
6830         case BPF_FUNC_xdp_output:
6831                 if (map->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY)
6832                         goto error;
6833                 break;
6834         case BPF_FUNC_ringbuf_output:
6835         case BPF_FUNC_ringbuf_reserve:
6836         case BPF_FUNC_ringbuf_query:
6837         case BPF_FUNC_ringbuf_reserve_dynptr:
6838         case BPF_FUNC_ringbuf_submit_dynptr:
6839         case BPF_FUNC_ringbuf_discard_dynptr:
6840                 if (map->map_type != BPF_MAP_TYPE_RINGBUF)
6841                         goto error;
6842                 break;
6843         case BPF_FUNC_user_ringbuf_drain:
6844                 if (map->map_type != BPF_MAP_TYPE_USER_RINGBUF)
6845                         goto error;
6846                 break;
6847         case BPF_FUNC_get_stackid:
6848                 if (map->map_type != BPF_MAP_TYPE_STACK_TRACE)
6849                         goto error;
6850                 break;
6851         case BPF_FUNC_current_task_under_cgroup:
6852         case BPF_FUNC_skb_under_cgroup:
6853                 if (map->map_type != BPF_MAP_TYPE_CGROUP_ARRAY)
6854                         goto error;
6855                 break;
6856         case BPF_FUNC_redirect_map:
6857                 if (map->map_type != BPF_MAP_TYPE_DEVMAP &&
6858                     map->map_type != BPF_MAP_TYPE_DEVMAP_HASH &&
6859                     map->map_type != BPF_MAP_TYPE_CPUMAP &&
6860                     map->map_type != BPF_MAP_TYPE_XSKMAP)
6861                         goto error;
6862                 break;
6863         case BPF_FUNC_sk_redirect_map:
6864         case BPF_FUNC_msg_redirect_map:
6865         case BPF_FUNC_sock_map_update:
6866                 if (map->map_type != BPF_MAP_TYPE_SOCKMAP)
6867                         goto error;
6868                 break;
6869         case BPF_FUNC_sk_redirect_hash:
6870         case BPF_FUNC_msg_redirect_hash:
6871         case BPF_FUNC_sock_hash_update:
6872                 if (map->map_type != BPF_MAP_TYPE_SOCKHASH)
6873                         goto error;
6874                 break;
6875         case BPF_FUNC_get_local_storage:
6876                 if (map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE &&
6877                     map->map_type != BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE)
6878                         goto error;
6879                 break;
6880         case BPF_FUNC_sk_select_reuseport:
6881                 if (map->map_type != BPF_MAP_TYPE_REUSEPORT_SOCKARRAY &&
6882                     map->map_type != BPF_MAP_TYPE_SOCKMAP &&
6883                     map->map_type != BPF_MAP_TYPE_SOCKHASH)
6884                         goto error;
6885                 break;
6886         case BPF_FUNC_map_pop_elem:
6887                 if (map->map_type != BPF_MAP_TYPE_QUEUE &&
6888                     map->map_type != BPF_MAP_TYPE_STACK)
6889                         goto error;
6890                 break;
6891         case BPF_FUNC_map_peek_elem:
6892         case BPF_FUNC_map_push_elem:
6893                 if (map->map_type != BPF_MAP_TYPE_QUEUE &&
6894                     map->map_type != BPF_MAP_TYPE_STACK &&
6895                     map->map_type != BPF_MAP_TYPE_BLOOM_FILTER)
6896                         goto error;
6897                 break;
6898         case BPF_FUNC_map_lookup_percpu_elem:
6899                 if (map->map_type != BPF_MAP_TYPE_PERCPU_ARRAY &&
6900                     map->map_type != BPF_MAP_TYPE_PERCPU_HASH &&
6901                     map->map_type != BPF_MAP_TYPE_LRU_PERCPU_HASH)
6902                         goto error;
6903                 break;
6904         case BPF_FUNC_sk_storage_get:
6905         case BPF_FUNC_sk_storage_delete:
6906                 if (map->map_type != BPF_MAP_TYPE_SK_STORAGE)
6907                         goto error;
6908                 break;
6909         case BPF_FUNC_inode_storage_get:
6910         case BPF_FUNC_inode_storage_delete:
6911                 if (map->map_type != BPF_MAP_TYPE_INODE_STORAGE)
6912                         goto error;
6913                 break;
6914         case BPF_FUNC_task_storage_get:
6915         case BPF_FUNC_task_storage_delete:
6916                 if (map->map_type != BPF_MAP_TYPE_TASK_STORAGE)
6917                         goto error;
6918                 break;
6919         case BPF_FUNC_cgrp_storage_get:
6920         case BPF_FUNC_cgrp_storage_delete:
6921                 if (map->map_type != BPF_MAP_TYPE_CGRP_STORAGE)
6922                         goto error;
6923                 break;
6924         default:
6925                 break;
6926         }
6927
6928         return 0;
6929 error:
6930         verbose(env, "cannot pass map_type %d into func %s#%d\n",
6931                 map->map_type, func_id_name(func_id), func_id);
6932         return -EINVAL;
6933 }
6934
6935 static bool check_raw_mode_ok(const struct bpf_func_proto *fn)
6936 {
6937         int count = 0;
6938
6939         if (fn->arg1_type == ARG_PTR_TO_UNINIT_MEM)
6940                 count++;
6941         if (fn->arg2_type == ARG_PTR_TO_UNINIT_MEM)
6942                 count++;
6943         if (fn->arg3_type == ARG_PTR_TO_UNINIT_MEM)
6944                 count++;
6945         if (fn->arg4_type == ARG_PTR_TO_UNINIT_MEM)
6946                 count++;
6947         if (fn->arg5_type == ARG_PTR_TO_UNINIT_MEM)
6948                 count++;
6949
6950         /* We only support one arg being in raw mode at the moment,
6951          * which is sufficient for the helper functions we have
6952          * right now.
6953          */
6954         return count <= 1;
6955 }
6956
6957 static bool check_args_pair_invalid(const struct bpf_func_proto *fn, int arg)
6958 {
6959         bool is_fixed = fn->arg_type[arg] & MEM_FIXED_SIZE;
6960         bool has_size = fn->arg_size[arg] != 0;
6961         bool is_next_size = false;
6962
6963         if (arg + 1 < ARRAY_SIZE(fn->arg_type))
6964                 is_next_size = arg_type_is_mem_size(fn->arg_type[arg + 1]);
6965
6966         if (base_type(fn->arg_type[arg]) != ARG_PTR_TO_MEM)
6967                 return is_next_size;
6968
6969         return has_size == is_next_size || is_next_size == is_fixed;
6970 }
6971
6972 static bool check_arg_pair_ok(const struct bpf_func_proto *fn)
6973 {
6974         /* bpf_xxx(..., buf, len) call will access 'len'
6975          * bytes from memory 'buf'. Both arg types need
6976          * to be paired, so make sure there's no buggy
6977          * helper function specification.
6978          */
6979         if (arg_type_is_mem_size(fn->arg1_type) ||
6980             check_args_pair_invalid(fn, 0) ||
6981             check_args_pair_invalid(fn, 1) ||
6982             check_args_pair_invalid(fn, 2) ||
6983             check_args_pair_invalid(fn, 3) ||
6984             check_args_pair_invalid(fn, 4))
6985                 return false;
6986
6987         return true;
6988 }
6989
6990 static bool check_btf_id_ok(const struct bpf_func_proto *fn)
6991 {
6992         int i;
6993
6994         for (i = 0; i < ARRAY_SIZE(fn->arg_type); i++) {
6995                 if (base_type(fn->arg_type[i]) == ARG_PTR_TO_BTF_ID)
6996                         return !!fn->arg_btf_id[i];
6997                 if (base_type(fn->arg_type[i]) == ARG_PTR_TO_SPIN_LOCK)
6998                         return fn->arg_btf_id[i] == BPF_PTR_POISON;
6999                 if (base_type(fn->arg_type[i]) != ARG_PTR_TO_BTF_ID && fn->arg_btf_id[i] &&
7000                     /* arg_btf_id and arg_size are in a union. */
7001                     (base_type(fn->arg_type[i]) != ARG_PTR_TO_MEM ||
7002                      !(fn->arg_type[i] & MEM_FIXED_SIZE)))
7003                         return false;
7004         }
7005
7006         return true;
7007 }
7008
7009 static int check_func_proto(const struct bpf_func_proto *fn, int func_id)
7010 {
7011         return check_raw_mode_ok(fn) &&
7012                check_arg_pair_ok(fn) &&
7013                check_btf_id_ok(fn) ? 0 : -EINVAL;
7014 }
7015
7016 /* Packet data might have moved, any old PTR_TO_PACKET[_META,_END]
7017  * are now invalid, so turn them into unknown SCALAR_VALUE.
7018  */
7019 static void clear_all_pkt_pointers(struct bpf_verifier_env *env)
7020 {
7021         struct bpf_func_state *state;
7022         struct bpf_reg_state *reg;
7023
7024         bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({
7025                 if (reg_is_pkt_pointer_any(reg))
7026                         __mark_reg_unknown(env, reg);
7027         }));
7028 }
7029
7030 enum {
7031         AT_PKT_END = -1,
7032         BEYOND_PKT_END = -2,
7033 };
7034
7035 static void mark_pkt_end(struct bpf_verifier_state *vstate, int regn, bool range_open)
7036 {
7037         struct bpf_func_state *state = vstate->frame[vstate->curframe];
7038         struct bpf_reg_state *reg = &state->regs[regn];
7039
7040         if (reg->type != PTR_TO_PACKET)
7041                 /* PTR_TO_PACKET_META is not supported yet */
7042                 return;
7043
7044         /* The 'reg' is pkt > pkt_end or pkt >= pkt_end.
7045          * How far beyond pkt_end it goes is unknown.
7046          * if (!range_open) it's the case of pkt >= pkt_end
7047          * if (range_open) it's the case of pkt > pkt_end
7048          * hence this pointer is at least 1 byte bigger than pkt_end
7049          */
7050         if (range_open)
7051                 reg->range = BEYOND_PKT_END;
7052         else
7053                 reg->range = AT_PKT_END;
7054 }
7055
7056 /* The pointer with the specified id has released its reference to kernel
7057  * resources. Identify all copies of the same pointer and clear the reference.
7058  */
7059 static int release_reference(struct bpf_verifier_env *env,
7060                              int ref_obj_id)
7061 {
7062         struct bpf_func_state *state;
7063         struct bpf_reg_state *reg;
7064         int err;
7065
7066         err = release_reference_state(cur_func(env), ref_obj_id);
7067         if (err)
7068                 return err;
7069
7070         bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({
7071                 if (reg->ref_obj_id == ref_obj_id) {
7072                         if (!env->allow_ptr_leaks)
7073                                 __mark_reg_not_init(env, reg);
7074                         else
7075                                 __mark_reg_unknown(env, reg);
7076                 }
7077         }));
7078
7079         return 0;
7080 }
7081
7082 static void clear_caller_saved_regs(struct bpf_verifier_env *env,
7083                                     struct bpf_reg_state *regs)
7084 {
7085         int i;
7086
7087         /* after the call registers r0 - r5 were scratched */
7088         for (i = 0; i < CALLER_SAVED_REGS; i++) {
7089                 mark_reg_not_init(env, regs, caller_saved[i]);
7090                 check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
7091         }
7092 }
7093
7094 typedef int (*set_callee_state_fn)(struct bpf_verifier_env *env,
7095                                    struct bpf_func_state *caller,
7096                                    struct bpf_func_state *callee,
7097                                    int insn_idx);
7098
7099 static int set_callee_state(struct bpf_verifier_env *env,
7100                             struct bpf_func_state *caller,
7101                             struct bpf_func_state *callee, int insn_idx);
7102
7103 static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
7104                              int *insn_idx, int subprog,
7105                              set_callee_state_fn set_callee_state_cb)
7106 {
7107         struct bpf_verifier_state *state = env->cur_state;
7108         struct bpf_func_info_aux *func_info_aux;
7109         struct bpf_func_state *caller, *callee;
7110         int err;
7111         bool is_global = false;
7112
7113         if (state->curframe + 1 >= MAX_CALL_FRAMES) {
7114                 verbose(env, "the call stack of %d frames is too deep\n",
7115                         state->curframe + 2);
7116                 return -E2BIG;
7117         }
7118
7119         caller = state->frame[state->curframe];
7120         if (state->frame[state->curframe + 1]) {
7121                 verbose(env, "verifier bug. Frame %d already allocated\n",
7122                         state->curframe + 1);
7123                 return -EFAULT;
7124         }
7125
7126         func_info_aux = env->prog->aux->func_info_aux;
7127         if (func_info_aux)
7128                 is_global = func_info_aux[subprog].linkage == BTF_FUNC_GLOBAL;
7129         err = btf_check_subprog_call(env, subprog, caller->regs);
7130         if (err == -EFAULT)
7131                 return err;
7132         if (is_global) {
7133                 if (err) {
7134                         verbose(env, "Caller passes invalid args into func#%d\n",
7135                                 subprog);
7136                         return err;
7137                 } else {
7138                         if (env->log.level & BPF_LOG_LEVEL)
7139                                 verbose(env,
7140                                         "Func#%d is global and valid. Skipping.\n",
7141                                         subprog);
7142                         clear_caller_saved_regs(env, caller->regs);
7143
7144                         /* All global functions return a 64-bit SCALAR_VALUE */
7145                         mark_reg_unknown(env, caller->regs, BPF_REG_0);
7146                         caller->regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG;
7147
7148                         /* continue with next insn after call */
7149                         return 0;
7150                 }
7151         }
7152
7153         /* set_callee_state is used for direct subprog calls, but we are
7154          * interested in validating only BPF helpers that can call subprogs as
7155          * callbacks
7156          */
7157         if (set_callee_state_cb != set_callee_state && !is_callback_calling_function(insn->imm)) {
7158                 verbose(env, "verifier bug: helper %s#%d is not marked as callback-calling\n",
7159                         func_id_name(insn->imm), insn->imm);
7160                 return -EFAULT;
7161         }
7162
7163         if (insn->code == (BPF_JMP | BPF_CALL) &&
7164             insn->src_reg == 0 &&
7165             insn->imm == BPF_FUNC_timer_set_callback) {
7166                 struct bpf_verifier_state *async_cb;
7167
7168                 /* there is no real recursion here. timer callbacks are async */
7169                 env->subprog_info[subprog].is_async_cb = true;
7170                 async_cb = push_async_cb(env, env->subprog_info[subprog].start,
7171                                          *insn_idx, subprog);
7172                 if (!async_cb)
7173                         return -EFAULT;
7174                 callee = async_cb->frame[0];
7175                 callee->async_entry_cnt = caller->async_entry_cnt + 1;
7176
7177                 /* Convert bpf_timer_set_callback() args into timer callback args */
7178                 err = set_callee_state_cb(env, caller, callee, *insn_idx);
7179                 if (err)
7180                         return err;
7181
7182                 clear_caller_saved_regs(env, caller->regs);
7183                 mark_reg_unknown(env, caller->regs, BPF_REG_0);
7184                 caller->regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG;
7185                 /* continue with next insn after call */
7186                 return 0;
7187         }
7188
7189         callee = kzalloc(sizeof(*callee), GFP_KERNEL);
7190         if (!callee)
7191                 return -ENOMEM;
7192         state->frame[state->curframe + 1] = callee;
7193
7194         /* callee cannot access r0, r6 - r9 for reading and has to write
7195          * into its own stack before reading from it.
7196          * callee can read/write into caller's stack
7197          */
7198         init_func_state(env, callee,
7199                         /* remember the callsite, it will be used by bpf_exit */
7200                         *insn_idx /* callsite */,
7201                         state->curframe + 1 /* frameno within this callchain */,
7202                         subprog /* subprog number within this prog */);
7203
7204         /* Transfer references to the callee */
7205         err = copy_reference_state(callee, caller);
7206         if (err)
7207                 goto err_out;
7208
7209         err = set_callee_state_cb(env, caller, callee, *insn_idx);
7210         if (err)
7211                 goto err_out;
7212
7213         clear_caller_saved_regs(env, caller->regs);
7214
7215         /* only increment it after check_reg_arg() finished */
7216         state->curframe++;
7217
7218         /* and go analyze first insn of the callee */
7219         *insn_idx = env->subprog_info[subprog].start - 1;
7220
7221         if (env->log.level & BPF_LOG_LEVEL) {
7222                 verbose(env, "caller:\n");
7223                 print_verifier_state(env, caller, true);
7224                 verbose(env, "callee:\n");
7225                 print_verifier_state(env, callee, true);
7226         }
7227         return 0;
7228
7229 err_out:
7230         free_func_state(callee);
7231         state->frame[state->curframe + 1] = NULL;
7232         return err;
7233 }
7234
7235 int map_set_for_each_callback_args(struct bpf_verifier_env *env,
7236                                    struct bpf_func_state *caller,
7237                                    struct bpf_func_state *callee)
7238 {
7239         /* bpf_for_each_map_elem(struct bpf_map *map, void *callback_fn,
7240          *      void *callback_ctx, u64 flags);
7241          * callback_fn(struct bpf_map *map, void *key, void *value,
7242          *      void *callback_ctx);
7243          */
7244         callee->regs[BPF_REG_1] = caller->regs[BPF_REG_1];
7245
7246         callee->regs[BPF_REG_2].type = PTR_TO_MAP_KEY;
7247         __mark_reg_known_zero(&callee->regs[BPF_REG_2]);
7248         callee->regs[BPF_REG_2].map_ptr = caller->regs[BPF_REG_1].map_ptr;
7249
7250         callee->regs[BPF_REG_3].type = PTR_TO_MAP_VALUE;
7251         __mark_reg_known_zero(&callee->regs[BPF_REG_3]);
7252         callee->regs[BPF_REG_3].map_ptr = caller->regs[BPF_REG_1].map_ptr;
7253
7254         /* pointer to stack or null */
7255         callee->regs[BPF_REG_4] = caller->regs[BPF_REG_3];
7256
7257         /* unused */
7258         __mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
7259         return 0;
7260 }
7261
7262 static int set_callee_state(struct bpf_verifier_env *env,
7263                             struct bpf_func_state *caller,
7264                             struct bpf_func_state *callee, int insn_idx)
7265 {
7266         int i;
7267
7268         /* copy r1 - r5 args that callee can access.  The copy includes parent
7269          * pointers, which connects us up to the liveness chain
7270          */
7271         for (i = BPF_REG_1; i <= BPF_REG_5; i++)
7272                 callee->regs[i] = caller->regs[i];
7273         return 0;
7274 }
7275
7276 static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
7277                            int *insn_idx)
7278 {
7279         int subprog, target_insn;
7280
7281         target_insn = *insn_idx + insn->imm + 1;
7282         subprog = find_subprog(env, target_insn);
7283         if (subprog < 0) {
7284                 verbose(env, "verifier bug. No program starts at insn %d\n",
7285                         target_insn);
7286                 return -EFAULT;
7287         }
7288
7289         return __check_func_call(env, insn, insn_idx, subprog, set_callee_state);
7290 }
7291
7292 static int set_map_elem_callback_state(struct bpf_verifier_env *env,
7293                                        struct bpf_func_state *caller,
7294                                        struct bpf_func_state *callee,
7295                                        int insn_idx)
7296 {
7297         struct bpf_insn_aux_data *insn_aux = &env->insn_aux_data[insn_idx];
7298         struct bpf_map *map;
7299         int err;
7300
7301         if (bpf_map_ptr_poisoned(insn_aux)) {
7302                 verbose(env, "tail_call abusing map_ptr\n");
7303                 return -EINVAL;
7304         }
7305
7306         map = BPF_MAP_PTR(insn_aux->map_ptr_state);
7307         if (!map->ops->map_set_for_each_callback_args ||
7308             !map->ops->map_for_each_callback) {
7309                 verbose(env, "callback function not allowed for map\n");
7310                 return -ENOTSUPP;
7311         }
7312
7313         err = map->ops->map_set_for_each_callback_args(env, caller, callee);
7314         if (err)
7315                 return err;
7316
7317         callee->in_callback_fn = true;
7318         callee->callback_ret_range = tnum_range(0, 1);
7319         return 0;
7320 }
7321
7322 static int set_loop_callback_state(struct bpf_verifier_env *env,
7323                                    struct bpf_func_state *caller,
7324                                    struct bpf_func_state *callee,
7325                                    int insn_idx)
7326 {
7327         /* bpf_loop(u32 nr_loops, void *callback_fn, void *callback_ctx,
7328          *          u64 flags);
7329          * callback_fn(u32 index, void *callback_ctx);
7330          */
7331         callee->regs[BPF_REG_1].type = SCALAR_VALUE;
7332         callee->regs[BPF_REG_2] = caller->regs[BPF_REG_3];
7333
7334         /* unused */
7335         __mark_reg_not_init(env, &callee->regs[BPF_REG_3]);
7336         __mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
7337         __mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
7338
7339         callee->in_callback_fn = true;
7340         callee->callback_ret_range = tnum_range(0, 1);
7341         return 0;
7342 }
7343
7344 static int set_timer_callback_state(struct bpf_verifier_env *env,
7345                                     struct bpf_func_state *caller,
7346                                     struct bpf_func_state *callee,
7347                                     int insn_idx)
7348 {
7349         struct bpf_map *map_ptr = caller->regs[BPF_REG_1].map_ptr;
7350
7351         /* bpf_timer_set_callback(struct bpf_timer *timer, void *callback_fn);
7352          * callback_fn(struct bpf_map *map, void *key, void *value);
7353          */
7354         callee->regs[BPF_REG_1].type = CONST_PTR_TO_MAP;
7355         __mark_reg_known_zero(&callee->regs[BPF_REG_1]);
7356         callee->regs[BPF_REG_1].map_ptr = map_ptr;
7357
7358         callee->regs[BPF_REG_2].type = PTR_TO_MAP_KEY;
7359         __mark_reg_known_zero(&callee->regs[BPF_REG_2]);
7360         callee->regs[BPF_REG_2].map_ptr = map_ptr;
7361
7362         callee->regs[BPF_REG_3].type = PTR_TO_MAP_VALUE;
7363         __mark_reg_known_zero(&callee->regs[BPF_REG_3]);
7364         callee->regs[BPF_REG_3].map_ptr = map_ptr;
7365
7366         /* unused */
7367         __mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
7368         __mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
7369         callee->in_async_callback_fn = true;
7370         callee->callback_ret_range = tnum_range(0, 1);
7371         return 0;
7372 }
7373
7374 static int set_find_vma_callback_state(struct bpf_verifier_env *env,
7375                                        struct bpf_func_state *caller,
7376                                        struct bpf_func_state *callee,
7377                                        int insn_idx)
7378 {
7379         /* bpf_find_vma(struct task_struct *task, u64 addr,
7380          *               void *callback_fn, void *callback_ctx, u64 flags)
7381          * (callback_fn)(struct task_struct *task,
7382          *               struct vm_area_struct *vma, void *callback_ctx);
7383          */
7384         callee->regs[BPF_REG_1] = caller->regs[BPF_REG_1];
7385
7386         callee->regs[BPF_REG_2].type = PTR_TO_BTF_ID;
7387         __mark_reg_known_zero(&callee->regs[BPF_REG_2]);
7388         callee->regs[BPF_REG_2].btf =  btf_vmlinux;
7389         callee->regs[BPF_REG_2].btf_id = btf_tracing_ids[BTF_TRACING_TYPE_VMA],
7390
7391         /* pointer to stack or null */
7392         callee->regs[BPF_REG_3] = caller->regs[BPF_REG_4];
7393
7394         /* unused */
7395         __mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
7396         __mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
7397         callee->in_callback_fn = true;
7398         callee->callback_ret_range = tnum_range(0, 1);
7399         return 0;
7400 }
7401
7402 static int set_user_ringbuf_callback_state(struct bpf_verifier_env *env,
7403                                            struct bpf_func_state *caller,
7404                                            struct bpf_func_state *callee,
7405                                            int insn_idx)
7406 {
7407         /* bpf_user_ringbuf_drain(struct bpf_map *map, void *callback_fn, void
7408          *                        callback_ctx, u64 flags);
7409          * callback_fn(const struct bpf_dynptr_t* dynptr, void *callback_ctx);
7410          */
7411         __mark_reg_not_init(env, &callee->regs[BPF_REG_0]);
7412         mark_dynptr_cb_reg(&callee->regs[BPF_REG_1], BPF_DYNPTR_TYPE_LOCAL);
7413         callee->regs[BPF_REG_2] = caller->regs[BPF_REG_3];
7414
7415         /* unused */
7416         __mark_reg_not_init(env, &callee->regs[BPF_REG_3]);
7417         __mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
7418         __mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
7419
7420         callee->in_callback_fn = true;
7421         callee->callback_ret_range = tnum_range(0, 1);
7422         return 0;
7423 }
7424
7425 static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
7426 {
7427         struct bpf_verifier_state *state = env->cur_state;
7428         struct bpf_func_state *caller, *callee;
7429         struct bpf_reg_state *r0;
7430         int err;
7431
7432         callee = state->frame[state->curframe];
7433         r0 = &callee->regs[BPF_REG_0];
7434         if (r0->type == PTR_TO_STACK) {
7435                 /* technically it's ok to return caller's stack pointer
7436                  * (or caller's caller's pointer) back to the caller,
7437                  * since these pointers are valid. Only current stack
7438                  * pointer will be invalid as soon as function exits,
7439                  * but let's be conservative
7440                  */
7441                 verbose(env, "cannot return stack pointer to the caller\n");
7442                 return -EINVAL;
7443         }
7444
7445         caller = state->frame[state->curframe - 1];
7446         if (callee->in_callback_fn) {
7447                 /* enforce R0 return value range [0, 1]. */
7448                 struct tnum range = callee->callback_ret_range;
7449
7450                 if (r0->type != SCALAR_VALUE) {
7451                         verbose(env, "R0 not a scalar value\n");
7452                         return -EACCES;
7453                 }
7454                 if (!tnum_in(range, r0->var_off)) {
7455                         verbose_invalid_scalar(env, r0, &range, "callback return", "R0");
7456                         return -EINVAL;
7457                 }
7458         } else {
7459                 /* return to the caller whatever r0 had in the callee */
7460                 caller->regs[BPF_REG_0] = *r0;
7461         }
7462
7463         /* callback_fn frame should have released its own additions to parent's
7464          * reference state at this point, or check_reference_leak would
7465          * complain, hence it must be the same as the caller. There is no need
7466          * to copy it back.
7467          */
7468         if (!callee->in_callback_fn) {
7469                 /* Transfer references to the caller */
7470                 err = copy_reference_state(caller, callee);
7471                 if (err)
7472                         return err;
7473         }
7474
7475         *insn_idx = callee->callsite + 1;
7476         if (env->log.level & BPF_LOG_LEVEL) {
7477                 verbose(env, "returning from callee:\n");
7478                 print_verifier_state(env, callee, true);
7479                 verbose(env, "to caller at %d:\n", *insn_idx);
7480                 print_verifier_state(env, caller, true);
7481         }
7482         /* clear everything in the callee */
7483         free_func_state(callee);
7484         state->frame[state->curframe--] = NULL;
7485         return 0;
7486 }
7487
7488 static void do_refine_retval_range(struct bpf_reg_state *regs, int ret_type,
7489                                    int func_id,
7490                                    struct bpf_call_arg_meta *meta)
7491 {
7492         struct bpf_reg_state *ret_reg = &regs[BPF_REG_0];
7493
7494         if (ret_type != RET_INTEGER ||
7495             (func_id != BPF_FUNC_get_stack &&
7496              func_id != BPF_FUNC_get_task_stack &&
7497              func_id != BPF_FUNC_probe_read_str &&
7498              func_id != BPF_FUNC_probe_read_kernel_str &&
7499              func_id != BPF_FUNC_probe_read_user_str))
7500                 return;
7501
7502         ret_reg->smax_value = meta->msize_max_value;
7503         ret_reg->s32_max_value = meta->msize_max_value;
7504         ret_reg->smin_value = -MAX_ERRNO;
7505         ret_reg->s32_min_value = -MAX_ERRNO;
7506         reg_bounds_sync(ret_reg);
7507 }
7508
7509 static int
7510 record_func_map(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
7511                 int func_id, int insn_idx)
7512 {
7513         struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
7514         struct bpf_map *map = meta->map_ptr;
7515
7516         if (func_id != BPF_FUNC_tail_call &&
7517             func_id != BPF_FUNC_map_lookup_elem &&
7518             func_id != BPF_FUNC_map_update_elem &&
7519             func_id != BPF_FUNC_map_delete_elem &&
7520             func_id != BPF_FUNC_map_push_elem &&
7521             func_id != BPF_FUNC_map_pop_elem &&
7522             func_id != BPF_FUNC_map_peek_elem &&
7523             func_id != BPF_FUNC_for_each_map_elem &&
7524             func_id != BPF_FUNC_redirect_map &&
7525             func_id != BPF_FUNC_map_lookup_percpu_elem)
7526                 return 0;
7527
7528         if (map == NULL) {
7529                 verbose(env, "kernel subsystem misconfigured verifier\n");
7530                 return -EINVAL;
7531         }
7532
7533         /* In case of read-only, some additional restrictions
7534          * need to be applied in order to prevent altering the
7535          * state of the map from program side.
7536          */
7537         if ((map->map_flags & BPF_F_RDONLY_PROG) &&
7538             (func_id == BPF_FUNC_map_delete_elem ||
7539              func_id == BPF_FUNC_map_update_elem ||
7540              func_id == BPF_FUNC_map_push_elem ||
7541              func_id == BPF_FUNC_map_pop_elem)) {
7542                 verbose(env, "write into map forbidden\n");
7543                 return -EACCES;
7544         }
7545
7546         if (!BPF_MAP_PTR(aux->map_ptr_state))
7547                 bpf_map_ptr_store(aux, meta->map_ptr,
7548                                   !meta->map_ptr->bypass_spec_v1);
7549         else if (BPF_MAP_PTR(aux->map_ptr_state) != meta->map_ptr)
7550                 bpf_map_ptr_store(aux, BPF_MAP_PTR_POISON,
7551                                   !meta->map_ptr->bypass_spec_v1);
7552         return 0;
7553 }
7554
7555 static int
7556 record_func_key(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
7557                 int func_id, int insn_idx)
7558 {
7559         struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
7560         struct bpf_reg_state *regs = cur_regs(env), *reg;
7561         struct bpf_map *map = meta->map_ptr;
7562         u64 val, max;
7563         int err;
7564
7565         if (func_id != BPF_FUNC_tail_call)
7566                 return 0;
7567         if (!map || map->map_type != BPF_MAP_TYPE_PROG_ARRAY) {
7568                 verbose(env, "kernel subsystem misconfigured verifier\n");
7569                 return -EINVAL;
7570         }
7571
7572         reg = &regs[BPF_REG_3];
7573         val = reg->var_off.value;
7574         max = map->max_entries;
7575
7576         if (!(register_is_const(reg) && val < max)) {
7577                 bpf_map_key_store(aux, BPF_MAP_KEY_POISON);
7578                 return 0;
7579         }
7580
7581         err = mark_chain_precision(env, BPF_REG_3);
7582         if (err)
7583                 return err;
7584         if (bpf_map_key_unseen(aux))
7585                 bpf_map_key_store(aux, val);
7586         else if (!bpf_map_key_poisoned(aux) &&
7587                   bpf_map_key_immediate(aux) != val)
7588                 bpf_map_key_store(aux, BPF_MAP_KEY_POISON);
7589         return 0;
7590 }
7591
7592 static int check_reference_leak(struct bpf_verifier_env *env)
7593 {
7594         struct bpf_func_state *state = cur_func(env);
7595         bool refs_lingering = false;
7596         int i;
7597
7598         if (state->frameno && !state->in_callback_fn)
7599                 return 0;
7600
7601         for (i = 0; i < state->acquired_refs; i++) {
7602                 if (state->in_callback_fn && state->refs[i].callback_ref != state->frameno)
7603                         continue;
7604                 verbose(env, "Unreleased reference id=%d alloc_insn=%d\n",
7605                         state->refs[i].id, state->refs[i].insn_idx);
7606                 refs_lingering = true;
7607         }
7608         return refs_lingering ? -EINVAL : 0;
7609 }
7610
7611 static int check_bpf_snprintf_call(struct bpf_verifier_env *env,
7612                                    struct bpf_reg_state *regs)
7613 {
7614         struct bpf_reg_state *fmt_reg = &regs[BPF_REG_3];
7615         struct bpf_reg_state *data_len_reg = &regs[BPF_REG_5];
7616         struct bpf_map *fmt_map = fmt_reg->map_ptr;
7617         int err, fmt_map_off, num_args;
7618         u64 fmt_addr;
7619         char *fmt;
7620
7621         /* data must be an array of u64 */
7622         if (data_len_reg->var_off.value % 8)
7623                 return -EINVAL;
7624         num_args = data_len_reg->var_off.value / 8;
7625
7626         /* fmt being ARG_PTR_TO_CONST_STR guarantees that var_off is const
7627          * and map_direct_value_addr is set.
7628          */
7629         fmt_map_off = fmt_reg->off + fmt_reg->var_off.value;
7630         err = fmt_map->ops->map_direct_value_addr(fmt_map, &fmt_addr,
7631                                                   fmt_map_off);
7632         if (err) {
7633                 verbose(env, "verifier bug\n");
7634                 return -EFAULT;
7635         }
7636         fmt = (char *)(long)fmt_addr + fmt_map_off;
7637
7638         /* We are also guaranteed that fmt+fmt_map_off is NULL terminated, we
7639          * can focus on validating the format specifiers.
7640          */
7641         err = bpf_bprintf_prepare(fmt, UINT_MAX, NULL, NULL, num_args);
7642         if (err < 0)
7643                 verbose(env, "Invalid format string\n");
7644
7645         return err;
7646 }
7647
7648 static int check_get_func_ip(struct bpf_verifier_env *env)
7649 {
7650         enum bpf_prog_type type = resolve_prog_type(env->prog);
7651         int func_id = BPF_FUNC_get_func_ip;
7652
7653         if (type == BPF_PROG_TYPE_TRACING) {
7654                 if (!bpf_prog_has_trampoline(env->prog)) {
7655                         verbose(env, "func %s#%d supported only for fentry/fexit/fmod_ret programs\n",
7656                                 func_id_name(func_id), func_id);
7657                         return -ENOTSUPP;
7658                 }
7659                 return 0;
7660         } else if (type == BPF_PROG_TYPE_KPROBE) {
7661                 return 0;
7662         }
7663
7664         verbose(env, "func %s#%d not supported for program type %d\n",
7665                 func_id_name(func_id), func_id, type);
7666         return -ENOTSUPP;
7667 }
7668
7669 static struct bpf_insn_aux_data *cur_aux(struct bpf_verifier_env *env)
7670 {
7671         return &env->insn_aux_data[env->insn_idx];
7672 }
7673
7674 static bool loop_flag_is_zero(struct bpf_verifier_env *env)
7675 {
7676         struct bpf_reg_state *regs = cur_regs(env);
7677         struct bpf_reg_state *reg = &regs[BPF_REG_4];
7678         bool reg_is_null = register_is_null(reg);
7679
7680         if (reg_is_null)
7681                 mark_chain_precision(env, BPF_REG_4);
7682
7683         return reg_is_null;
7684 }
7685
7686 static void update_loop_inline_state(struct bpf_verifier_env *env, u32 subprogno)
7687 {
7688         struct bpf_loop_inline_state *state = &cur_aux(env)->loop_inline_state;
7689
7690         if (!state->initialized) {
7691                 state->initialized = 1;
7692                 state->fit_for_inline = loop_flag_is_zero(env);
7693                 state->callback_subprogno = subprogno;
7694                 return;
7695         }
7696
7697         if (!state->fit_for_inline)
7698                 return;
7699
7700         state->fit_for_inline = (loop_flag_is_zero(env) &&
7701                                  state->callback_subprogno == subprogno);
7702 }
7703
7704 static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
7705                              int *insn_idx_p)
7706 {
7707         enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
7708         const struct bpf_func_proto *fn = NULL;
7709         enum bpf_return_type ret_type;
7710         enum bpf_type_flag ret_flag;
7711         struct bpf_reg_state *regs;
7712         struct bpf_call_arg_meta meta;
7713         int insn_idx = *insn_idx_p;
7714         bool changes_data;
7715         int i, err, func_id;
7716
7717         /* find function prototype */
7718         func_id = insn->imm;
7719         if (func_id < 0 || func_id >= __BPF_FUNC_MAX_ID) {
7720                 verbose(env, "invalid func %s#%d\n", func_id_name(func_id),
7721                         func_id);
7722                 return -EINVAL;
7723         }
7724
7725         if (env->ops->get_func_proto)
7726                 fn = env->ops->get_func_proto(func_id, env->prog);
7727         if (!fn) {
7728                 verbose(env, "unknown func %s#%d\n", func_id_name(func_id),
7729                         func_id);
7730                 return -EINVAL;
7731         }
7732
7733         /* eBPF programs must be GPL compatible to use GPL-ed functions */
7734         if (!env->prog->gpl_compatible && fn->gpl_only) {
7735                 verbose(env, "cannot call GPL-restricted function from non-GPL compatible program\n");
7736                 return -EINVAL;
7737         }
7738
7739         if (fn->allowed && !fn->allowed(env->prog)) {
7740                 verbose(env, "helper call is not allowed in probe\n");
7741                 return -EINVAL;
7742         }
7743
7744         if (!env->prog->aux->sleepable && fn->might_sleep) {
7745                 verbose(env, "helper call might sleep in a non-sleepable prog\n");
7746                 return -EINVAL;
7747         }
7748
7749         /* With LD_ABS/IND some JITs save/restore skb from r1. */
7750         changes_data = bpf_helper_changes_pkt_data(fn->func);
7751         if (changes_data && fn->arg1_type != ARG_PTR_TO_CTX) {
7752                 verbose(env, "kernel subsystem misconfigured func %s#%d: r1 != ctx\n",
7753                         func_id_name(func_id), func_id);
7754                 return -EINVAL;
7755         }
7756
7757         memset(&meta, 0, sizeof(meta));
7758         meta.pkt_access = fn->pkt_access;
7759
7760         err = check_func_proto(fn, func_id);
7761         if (err) {
7762                 verbose(env, "kernel subsystem misconfigured func %s#%d\n",
7763                         func_id_name(func_id), func_id);
7764                 return err;
7765         }
7766
7767         if (env->cur_state->active_rcu_lock) {
7768                 if (fn->might_sleep) {
7769                         verbose(env, "sleepable helper %s#%d in rcu_read_lock region\n",
7770                                 func_id_name(func_id), func_id);
7771                         return -EINVAL;
7772                 }
7773
7774                 if (env->prog->aux->sleepable && is_storage_get_function(func_id))
7775                         env->insn_aux_data[insn_idx].storage_get_func_atomic = true;
7776         }
7777
7778         meta.func_id = func_id;
7779         /* check args */
7780         for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++) {
7781                 err = check_func_arg(env, i, &meta, fn);
7782                 if (err)
7783                         return err;
7784         }
7785
7786         err = record_func_map(env, &meta, func_id, insn_idx);
7787         if (err)
7788                 return err;
7789
7790         err = record_func_key(env, &meta, func_id, insn_idx);
7791         if (err)
7792                 return err;
7793
7794         /* Mark slots with STACK_MISC in case of raw mode, stack offset
7795          * is inferred from register state.
7796          */
7797         for (i = 0; i < meta.access_size; i++) {
7798                 err = check_mem_access(env, insn_idx, meta.regno, i, BPF_B,
7799                                        BPF_WRITE, -1, false);
7800                 if (err)
7801                         return err;
7802         }
7803
7804         regs = cur_regs(env);
7805
7806         /* This can only be set for PTR_TO_STACK, as CONST_PTR_TO_DYNPTR cannot
7807          * be reinitialized by any dynptr helper. Hence, mark_stack_slots_dynptr
7808          * is safe to do directly.
7809          */
7810         if (meta.uninit_dynptr_regno) {
7811                 if (regs[meta.uninit_dynptr_regno].type == CONST_PTR_TO_DYNPTR) {
7812                         verbose(env, "verifier internal error: CONST_PTR_TO_DYNPTR cannot be initialized\n");
7813                         return -EFAULT;
7814                 }
7815                 /* we write BPF_DW bits (8 bytes) at a time */
7816                 for (i = 0; i < BPF_DYNPTR_SIZE; i += 8) {
7817                         err = check_mem_access(env, insn_idx, meta.uninit_dynptr_regno,
7818                                                i, BPF_DW, BPF_WRITE, -1, false);
7819                         if (err)
7820                                 return err;
7821                 }
7822
7823                 err = mark_stack_slots_dynptr(env, &regs[meta.uninit_dynptr_regno],
7824                                               fn->arg_type[meta.uninit_dynptr_regno - BPF_REG_1],
7825                                               insn_idx);
7826                 if (err)
7827                         return err;
7828         }
7829
7830         if (meta.release_regno) {
7831                 err = -EINVAL;
7832                 /* This can only be set for PTR_TO_STACK, as CONST_PTR_TO_DYNPTR cannot
7833                  * be released by any dynptr helper. Hence, unmark_stack_slots_dynptr
7834                  * is safe to do directly.
7835                  */
7836                 if (arg_type_is_dynptr(fn->arg_type[meta.release_regno - BPF_REG_1])) {
7837                         if (regs[meta.release_regno].type == CONST_PTR_TO_DYNPTR) {
7838                                 verbose(env, "verifier internal error: CONST_PTR_TO_DYNPTR cannot be released\n");
7839                                 return -EFAULT;
7840                         }
7841                         err = unmark_stack_slots_dynptr(env, &regs[meta.release_regno]);
7842                 } else if (meta.ref_obj_id) {
7843                         err = release_reference(env, meta.ref_obj_id);
7844                 } else if (register_is_null(&regs[meta.release_regno])) {
7845                         /* meta.ref_obj_id can only be 0 if register that is meant to be
7846                          * released is NULL, which must be > R0.
7847                          */
7848                         err = 0;
7849                 }
7850                 if (err) {
7851                         verbose(env, "func %s#%d reference has not been acquired before\n",
7852                                 func_id_name(func_id), func_id);
7853                         return err;
7854                 }
7855         }
7856
7857         switch (func_id) {
7858         case BPF_FUNC_tail_call:
7859                 err = check_reference_leak(env);
7860                 if (err) {
7861                         verbose(env, "tail_call would lead to reference leak\n");
7862                         return err;
7863                 }
7864                 break;
7865         case BPF_FUNC_get_local_storage:
7866                 /* check that flags argument in get_local_storage(map, flags) is 0,
7867                  * this is required because get_local_storage() can't return an error.
7868                  */
7869                 if (!register_is_null(&regs[BPF_REG_2])) {
7870                         verbose(env, "get_local_storage() doesn't support non-zero flags\n");
7871                         return -EINVAL;
7872                 }
7873                 break;
7874         case BPF_FUNC_for_each_map_elem:
7875                 err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
7876                                         set_map_elem_callback_state);
7877                 break;
7878         case BPF_FUNC_timer_set_callback:
7879                 err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
7880                                         set_timer_callback_state);
7881                 break;
7882         case BPF_FUNC_find_vma:
7883                 err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
7884                                         set_find_vma_callback_state);
7885                 break;
7886         case BPF_FUNC_snprintf:
7887                 err = check_bpf_snprintf_call(env, regs);
7888                 break;
7889         case BPF_FUNC_loop:
7890                 update_loop_inline_state(env, meta.subprogno);
7891                 err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
7892                                         set_loop_callback_state);
7893                 break;
7894         case BPF_FUNC_dynptr_from_mem:
7895                 if (regs[BPF_REG_1].type != PTR_TO_MAP_VALUE) {
7896                         verbose(env, "Unsupported reg type %s for bpf_dynptr_from_mem data\n",
7897                                 reg_type_str(env, regs[BPF_REG_1].type));
7898                         return -EACCES;
7899                 }
7900                 break;
7901         case BPF_FUNC_set_retval:
7902                 if (prog_type == BPF_PROG_TYPE_LSM &&
7903                     env->prog->expected_attach_type == BPF_LSM_CGROUP) {
7904                         if (!env->prog->aux->attach_func_proto->type) {
7905                                 /* Make sure programs that attach to void
7906                                  * hooks don't try to modify return value.
7907                                  */
7908                                 verbose(env, "BPF_LSM_CGROUP that attach to void LSM hooks can't modify return value!\n");
7909                                 return -EINVAL;
7910                         }
7911                 }
7912                 break;
7913         case BPF_FUNC_dynptr_data:
7914                 for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++) {
7915                         if (arg_type_is_dynptr(fn->arg_type[i])) {
7916                                 struct bpf_reg_state *reg = &regs[BPF_REG_1 + i];
7917
7918                                 if (meta.ref_obj_id) {
7919                                         verbose(env, "verifier internal error: meta.ref_obj_id already set\n");
7920                                         return -EFAULT;
7921                                 }
7922
7923                                 meta.ref_obj_id = dynptr_ref_obj_id(env, reg);
7924                                 break;
7925                         }
7926                 }
7927                 if (i == MAX_BPF_FUNC_REG_ARGS) {
7928                         verbose(env, "verifier internal error: no dynptr in bpf_dynptr_data()\n");
7929                         return -EFAULT;
7930                 }
7931                 break;
7932         case BPF_FUNC_user_ringbuf_drain:
7933                 err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
7934                                         set_user_ringbuf_callback_state);
7935                 break;
7936         }
7937
7938         if (err)
7939                 return err;
7940
7941         /* reset caller saved regs */
7942         for (i = 0; i < CALLER_SAVED_REGS; i++) {
7943                 mark_reg_not_init(env, regs, caller_saved[i]);
7944                 check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
7945         }
7946
7947         /* helper call returns 64-bit value. */
7948         regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG;
7949
7950         /* update return register (already marked as written above) */
7951         ret_type = fn->ret_type;
7952         ret_flag = type_flag(ret_type);
7953
7954         switch (base_type(ret_type)) {
7955         case RET_INTEGER:
7956                 /* sets type to SCALAR_VALUE */
7957                 mark_reg_unknown(env, regs, BPF_REG_0);
7958                 break;
7959         case RET_VOID:
7960                 regs[BPF_REG_0].type = NOT_INIT;
7961                 break;
7962         case RET_PTR_TO_MAP_VALUE:
7963                 /* There is no offset yet applied, variable or fixed */
7964                 mark_reg_known_zero(env, regs, BPF_REG_0);
7965                 /* remember map_ptr, so that check_map_access()
7966                  * can check 'value_size' boundary of memory access
7967                  * to map element returned from bpf_map_lookup_elem()
7968                  */
7969                 if (meta.map_ptr == NULL) {
7970                         verbose(env,
7971                                 "kernel subsystem misconfigured verifier\n");
7972                         return -EINVAL;
7973                 }
7974                 regs[BPF_REG_0].map_ptr = meta.map_ptr;
7975                 regs[BPF_REG_0].map_uid = meta.map_uid;
7976                 regs[BPF_REG_0].type = PTR_TO_MAP_VALUE | ret_flag;
7977                 if (!type_may_be_null(ret_type) &&
7978                     btf_record_has_field(meta.map_ptr->record, BPF_SPIN_LOCK)) {
7979                         regs[BPF_REG_0].id = ++env->id_gen;
7980                 }
7981                 break;
7982         case RET_PTR_TO_SOCKET:
7983                 mark_reg_known_zero(env, regs, BPF_REG_0);
7984                 regs[BPF_REG_0].type = PTR_TO_SOCKET | ret_flag;
7985                 break;
7986         case RET_PTR_TO_SOCK_COMMON:
7987                 mark_reg_known_zero(env, regs, BPF_REG_0);
7988                 regs[BPF_REG_0].type = PTR_TO_SOCK_COMMON | ret_flag;
7989                 break;
7990         case RET_PTR_TO_TCP_SOCK:
7991                 mark_reg_known_zero(env, regs, BPF_REG_0);
7992                 regs[BPF_REG_0].type = PTR_TO_TCP_SOCK | ret_flag;
7993                 break;
7994         case RET_PTR_TO_MEM:
7995                 mark_reg_known_zero(env, regs, BPF_REG_0);
7996                 regs[BPF_REG_0].type = PTR_TO_MEM | ret_flag;
7997                 regs[BPF_REG_0].mem_size = meta.mem_size;
7998                 break;
7999         case RET_PTR_TO_MEM_OR_BTF_ID:
8000         {
8001                 const struct btf_type *t;
8002
8003                 mark_reg_known_zero(env, regs, BPF_REG_0);
8004                 t = btf_type_skip_modifiers(meta.ret_btf, meta.ret_btf_id, NULL);
8005                 if (!btf_type_is_struct(t)) {
8006                         u32 tsize;
8007                         const struct btf_type *ret;
8008                         const char *tname;
8009
8010                         /* resolve the type size of ksym. */
8011                         ret = btf_resolve_size(meta.ret_btf, t, &tsize);
8012                         if (IS_ERR(ret)) {
8013                                 tname = btf_name_by_offset(meta.ret_btf, t->name_off);
8014                                 verbose(env, "unable to resolve the size of type '%s': %ld\n",
8015                                         tname, PTR_ERR(ret));
8016                                 return -EINVAL;
8017                         }
8018                         regs[BPF_REG_0].type = PTR_TO_MEM | ret_flag;
8019                         regs[BPF_REG_0].mem_size = tsize;
8020                 } else {
8021                         /* MEM_RDONLY may be carried from ret_flag, but it
8022                          * doesn't apply on PTR_TO_BTF_ID. Fold it, otherwise
8023                          * it will confuse the check of PTR_TO_BTF_ID in
8024                          * check_mem_access().
8025                          */
8026                         ret_flag &= ~MEM_RDONLY;
8027
8028                         regs[BPF_REG_0].type = PTR_TO_BTF_ID | ret_flag;
8029                         regs[BPF_REG_0].btf = meta.ret_btf;
8030                         regs[BPF_REG_0].btf_id = meta.ret_btf_id;
8031                 }
8032                 break;
8033         }
8034         case RET_PTR_TO_BTF_ID:
8035         {
8036                 struct btf *ret_btf;
8037                 int ret_btf_id;
8038
8039                 mark_reg_known_zero(env, regs, BPF_REG_0);
8040                 regs[BPF_REG_0].type = PTR_TO_BTF_ID | ret_flag;
8041                 if (func_id == BPF_FUNC_kptr_xchg) {
8042                         ret_btf = meta.kptr_field->kptr.btf;
8043                         ret_btf_id = meta.kptr_field->kptr.btf_id;
8044                 } else {
8045                         if (fn->ret_btf_id == BPF_PTR_POISON) {
8046                                 verbose(env, "verifier internal error:");
8047                                 verbose(env, "func %s has non-overwritten BPF_PTR_POISON return type\n",
8048                                         func_id_name(func_id));
8049                                 return -EINVAL;
8050                         }
8051                         ret_btf = btf_vmlinux;
8052                         ret_btf_id = *fn->ret_btf_id;
8053                 }
8054                 if (ret_btf_id == 0) {
8055                         verbose(env, "invalid return type %u of func %s#%d\n",
8056                                 base_type(ret_type), func_id_name(func_id),
8057                                 func_id);
8058                         return -EINVAL;
8059                 }
8060                 regs[BPF_REG_0].btf = ret_btf;
8061                 regs[BPF_REG_0].btf_id = ret_btf_id;
8062                 break;
8063         }
8064         default:
8065                 verbose(env, "unknown return type %u of func %s#%d\n",
8066                         base_type(ret_type), func_id_name(func_id), func_id);
8067                 return -EINVAL;
8068         }
8069
8070         if (type_may_be_null(regs[BPF_REG_0].type))
8071                 regs[BPF_REG_0].id = ++env->id_gen;
8072
8073         if (helper_multiple_ref_obj_use(func_id, meta.map_ptr)) {
8074                 verbose(env, "verifier internal error: func %s#%d sets ref_obj_id more than once\n",
8075                         func_id_name(func_id), func_id);
8076                 return -EFAULT;
8077         }
8078
8079         if (is_ptr_cast_function(func_id) || is_dynptr_ref_function(func_id)) {
8080                 /* For release_reference() */
8081                 regs[BPF_REG_0].ref_obj_id = meta.ref_obj_id;
8082         } else if (is_acquire_function(func_id, meta.map_ptr)) {
8083                 int id = acquire_reference_state(env, insn_idx);
8084
8085                 if (id < 0)
8086                         return id;
8087                 /* For mark_ptr_or_null_reg() */
8088                 regs[BPF_REG_0].id = id;
8089                 /* For release_reference() */
8090                 regs[BPF_REG_0].ref_obj_id = id;
8091         }
8092
8093         do_refine_retval_range(regs, fn->ret_type, func_id, &meta);
8094
8095         err = check_map_func_compatibility(env, meta.map_ptr, func_id);
8096         if (err)
8097                 return err;
8098
8099         if ((func_id == BPF_FUNC_get_stack ||
8100              func_id == BPF_FUNC_get_task_stack) &&
8101             !env->prog->has_callchain_buf) {
8102                 const char *err_str;
8103
8104 #ifdef CONFIG_PERF_EVENTS
8105                 err = get_callchain_buffers(sysctl_perf_event_max_stack);
8106                 err_str = "cannot get callchain buffer for func %s#%d\n";
8107 #else
8108                 err = -ENOTSUPP;
8109                 err_str = "func %s#%d not supported without CONFIG_PERF_EVENTS\n";
8110 #endif
8111                 if (err) {
8112                         verbose(env, err_str, func_id_name(func_id), func_id);
8113                         return err;
8114                 }
8115
8116                 env->prog->has_callchain_buf = true;
8117         }
8118
8119         if (func_id == BPF_FUNC_get_stackid || func_id == BPF_FUNC_get_stack)
8120                 env->prog->call_get_stack = true;
8121
8122         if (func_id == BPF_FUNC_get_func_ip) {
8123                 if (check_get_func_ip(env))
8124                         return -ENOTSUPP;
8125                 env->prog->call_get_func_ip = true;
8126         }
8127
8128         if (changes_data)
8129                 clear_all_pkt_pointers(env);
8130         return 0;
8131 }
8132
8133 /* mark_btf_func_reg_size() is used when the reg size is determined by
8134  * the BTF func_proto's return value size and argument.
8135  */
8136 static void mark_btf_func_reg_size(struct bpf_verifier_env *env, u32 regno,
8137                                    size_t reg_size)
8138 {
8139         struct bpf_reg_state *reg = &cur_regs(env)[regno];
8140
8141         if (regno == BPF_REG_0) {
8142                 /* Function return value */
8143                 reg->live |= REG_LIVE_WRITTEN;
8144                 reg->subreg_def = reg_size == sizeof(u64) ?
8145                         DEF_NOT_SUBREG : env->insn_idx + 1;
8146         } else {
8147                 /* Function argument */
8148                 if (reg_size == sizeof(u64)) {
8149                         mark_insn_zext(env, reg);
8150                         mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
8151                 } else {
8152                         mark_reg_read(env, reg, reg->parent, REG_LIVE_READ32);
8153                 }
8154         }
8155 }
8156
8157 struct bpf_kfunc_call_arg_meta {
8158         /* In parameters */
8159         struct btf *btf;
8160         u32 func_id;
8161         u32 kfunc_flags;
8162         const struct btf_type *func_proto;
8163         const char *func_name;
8164         /* Out parameters */
8165         u32 ref_obj_id;
8166         u8 release_regno;
8167         bool r0_rdonly;
8168         u32 ret_btf_id;
8169         u64 r0_size;
8170         struct {
8171                 u64 value;
8172                 bool found;
8173         } arg_constant;
8174         struct {
8175                 struct btf *btf;
8176                 u32 btf_id;
8177         } arg_obj_drop;
8178         struct {
8179                 struct btf_field *field;
8180         } arg_list_head;
8181 };
8182
8183 static bool is_kfunc_acquire(struct bpf_kfunc_call_arg_meta *meta)
8184 {
8185         return meta->kfunc_flags & KF_ACQUIRE;
8186 }
8187
8188 static bool is_kfunc_ret_null(struct bpf_kfunc_call_arg_meta *meta)
8189 {
8190         return meta->kfunc_flags & KF_RET_NULL;
8191 }
8192
8193 static bool is_kfunc_release(struct bpf_kfunc_call_arg_meta *meta)
8194 {
8195         return meta->kfunc_flags & KF_RELEASE;
8196 }
8197
8198 static bool is_kfunc_trusted_args(struct bpf_kfunc_call_arg_meta *meta)
8199 {
8200         return meta->kfunc_flags & KF_TRUSTED_ARGS;
8201 }
8202
8203 static bool is_kfunc_sleepable(struct bpf_kfunc_call_arg_meta *meta)
8204 {
8205         return meta->kfunc_flags & KF_SLEEPABLE;
8206 }
8207
8208 static bool is_kfunc_destructive(struct bpf_kfunc_call_arg_meta *meta)
8209 {
8210         return meta->kfunc_flags & KF_DESTRUCTIVE;
8211 }
8212
8213 static bool is_kfunc_rcu(struct bpf_kfunc_call_arg_meta *meta)
8214 {
8215         return meta->kfunc_flags & KF_RCU;
8216 }
8217
8218 static bool is_kfunc_arg_kptr_get(struct bpf_kfunc_call_arg_meta *meta, int arg)
8219 {
8220         return arg == 0 && (meta->kfunc_flags & KF_KPTR_GET);
8221 }
8222
8223 static bool __kfunc_param_match_suffix(const struct btf *btf,
8224                                        const struct btf_param *arg,
8225                                        const char *suffix)
8226 {
8227         int suffix_len = strlen(suffix), len;
8228         const char *param_name;
8229
8230         /* In the future, this can be ported to use BTF tagging */
8231         param_name = btf_name_by_offset(btf, arg->name_off);
8232         if (str_is_empty(param_name))
8233                 return false;
8234         len = strlen(param_name);
8235         if (len < suffix_len)
8236                 return false;
8237         param_name += len - suffix_len;
8238         return !strncmp(param_name, suffix, suffix_len);
8239 }
8240
8241 static bool is_kfunc_arg_mem_size(const struct btf *btf,
8242                                   const struct btf_param *arg,
8243                                   const struct bpf_reg_state *reg)
8244 {
8245         const struct btf_type *t;
8246
8247         t = btf_type_skip_modifiers(btf, arg->type, NULL);
8248         if (!btf_type_is_scalar(t) || reg->type != SCALAR_VALUE)
8249                 return false;
8250
8251         return __kfunc_param_match_suffix(btf, arg, "__sz");
8252 }
8253
8254 static bool is_kfunc_arg_constant(const struct btf *btf, const struct btf_param *arg)
8255 {
8256         return __kfunc_param_match_suffix(btf, arg, "__k");
8257 }
8258
8259 static bool is_kfunc_arg_ignore(const struct btf *btf, const struct btf_param *arg)
8260 {
8261         return __kfunc_param_match_suffix(btf, arg, "__ign");
8262 }
8263
8264 static bool is_kfunc_arg_alloc_obj(const struct btf *btf, const struct btf_param *arg)
8265 {
8266         return __kfunc_param_match_suffix(btf, arg, "__alloc");
8267 }
8268
8269 static bool is_kfunc_arg_scalar_with_name(const struct btf *btf,
8270                                           const struct btf_param *arg,
8271                                           const char *name)
8272 {
8273         int len, target_len = strlen(name);
8274         const char *param_name;
8275
8276         param_name = btf_name_by_offset(btf, arg->name_off);
8277         if (str_is_empty(param_name))
8278                 return false;
8279         len = strlen(param_name);
8280         if (len != target_len)
8281                 return false;
8282         if (strcmp(param_name, name))
8283                 return false;
8284
8285         return true;
8286 }
8287
8288 enum {
8289         KF_ARG_DYNPTR_ID,
8290         KF_ARG_LIST_HEAD_ID,
8291         KF_ARG_LIST_NODE_ID,
8292 };
8293
8294 BTF_ID_LIST(kf_arg_btf_ids)
8295 BTF_ID(struct, bpf_dynptr_kern)
8296 BTF_ID(struct, bpf_list_head)
8297 BTF_ID(struct, bpf_list_node)
8298
8299 static bool __is_kfunc_ptr_arg_type(const struct btf *btf,
8300                                     const struct btf_param *arg, int type)
8301 {
8302         const struct btf_type *t;
8303         u32 res_id;
8304
8305         t = btf_type_skip_modifiers(btf, arg->type, NULL);
8306         if (!t)
8307                 return false;
8308         if (!btf_type_is_ptr(t))
8309                 return false;
8310         t = btf_type_skip_modifiers(btf, t->type, &res_id);
8311         if (!t)
8312                 return false;
8313         return btf_types_are_same(btf, res_id, btf_vmlinux, kf_arg_btf_ids[type]);
8314 }
8315
8316 static bool is_kfunc_arg_dynptr(const struct btf *btf, const struct btf_param *arg)
8317 {
8318         return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_DYNPTR_ID);
8319 }
8320
8321 static bool is_kfunc_arg_list_head(const struct btf *btf, const struct btf_param *arg)
8322 {
8323         return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_LIST_HEAD_ID);
8324 }
8325
8326 static bool is_kfunc_arg_list_node(const struct btf *btf, const struct btf_param *arg)
8327 {
8328         return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_LIST_NODE_ID);
8329 }
8330
8331 /* Returns true if struct is composed of scalars, 4 levels of nesting allowed */
8332 static bool __btf_type_is_scalar_struct(struct bpf_verifier_env *env,
8333                                         const struct btf *btf,
8334                                         const struct btf_type *t, int rec)
8335 {
8336         const struct btf_type *member_type;
8337         const struct btf_member *member;
8338         u32 i;
8339
8340         if (!btf_type_is_struct(t))
8341                 return false;
8342
8343         for_each_member(i, t, member) {
8344                 const struct btf_array *array;
8345
8346                 member_type = btf_type_skip_modifiers(btf, member->type, NULL);
8347                 if (btf_type_is_struct(member_type)) {
8348                         if (rec >= 3) {
8349                                 verbose(env, "max struct nesting depth exceeded\n");
8350                                 return false;
8351                         }
8352                         if (!__btf_type_is_scalar_struct(env, btf, member_type, rec + 1))
8353                                 return false;
8354                         continue;
8355                 }
8356                 if (btf_type_is_array(member_type)) {
8357                         array = btf_array(member_type);
8358                         if (!array->nelems)
8359                                 return false;
8360                         member_type = btf_type_skip_modifiers(btf, array->type, NULL);
8361                         if (!btf_type_is_scalar(member_type))
8362                                 return false;
8363                         continue;
8364                 }
8365                 if (!btf_type_is_scalar(member_type))
8366                         return false;
8367         }
8368         return true;
8369 }
8370
8371
8372 static u32 *reg2btf_ids[__BPF_REG_TYPE_MAX] = {
8373 #ifdef CONFIG_NET
8374         [PTR_TO_SOCKET] = &btf_sock_ids[BTF_SOCK_TYPE_SOCK],
8375         [PTR_TO_SOCK_COMMON] = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
8376         [PTR_TO_TCP_SOCK] = &btf_sock_ids[BTF_SOCK_TYPE_TCP],
8377 #endif
8378 };
8379
8380 enum kfunc_ptr_arg_type {
8381         KF_ARG_PTR_TO_CTX,
8382         KF_ARG_PTR_TO_ALLOC_BTF_ID,  /* Allocated object */
8383         KF_ARG_PTR_TO_KPTR,          /* PTR_TO_KPTR but type specific */
8384         KF_ARG_PTR_TO_DYNPTR,
8385         KF_ARG_PTR_TO_LIST_HEAD,
8386         KF_ARG_PTR_TO_LIST_NODE,
8387         KF_ARG_PTR_TO_BTF_ID,        /* Also covers reg2btf_ids conversions */
8388         KF_ARG_PTR_TO_MEM,
8389         KF_ARG_PTR_TO_MEM_SIZE,      /* Size derived from next argument, skip it */
8390 };
8391
8392 enum special_kfunc_type {
8393         KF_bpf_obj_new_impl,
8394         KF_bpf_obj_drop_impl,
8395         KF_bpf_list_push_front,
8396         KF_bpf_list_push_back,
8397         KF_bpf_list_pop_front,
8398         KF_bpf_list_pop_back,
8399         KF_bpf_cast_to_kern_ctx,
8400         KF_bpf_rdonly_cast,
8401         KF_bpf_rcu_read_lock,
8402         KF_bpf_rcu_read_unlock,
8403 };
8404
8405 BTF_SET_START(special_kfunc_set)
8406 BTF_ID(func, bpf_obj_new_impl)
8407 BTF_ID(func, bpf_obj_drop_impl)
8408 BTF_ID(func, bpf_list_push_front)
8409 BTF_ID(func, bpf_list_push_back)
8410 BTF_ID(func, bpf_list_pop_front)
8411 BTF_ID(func, bpf_list_pop_back)
8412 BTF_ID(func, bpf_cast_to_kern_ctx)
8413 BTF_ID(func, bpf_rdonly_cast)
8414 BTF_SET_END(special_kfunc_set)
8415
8416 BTF_ID_LIST(special_kfunc_list)
8417 BTF_ID(func, bpf_obj_new_impl)
8418 BTF_ID(func, bpf_obj_drop_impl)
8419 BTF_ID(func, bpf_list_push_front)
8420 BTF_ID(func, bpf_list_push_back)
8421 BTF_ID(func, bpf_list_pop_front)
8422 BTF_ID(func, bpf_list_pop_back)
8423 BTF_ID(func, bpf_cast_to_kern_ctx)
8424 BTF_ID(func, bpf_rdonly_cast)
8425 BTF_ID(func, bpf_rcu_read_lock)
8426 BTF_ID(func, bpf_rcu_read_unlock)
8427
8428 static bool is_kfunc_bpf_rcu_read_lock(struct bpf_kfunc_call_arg_meta *meta)
8429 {
8430         return meta->func_id == special_kfunc_list[KF_bpf_rcu_read_lock];
8431 }
8432
8433 static bool is_kfunc_bpf_rcu_read_unlock(struct bpf_kfunc_call_arg_meta *meta)
8434 {
8435         return meta->func_id == special_kfunc_list[KF_bpf_rcu_read_unlock];
8436 }
8437
8438 static enum kfunc_ptr_arg_type
8439 get_kfunc_ptr_arg_type(struct bpf_verifier_env *env,
8440                        struct bpf_kfunc_call_arg_meta *meta,
8441                        const struct btf_type *t, const struct btf_type *ref_t,
8442                        const char *ref_tname, const struct btf_param *args,
8443                        int argno, int nargs)
8444 {
8445         u32 regno = argno + 1;
8446         struct bpf_reg_state *regs = cur_regs(env);
8447         struct bpf_reg_state *reg = &regs[regno];
8448         bool arg_mem_size = false;
8449
8450         if (meta->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx])
8451                 return KF_ARG_PTR_TO_CTX;
8452
8453         /* In this function, we verify the kfunc's BTF as per the argument type,
8454          * leaving the rest of the verification with respect to the register
8455          * type to our caller. When a set of conditions hold in the BTF type of
8456          * arguments, we resolve it to a known kfunc_ptr_arg_type.
8457          */
8458         if (btf_get_prog_ctx_type(&env->log, meta->btf, t, resolve_prog_type(env->prog), argno))
8459                 return KF_ARG_PTR_TO_CTX;
8460
8461         if (is_kfunc_arg_alloc_obj(meta->btf, &args[argno]))
8462                 return KF_ARG_PTR_TO_ALLOC_BTF_ID;
8463
8464         if (is_kfunc_arg_kptr_get(meta, argno)) {
8465                 if (!btf_type_is_ptr(ref_t)) {
8466                         verbose(env, "arg#0 BTF type must be a double pointer for kptr_get kfunc\n");
8467                         return -EINVAL;
8468                 }
8469                 ref_t = btf_type_by_id(meta->btf, ref_t->type);
8470                 ref_tname = btf_name_by_offset(meta->btf, ref_t->name_off);
8471                 if (!btf_type_is_struct(ref_t)) {
8472                         verbose(env, "kernel function %s args#0 pointer type %s %s is not supported\n",
8473                                 meta->func_name, btf_type_str(ref_t), ref_tname);
8474                         return -EINVAL;
8475                 }
8476                 return KF_ARG_PTR_TO_KPTR;
8477         }
8478
8479         if (is_kfunc_arg_dynptr(meta->btf, &args[argno]))
8480                 return KF_ARG_PTR_TO_DYNPTR;
8481
8482         if (is_kfunc_arg_list_head(meta->btf, &args[argno]))
8483                 return KF_ARG_PTR_TO_LIST_HEAD;
8484
8485         if (is_kfunc_arg_list_node(meta->btf, &args[argno]))
8486                 return KF_ARG_PTR_TO_LIST_NODE;
8487
8488         if ((base_type(reg->type) == PTR_TO_BTF_ID || reg2btf_ids[base_type(reg->type)])) {
8489                 if (!btf_type_is_struct(ref_t)) {
8490                         verbose(env, "kernel function %s args#%d pointer type %s %s is not supported\n",
8491                                 meta->func_name, argno, btf_type_str(ref_t), ref_tname);
8492                         return -EINVAL;
8493                 }
8494                 return KF_ARG_PTR_TO_BTF_ID;
8495         }
8496
8497         if (argno + 1 < nargs && is_kfunc_arg_mem_size(meta->btf, &args[argno + 1], &regs[regno + 1]))
8498                 arg_mem_size = true;
8499
8500         /* This is the catch all argument type of register types supported by
8501          * check_helper_mem_access. However, we only allow when argument type is
8502          * pointer to scalar, or struct composed (recursively) of scalars. When
8503          * arg_mem_size is true, the pointer can be void *.
8504          */
8505         if (!btf_type_is_scalar(ref_t) && !__btf_type_is_scalar_struct(env, meta->btf, ref_t, 0) &&
8506             (arg_mem_size ? !btf_type_is_void(ref_t) : 1)) {
8507                 verbose(env, "arg#%d pointer type %s %s must point to %sscalar, or struct with scalar\n",
8508                         argno, btf_type_str(ref_t), ref_tname, arg_mem_size ? "void, " : "");
8509                 return -EINVAL;
8510         }
8511         return arg_mem_size ? KF_ARG_PTR_TO_MEM_SIZE : KF_ARG_PTR_TO_MEM;
8512 }
8513
8514 static int process_kf_arg_ptr_to_btf_id(struct bpf_verifier_env *env,
8515                                         struct bpf_reg_state *reg,
8516                                         const struct btf_type *ref_t,
8517                                         const char *ref_tname, u32 ref_id,
8518                                         struct bpf_kfunc_call_arg_meta *meta,
8519                                         int argno)
8520 {
8521         const struct btf_type *reg_ref_t;
8522         bool strict_type_match = false;
8523         const struct btf *reg_btf;
8524         const char *reg_ref_tname;
8525         u32 reg_ref_id;
8526
8527         if (base_type(reg->type) == PTR_TO_BTF_ID) {
8528                 reg_btf = reg->btf;
8529                 reg_ref_id = reg->btf_id;
8530         } else {
8531                 reg_btf = btf_vmlinux;
8532                 reg_ref_id = *reg2btf_ids[base_type(reg->type)];
8533         }
8534
8535         if (is_kfunc_trusted_args(meta) || (is_kfunc_release(meta) && reg->ref_obj_id))
8536                 strict_type_match = true;
8537
8538         reg_ref_t = btf_type_skip_modifiers(reg_btf, reg_ref_id, &reg_ref_id);
8539         reg_ref_tname = btf_name_by_offset(reg_btf, reg_ref_t->name_off);
8540         if (!btf_struct_ids_match(&env->log, reg_btf, reg_ref_id, reg->off, meta->btf, ref_id, strict_type_match)) {
8541                 verbose(env, "kernel function %s args#%d expected pointer to %s %s but R%d has a pointer to %s %s\n",
8542                         meta->func_name, argno, btf_type_str(ref_t), ref_tname, argno + 1,
8543                         btf_type_str(reg_ref_t), reg_ref_tname);
8544                 return -EINVAL;
8545         }
8546         return 0;
8547 }
8548
8549 static int process_kf_arg_ptr_to_kptr(struct bpf_verifier_env *env,
8550                                       struct bpf_reg_state *reg,
8551                                       const struct btf_type *ref_t,
8552                                       const char *ref_tname,
8553                                       struct bpf_kfunc_call_arg_meta *meta,
8554                                       int argno)
8555 {
8556         struct btf_field *kptr_field;
8557
8558         /* check_func_arg_reg_off allows var_off for
8559          * PTR_TO_MAP_VALUE, but we need fixed offset to find
8560          * off_desc.
8561          */
8562         if (!tnum_is_const(reg->var_off)) {
8563                 verbose(env, "arg#0 must have constant offset\n");
8564                 return -EINVAL;
8565         }
8566
8567         kptr_field = btf_record_find(reg->map_ptr->record, reg->off + reg->var_off.value, BPF_KPTR);
8568         if (!kptr_field || kptr_field->type != BPF_KPTR_REF) {
8569                 verbose(env, "arg#0 no referenced kptr at map value offset=%llu\n",
8570                         reg->off + reg->var_off.value);
8571                 return -EINVAL;
8572         }
8573
8574         if (!btf_struct_ids_match(&env->log, meta->btf, ref_t->type, 0, kptr_field->kptr.btf,
8575                                   kptr_field->kptr.btf_id, true)) {
8576                 verbose(env, "kernel function %s args#%d expected pointer to %s %s\n",
8577                         meta->func_name, argno, btf_type_str(ref_t), ref_tname);
8578                 return -EINVAL;
8579         }
8580         return 0;
8581 }
8582
8583 static int ref_set_release_on_unlock(struct bpf_verifier_env *env, u32 ref_obj_id)
8584 {
8585         struct bpf_func_state *state = cur_func(env);
8586         struct bpf_reg_state *reg;
8587         int i;
8588
8589         /* bpf_spin_lock only allows calling list_push and list_pop, no BPF
8590          * subprogs, no global functions. This means that the references would
8591          * not be released inside the critical section but they may be added to
8592          * the reference state, and the acquired_refs are never copied out for a
8593          * different frame as BPF to BPF calls don't work in bpf_spin_lock
8594          * critical sections.
8595          */
8596         if (!ref_obj_id) {
8597                 verbose(env, "verifier internal error: ref_obj_id is zero for release_on_unlock\n");
8598                 return -EFAULT;
8599         }
8600         for (i = 0; i < state->acquired_refs; i++) {
8601                 if (state->refs[i].id == ref_obj_id) {
8602                         if (state->refs[i].release_on_unlock) {
8603                                 verbose(env, "verifier internal error: expected false release_on_unlock");
8604                                 return -EFAULT;
8605                         }
8606                         state->refs[i].release_on_unlock = true;
8607                         /* Now mark everyone sharing same ref_obj_id as untrusted */
8608                         bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({
8609                                 if (reg->ref_obj_id == ref_obj_id)
8610                                         reg->type |= PTR_UNTRUSTED;
8611                         }));
8612                         return 0;
8613                 }
8614         }
8615         verbose(env, "verifier internal error: ref state missing for ref_obj_id\n");
8616         return -EFAULT;
8617 }
8618
8619 /* Implementation details:
8620  *
8621  * Each register points to some region of memory, which we define as an
8622  * allocation. Each allocation may embed a bpf_spin_lock which protects any
8623  * special BPF objects (bpf_list_head, bpf_rb_root, etc.) part of the same
8624  * allocation. The lock and the data it protects are colocated in the same
8625  * memory region.
8626  *
8627  * Hence, everytime a register holds a pointer value pointing to such
8628  * allocation, the verifier preserves a unique reg->id for it.
8629  *
8630  * The verifier remembers the lock 'ptr' and the lock 'id' whenever
8631  * bpf_spin_lock is called.
8632  *
8633  * To enable this, lock state in the verifier captures two values:
8634  *      active_lock.ptr = Register's type specific pointer
8635  *      active_lock.id  = A unique ID for each register pointer value
8636  *
8637  * Currently, PTR_TO_MAP_VALUE and PTR_TO_BTF_ID | MEM_ALLOC are the two
8638  * supported register types.
8639  *
8640  * The active_lock.ptr in case of map values is the reg->map_ptr, and in case of
8641  * allocated objects is the reg->btf pointer.
8642  *
8643  * The active_lock.id is non-unique for maps supporting direct_value_addr, as we
8644  * can establish the provenance of the map value statically for each distinct
8645  * lookup into such maps. They always contain a single map value hence unique
8646  * IDs for each pseudo load pessimizes the algorithm and rejects valid programs.
8647  *
8648  * So, in case of global variables, they use array maps with max_entries = 1,
8649  * hence their active_lock.ptr becomes map_ptr and id = 0 (since they all point
8650  * into the same map value as max_entries is 1, as described above).
8651  *
8652  * In case of inner map lookups, the inner map pointer has same map_ptr as the
8653  * outer map pointer (in verifier context), but each lookup into an inner map
8654  * assigns a fresh reg->id to the lookup, so while lookups into distinct inner
8655  * maps from the same outer map share the same map_ptr as active_lock.ptr, they
8656  * will get different reg->id assigned to each lookup, hence different
8657  * active_lock.id.
8658  *
8659  * In case of allocated objects, active_lock.ptr is the reg->btf, and the
8660  * reg->id is a unique ID preserved after the NULL pointer check on the pointer
8661  * returned from bpf_obj_new. Each allocation receives a new reg->id.
8662  */
8663 static int check_reg_allocation_locked(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
8664 {
8665         void *ptr;
8666         u32 id;
8667
8668         switch ((int)reg->type) {
8669         case PTR_TO_MAP_VALUE:
8670                 ptr = reg->map_ptr;
8671                 break;
8672         case PTR_TO_BTF_ID | MEM_ALLOC:
8673         case PTR_TO_BTF_ID | MEM_ALLOC | PTR_TRUSTED:
8674                 ptr = reg->btf;
8675                 break;
8676         default:
8677                 verbose(env, "verifier internal error: unknown reg type for lock check\n");
8678                 return -EFAULT;
8679         }
8680         id = reg->id;
8681
8682         if (!env->cur_state->active_lock.ptr)
8683                 return -EINVAL;
8684         if (env->cur_state->active_lock.ptr != ptr ||
8685             env->cur_state->active_lock.id != id) {
8686                 verbose(env, "held lock and object are not in the same allocation\n");
8687                 return -EINVAL;
8688         }
8689         return 0;
8690 }
8691
8692 static bool is_bpf_list_api_kfunc(u32 btf_id)
8693 {
8694         return btf_id == special_kfunc_list[KF_bpf_list_push_front] ||
8695                btf_id == special_kfunc_list[KF_bpf_list_push_back] ||
8696                btf_id == special_kfunc_list[KF_bpf_list_pop_front] ||
8697                btf_id == special_kfunc_list[KF_bpf_list_pop_back];
8698 }
8699
8700 static int process_kf_arg_ptr_to_list_head(struct bpf_verifier_env *env,
8701                                            struct bpf_reg_state *reg, u32 regno,
8702                                            struct bpf_kfunc_call_arg_meta *meta)
8703 {
8704         struct btf_field *field;
8705         struct btf_record *rec;
8706         u32 list_head_off;
8707
8708         if (meta->btf != btf_vmlinux || !is_bpf_list_api_kfunc(meta->func_id)) {
8709                 verbose(env, "verifier internal error: bpf_list_head argument for unknown kfunc\n");
8710                 return -EFAULT;
8711         }
8712
8713         if (!tnum_is_const(reg->var_off)) {
8714                 verbose(env,
8715                         "R%d doesn't have constant offset. bpf_list_head has to be at the constant offset\n",
8716                         regno);
8717                 return -EINVAL;
8718         }
8719
8720         rec = reg_btf_record(reg);
8721         list_head_off = reg->off + reg->var_off.value;
8722         field = btf_record_find(rec, list_head_off, BPF_LIST_HEAD);
8723         if (!field) {
8724                 verbose(env, "bpf_list_head not found at offset=%u\n", list_head_off);
8725                 return -EINVAL;
8726         }
8727
8728         /* All functions require bpf_list_head to be protected using a bpf_spin_lock */
8729         if (check_reg_allocation_locked(env, reg)) {
8730                 verbose(env, "bpf_spin_lock at off=%d must be held for bpf_list_head\n",
8731                         rec->spin_lock_off);
8732                 return -EINVAL;
8733         }
8734
8735         if (meta->arg_list_head.field) {
8736                 verbose(env, "verifier internal error: repeating bpf_list_head arg\n");
8737                 return -EFAULT;
8738         }
8739         meta->arg_list_head.field = field;
8740         return 0;
8741 }
8742
8743 static int process_kf_arg_ptr_to_list_node(struct bpf_verifier_env *env,
8744                                            struct bpf_reg_state *reg, u32 regno,
8745                                            struct bpf_kfunc_call_arg_meta *meta)
8746 {
8747         const struct btf_type *et, *t;
8748         struct btf_field *field;
8749         struct btf_record *rec;
8750         u32 list_node_off;
8751
8752         if (meta->btf != btf_vmlinux ||
8753             (meta->func_id != special_kfunc_list[KF_bpf_list_push_front] &&
8754              meta->func_id != special_kfunc_list[KF_bpf_list_push_back])) {
8755                 verbose(env, "verifier internal error: bpf_list_node argument for unknown kfunc\n");
8756                 return -EFAULT;
8757         }
8758
8759         if (!tnum_is_const(reg->var_off)) {
8760                 verbose(env,
8761                         "R%d doesn't have constant offset. bpf_list_node has to be at the constant offset\n",
8762                         regno);
8763                 return -EINVAL;
8764         }
8765
8766         rec = reg_btf_record(reg);
8767         list_node_off = reg->off + reg->var_off.value;
8768         field = btf_record_find(rec, list_node_off, BPF_LIST_NODE);
8769         if (!field || field->offset != list_node_off) {
8770                 verbose(env, "bpf_list_node not found at offset=%u\n", list_node_off);
8771                 return -EINVAL;
8772         }
8773
8774         field = meta->arg_list_head.field;
8775
8776         et = btf_type_by_id(field->list_head.btf, field->list_head.value_btf_id);
8777         t = btf_type_by_id(reg->btf, reg->btf_id);
8778         if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id, 0, field->list_head.btf,
8779                                   field->list_head.value_btf_id, true)) {
8780                 verbose(env, "operation on bpf_list_head expects arg#1 bpf_list_node at offset=%d "
8781                         "in struct %s, but arg is at offset=%d in struct %s\n",
8782                         field->list_head.node_offset, btf_name_by_offset(field->list_head.btf, et->name_off),
8783                         list_node_off, btf_name_by_offset(reg->btf, t->name_off));
8784                 return -EINVAL;
8785         }
8786
8787         if (list_node_off != field->list_head.node_offset) {
8788                 verbose(env, "arg#1 offset=%d, but expected bpf_list_node at offset=%d in struct %s\n",
8789                         list_node_off, field->list_head.node_offset,
8790                         btf_name_by_offset(field->list_head.btf, et->name_off));
8791                 return -EINVAL;
8792         }
8793         /* Set arg#1 for expiration after unlock */
8794         return ref_set_release_on_unlock(env, reg->ref_obj_id);
8795 }
8796
8797 static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_arg_meta *meta)
8798 {
8799         const char *func_name = meta->func_name, *ref_tname;
8800         const struct btf *btf = meta->btf;
8801         const struct btf_param *args;
8802         u32 i, nargs;
8803         int ret;
8804
8805         args = (const struct btf_param *)(meta->func_proto + 1);
8806         nargs = btf_type_vlen(meta->func_proto);
8807         if (nargs > MAX_BPF_FUNC_REG_ARGS) {
8808                 verbose(env, "Function %s has %d > %d args\n", func_name, nargs,
8809                         MAX_BPF_FUNC_REG_ARGS);
8810                 return -EINVAL;
8811         }
8812
8813         /* Check that BTF function arguments match actual types that the
8814          * verifier sees.
8815          */
8816         for (i = 0; i < nargs; i++) {
8817                 struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[i + 1];
8818                 const struct btf_type *t, *ref_t, *resolve_ret;
8819                 enum bpf_arg_type arg_type = ARG_DONTCARE;
8820                 u32 regno = i + 1, ref_id, type_size;
8821                 bool is_ret_buf_sz = false;
8822                 int kf_arg_type;
8823
8824                 t = btf_type_skip_modifiers(btf, args[i].type, NULL);
8825
8826                 if (is_kfunc_arg_ignore(btf, &args[i]))
8827                         continue;
8828
8829                 if (btf_type_is_scalar(t)) {
8830                         if (reg->type != SCALAR_VALUE) {
8831                                 verbose(env, "R%d is not a scalar\n", regno);
8832                                 return -EINVAL;
8833                         }
8834
8835                         if (is_kfunc_arg_constant(meta->btf, &args[i])) {
8836                                 if (meta->arg_constant.found) {
8837                                         verbose(env, "verifier internal error: only one constant argument permitted\n");
8838                                         return -EFAULT;
8839                                 }
8840                                 if (!tnum_is_const(reg->var_off)) {
8841                                         verbose(env, "R%d must be a known constant\n", regno);
8842                                         return -EINVAL;
8843                                 }
8844                                 ret = mark_chain_precision(env, regno);
8845                                 if (ret < 0)
8846                                         return ret;
8847                                 meta->arg_constant.found = true;
8848                                 meta->arg_constant.value = reg->var_off.value;
8849                         } else if (is_kfunc_arg_scalar_with_name(btf, &args[i], "rdonly_buf_size")) {
8850                                 meta->r0_rdonly = true;
8851                                 is_ret_buf_sz = true;
8852                         } else if (is_kfunc_arg_scalar_with_name(btf, &args[i], "rdwr_buf_size")) {
8853                                 is_ret_buf_sz = true;
8854                         }
8855
8856                         if (is_ret_buf_sz) {
8857                                 if (meta->r0_size) {
8858                                         verbose(env, "2 or more rdonly/rdwr_buf_size parameters for kfunc");
8859                                         return -EINVAL;
8860                                 }
8861
8862                                 if (!tnum_is_const(reg->var_off)) {
8863                                         verbose(env, "R%d is not a const\n", regno);
8864                                         return -EINVAL;
8865                                 }
8866
8867                                 meta->r0_size = reg->var_off.value;
8868                                 ret = mark_chain_precision(env, regno);
8869                                 if (ret)
8870                                         return ret;
8871                         }
8872                         continue;
8873                 }
8874
8875                 if (!btf_type_is_ptr(t)) {
8876                         verbose(env, "Unrecognized arg#%d type %s\n", i, btf_type_str(t));
8877                         return -EINVAL;
8878                 }
8879
8880                 if (reg->ref_obj_id) {
8881                         if (is_kfunc_release(meta) && meta->ref_obj_id) {
8882                                 verbose(env, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n",
8883                                         regno, reg->ref_obj_id,
8884                                         meta->ref_obj_id);
8885                                 return -EFAULT;
8886                         }
8887                         meta->ref_obj_id = reg->ref_obj_id;
8888                         if (is_kfunc_release(meta))
8889                                 meta->release_regno = regno;
8890                 }
8891
8892                 ref_t = btf_type_skip_modifiers(btf, t->type, &ref_id);
8893                 ref_tname = btf_name_by_offset(btf, ref_t->name_off);
8894
8895                 kf_arg_type = get_kfunc_ptr_arg_type(env, meta, t, ref_t, ref_tname, args, i, nargs);
8896                 if (kf_arg_type < 0)
8897                         return kf_arg_type;
8898
8899                 switch (kf_arg_type) {
8900                 case KF_ARG_PTR_TO_ALLOC_BTF_ID:
8901                 case KF_ARG_PTR_TO_BTF_ID:
8902                         if (!is_kfunc_trusted_args(meta) && !is_kfunc_rcu(meta))
8903                                 break;
8904
8905                         if (!is_trusted_reg(reg)) {
8906                                 if (!is_kfunc_rcu(meta)) {
8907                                         verbose(env, "R%d must be referenced or trusted\n", regno);
8908                                         return -EINVAL;
8909                                 }
8910                                 if (!is_rcu_reg(reg)) {
8911                                         verbose(env, "R%d must be a rcu pointer\n", regno);
8912                                         return -EINVAL;
8913                                 }
8914                         }
8915
8916                         fallthrough;
8917                 case KF_ARG_PTR_TO_CTX:
8918                         /* Trusted arguments have the same offset checks as release arguments */
8919                         arg_type |= OBJ_RELEASE;
8920                         break;
8921                 case KF_ARG_PTR_TO_KPTR:
8922                 case KF_ARG_PTR_TO_DYNPTR:
8923                 case KF_ARG_PTR_TO_LIST_HEAD:
8924                 case KF_ARG_PTR_TO_LIST_NODE:
8925                 case KF_ARG_PTR_TO_MEM:
8926                 case KF_ARG_PTR_TO_MEM_SIZE:
8927                         /* Trusted by default */
8928                         break;
8929                 default:
8930                         WARN_ON_ONCE(1);
8931                         return -EFAULT;
8932                 }
8933
8934                 if (is_kfunc_release(meta) && reg->ref_obj_id)
8935                         arg_type |= OBJ_RELEASE;
8936                 ret = check_func_arg_reg_off(env, reg, regno, arg_type);
8937                 if (ret < 0)
8938                         return ret;
8939
8940                 switch (kf_arg_type) {
8941                 case KF_ARG_PTR_TO_CTX:
8942                         if (reg->type != PTR_TO_CTX) {
8943                                 verbose(env, "arg#%d expected pointer to ctx, but got %s\n", i, btf_type_str(t));
8944                                 return -EINVAL;
8945                         }
8946
8947                         if (meta->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx]) {
8948                                 ret = get_kern_ctx_btf_id(&env->log, resolve_prog_type(env->prog));
8949                                 if (ret < 0)
8950                                         return -EINVAL;
8951                                 meta->ret_btf_id  = ret;
8952                         }
8953                         break;
8954                 case KF_ARG_PTR_TO_ALLOC_BTF_ID:
8955                         if (reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
8956                                 verbose(env, "arg#%d expected pointer to allocated object\n", i);
8957                                 return -EINVAL;
8958                         }
8959                         if (!reg->ref_obj_id) {
8960                                 verbose(env, "allocated object must be referenced\n");
8961                                 return -EINVAL;
8962                         }
8963                         if (meta->btf == btf_vmlinux &&
8964                             meta->func_id == special_kfunc_list[KF_bpf_obj_drop_impl]) {
8965                                 meta->arg_obj_drop.btf = reg->btf;
8966                                 meta->arg_obj_drop.btf_id = reg->btf_id;
8967                         }
8968                         break;
8969                 case KF_ARG_PTR_TO_KPTR:
8970                         if (reg->type != PTR_TO_MAP_VALUE) {
8971                                 verbose(env, "arg#0 expected pointer to map value\n");
8972                                 return -EINVAL;
8973                         }
8974                         ret = process_kf_arg_ptr_to_kptr(env, reg, ref_t, ref_tname, meta, i);
8975                         if (ret < 0)
8976                                 return ret;
8977                         break;
8978                 case KF_ARG_PTR_TO_DYNPTR:
8979                         if (reg->type != PTR_TO_STACK &&
8980                             reg->type != CONST_PTR_TO_DYNPTR) {
8981                                 verbose(env, "arg#%d expected pointer to stack or dynptr_ptr\n", i);
8982                                 return -EINVAL;
8983                         }
8984
8985                         ret = process_dynptr_func(env, regno, ARG_PTR_TO_DYNPTR | MEM_RDONLY, NULL);
8986                         if (ret < 0)
8987                                 return ret;
8988                         break;
8989                 case KF_ARG_PTR_TO_LIST_HEAD:
8990                         if (reg->type != PTR_TO_MAP_VALUE &&
8991                             reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
8992                                 verbose(env, "arg#%d expected pointer to map value or allocated object\n", i);
8993                                 return -EINVAL;
8994                         }
8995                         if (reg->type == (PTR_TO_BTF_ID | MEM_ALLOC) && !reg->ref_obj_id) {
8996                                 verbose(env, "allocated object must be referenced\n");
8997                                 return -EINVAL;
8998                         }
8999                         ret = process_kf_arg_ptr_to_list_head(env, reg, regno, meta);
9000                         if (ret < 0)
9001                                 return ret;
9002                         break;
9003                 case KF_ARG_PTR_TO_LIST_NODE:
9004                         if (reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
9005                                 verbose(env, "arg#%d expected pointer to allocated object\n", i);
9006                                 return -EINVAL;
9007                         }
9008                         if (!reg->ref_obj_id) {
9009                                 verbose(env, "allocated object must be referenced\n");
9010                                 return -EINVAL;
9011                         }
9012                         ret = process_kf_arg_ptr_to_list_node(env, reg, regno, meta);
9013                         if (ret < 0)
9014                                 return ret;
9015                         break;
9016                 case KF_ARG_PTR_TO_BTF_ID:
9017                         /* Only base_type is checked, further checks are done here */
9018                         if ((base_type(reg->type) != PTR_TO_BTF_ID ||
9019                              (bpf_type_has_unsafe_modifiers(reg->type) && !is_rcu_reg(reg))) &&
9020                             !reg2btf_ids[base_type(reg->type)]) {
9021                                 verbose(env, "arg#%d is %s ", i, reg_type_str(env, reg->type));
9022                                 verbose(env, "expected %s or socket\n",
9023                                         reg_type_str(env, base_type(reg->type) |
9024                                                           (type_flag(reg->type) & BPF_REG_TRUSTED_MODIFIERS)));
9025                                 return -EINVAL;
9026                         }
9027                         ret = process_kf_arg_ptr_to_btf_id(env, reg, ref_t, ref_tname, ref_id, meta, i);
9028                         if (ret < 0)
9029                                 return ret;
9030                         break;
9031                 case KF_ARG_PTR_TO_MEM:
9032                         resolve_ret = btf_resolve_size(btf, ref_t, &type_size);
9033                         if (IS_ERR(resolve_ret)) {
9034                                 verbose(env, "arg#%d reference type('%s %s') size cannot be determined: %ld\n",
9035                                         i, btf_type_str(ref_t), ref_tname, PTR_ERR(resolve_ret));
9036                                 return -EINVAL;
9037                         }
9038                         ret = check_mem_reg(env, reg, regno, type_size);
9039                         if (ret < 0)
9040                                 return ret;
9041                         break;
9042                 case KF_ARG_PTR_TO_MEM_SIZE:
9043                         ret = check_kfunc_mem_size_reg(env, &regs[regno + 1], regno + 1);
9044                         if (ret < 0) {
9045                                 verbose(env, "arg#%d arg#%d memory, len pair leads to invalid memory access\n", i, i + 1);
9046                                 return ret;
9047                         }
9048                         /* Skip next '__sz' argument */
9049                         i++;
9050                         break;
9051                 }
9052         }
9053
9054         if (is_kfunc_release(meta) && !meta->release_regno) {
9055                 verbose(env, "release kernel function %s expects refcounted PTR_TO_BTF_ID\n",
9056                         func_name);
9057                 return -EINVAL;
9058         }
9059
9060         return 0;
9061 }
9062
9063 static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
9064                             int *insn_idx_p)
9065 {
9066         const struct btf_type *t, *func, *func_proto, *ptr_type;
9067         struct bpf_reg_state *regs = cur_regs(env);
9068         const char *func_name, *ptr_type_name;
9069         bool sleepable, rcu_lock, rcu_unlock;
9070         struct bpf_kfunc_call_arg_meta meta;
9071         u32 i, nargs, func_id, ptr_type_id;
9072         int err, insn_idx = *insn_idx_p;
9073         const struct btf_param *args;
9074         const struct btf_type *ret_t;
9075         struct btf *desc_btf;
9076         u32 *kfunc_flags;
9077
9078         /* skip for now, but return error when we find this in fixup_kfunc_call */
9079         if (!insn->imm)
9080                 return 0;
9081
9082         desc_btf = find_kfunc_desc_btf(env, insn->off);
9083         if (IS_ERR(desc_btf))
9084                 return PTR_ERR(desc_btf);
9085
9086         func_id = insn->imm;
9087         func = btf_type_by_id(desc_btf, func_id);
9088         func_name = btf_name_by_offset(desc_btf, func->name_off);
9089         func_proto = btf_type_by_id(desc_btf, func->type);
9090
9091         kfunc_flags = btf_kfunc_id_set_contains(desc_btf, resolve_prog_type(env->prog), func_id);
9092         if (!kfunc_flags) {
9093                 verbose(env, "calling kernel function %s is not allowed\n",
9094                         func_name);
9095                 return -EACCES;
9096         }
9097
9098         /* Prepare kfunc call metadata */
9099         memset(&meta, 0, sizeof(meta));
9100         meta.btf = desc_btf;
9101         meta.func_id = func_id;
9102         meta.kfunc_flags = *kfunc_flags;
9103         meta.func_proto = func_proto;
9104         meta.func_name = func_name;
9105
9106         if (is_kfunc_destructive(&meta) && !capable(CAP_SYS_BOOT)) {
9107                 verbose(env, "destructive kfunc calls require CAP_SYS_BOOT capability\n");
9108                 return -EACCES;
9109         }
9110
9111         sleepable = is_kfunc_sleepable(&meta);
9112         if (sleepable && !env->prog->aux->sleepable) {
9113                 verbose(env, "program must be sleepable to call sleepable kfunc %s\n", func_name);
9114                 return -EACCES;
9115         }
9116
9117         rcu_lock = is_kfunc_bpf_rcu_read_lock(&meta);
9118         rcu_unlock = is_kfunc_bpf_rcu_read_unlock(&meta);
9119         if ((rcu_lock || rcu_unlock) && !env->rcu_tag_supported) {
9120                 verbose(env, "no vmlinux btf rcu tag support for kfunc %s\n", func_name);
9121                 return -EACCES;
9122         }
9123
9124         if (env->cur_state->active_rcu_lock) {
9125                 struct bpf_func_state *state;
9126                 struct bpf_reg_state *reg;
9127
9128                 if (rcu_lock) {
9129                         verbose(env, "nested rcu read lock (kernel function %s)\n", func_name);
9130                         return -EINVAL;
9131                 } else if (rcu_unlock) {
9132                         bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({
9133                                 if (reg->type & MEM_RCU) {
9134                                         reg->type &= ~(MEM_RCU | PTR_MAYBE_NULL);
9135                                         reg->type |= PTR_UNTRUSTED;
9136                                 }
9137                         }));
9138                         env->cur_state->active_rcu_lock = false;
9139                 } else if (sleepable) {
9140                         verbose(env, "kernel func %s is sleepable within rcu_read_lock region\n", func_name);
9141                         return -EACCES;
9142                 }
9143         } else if (rcu_lock) {
9144                 env->cur_state->active_rcu_lock = true;
9145         } else if (rcu_unlock) {
9146                 verbose(env, "unmatched rcu read unlock (kernel function %s)\n", func_name);
9147                 return -EINVAL;
9148         }
9149
9150         /* Check the arguments */
9151         err = check_kfunc_args(env, &meta);
9152         if (err < 0)
9153                 return err;
9154         /* In case of release function, we get register number of refcounted
9155          * PTR_TO_BTF_ID in bpf_kfunc_arg_meta, do the release now.
9156          */
9157         if (meta.release_regno) {
9158                 err = release_reference(env, regs[meta.release_regno].ref_obj_id);
9159                 if (err) {
9160                         verbose(env, "kfunc %s#%d reference has not been acquired before\n",
9161                                 func_name, func_id);
9162                         return err;
9163                 }
9164         }
9165
9166         for (i = 0; i < CALLER_SAVED_REGS; i++)
9167                 mark_reg_not_init(env, regs, caller_saved[i]);
9168
9169         /* Check return type */
9170         t = btf_type_skip_modifiers(desc_btf, func_proto->type, NULL);
9171
9172         if (is_kfunc_acquire(&meta) && !btf_type_is_struct_ptr(meta.btf, t)) {
9173                 /* Only exception is bpf_obj_new_impl */
9174                 if (meta.btf != btf_vmlinux || meta.func_id != special_kfunc_list[KF_bpf_obj_new_impl]) {
9175                         verbose(env, "acquire kernel function does not return PTR_TO_BTF_ID\n");
9176                         return -EINVAL;
9177                 }
9178         }
9179
9180         if (btf_type_is_scalar(t)) {
9181                 mark_reg_unknown(env, regs, BPF_REG_0);
9182                 mark_btf_func_reg_size(env, BPF_REG_0, t->size);
9183         } else if (btf_type_is_ptr(t)) {
9184                 ptr_type = btf_type_skip_modifiers(desc_btf, t->type, &ptr_type_id);
9185
9186                 if (meta.btf == btf_vmlinux && btf_id_set_contains(&special_kfunc_set, meta.func_id)) {
9187                         if (meta.func_id == special_kfunc_list[KF_bpf_obj_new_impl]) {
9188                                 struct btf *ret_btf;
9189                                 u32 ret_btf_id;
9190
9191                                 if (unlikely(!bpf_global_ma_set))
9192                                         return -ENOMEM;
9193
9194                                 if (((u64)(u32)meta.arg_constant.value) != meta.arg_constant.value) {
9195                                         verbose(env, "local type ID argument must be in range [0, U32_MAX]\n");
9196                                         return -EINVAL;
9197                                 }
9198
9199                                 ret_btf = env->prog->aux->btf;
9200                                 ret_btf_id = meta.arg_constant.value;
9201
9202                                 /* This may be NULL due to user not supplying a BTF */
9203                                 if (!ret_btf) {
9204                                         verbose(env, "bpf_obj_new requires prog BTF\n");
9205                                         return -EINVAL;
9206                                 }
9207
9208                                 ret_t = btf_type_by_id(ret_btf, ret_btf_id);
9209                                 if (!ret_t || !__btf_type_is_struct(ret_t)) {
9210                                         verbose(env, "bpf_obj_new type ID argument must be of a struct\n");
9211                                         return -EINVAL;
9212                                 }
9213
9214                                 mark_reg_known_zero(env, regs, BPF_REG_0);
9215                                 regs[BPF_REG_0].type = PTR_TO_BTF_ID | MEM_ALLOC;
9216                                 regs[BPF_REG_0].btf = ret_btf;
9217                                 regs[BPF_REG_0].btf_id = ret_btf_id;
9218
9219                                 env->insn_aux_data[insn_idx].obj_new_size = ret_t->size;
9220                                 env->insn_aux_data[insn_idx].kptr_struct_meta =
9221                                         btf_find_struct_meta(ret_btf, ret_btf_id);
9222                         } else if (meta.func_id == special_kfunc_list[KF_bpf_obj_drop_impl]) {
9223                                 env->insn_aux_data[insn_idx].kptr_struct_meta =
9224                                         btf_find_struct_meta(meta.arg_obj_drop.btf,
9225                                                              meta.arg_obj_drop.btf_id);
9226                         } else if (meta.func_id == special_kfunc_list[KF_bpf_list_pop_front] ||
9227                                    meta.func_id == special_kfunc_list[KF_bpf_list_pop_back]) {
9228                                 struct btf_field *field = meta.arg_list_head.field;
9229
9230                                 mark_reg_known_zero(env, regs, BPF_REG_0);
9231                                 regs[BPF_REG_0].type = PTR_TO_BTF_ID | MEM_ALLOC;
9232                                 regs[BPF_REG_0].btf = field->list_head.btf;
9233                                 regs[BPF_REG_0].btf_id = field->list_head.value_btf_id;
9234                                 regs[BPF_REG_0].off = field->list_head.node_offset;
9235                         } else if (meta.func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx]) {
9236                                 mark_reg_known_zero(env, regs, BPF_REG_0);
9237                                 regs[BPF_REG_0].type = PTR_TO_BTF_ID | PTR_TRUSTED;
9238                                 regs[BPF_REG_0].btf = desc_btf;
9239                                 regs[BPF_REG_0].btf_id = meta.ret_btf_id;
9240                         } else if (meta.func_id == special_kfunc_list[KF_bpf_rdonly_cast]) {
9241                                 ret_t = btf_type_by_id(desc_btf, meta.arg_constant.value);
9242                                 if (!ret_t || !btf_type_is_struct(ret_t)) {
9243                                         verbose(env,
9244                                                 "kfunc bpf_rdonly_cast type ID argument must be of a struct\n");
9245                                         return -EINVAL;
9246                                 }
9247
9248                                 mark_reg_known_zero(env, regs, BPF_REG_0);
9249                                 regs[BPF_REG_0].type = PTR_TO_BTF_ID | PTR_UNTRUSTED;
9250                                 regs[BPF_REG_0].btf = desc_btf;
9251                                 regs[BPF_REG_0].btf_id = meta.arg_constant.value;
9252                         } else {
9253                                 verbose(env, "kernel function %s unhandled dynamic return type\n",
9254                                         meta.func_name);
9255                                 return -EFAULT;
9256                         }
9257                 } else if (!__btf_type_is_struct(ptr_type)) {
9258                         if (!meta.r0_size) {
9259                                 ptr_type_name = btf_name_by_offset(desc_btf,
9260                                                                    ptr_type->name_off);
9261                                 verbose(env,
9262                                         "kernel function %s returns pointer type %s %s is not supported\n",
9263                                         func_name,
9264                                         btf_type_str(ptr_type),
9265                                         ptr_type_name);
9266                                 return -EINVAL;
9267                         }
9268
9269                         mark_reg_known_zero(env, regs, BPF_REG_0);
9270                         regs[BPF_REG_0].type = PTR_TO_MEM;
9271                         regs[BPF_REG_0].mem_size = meta.r0_size;
9272
9273                         if (meta.r0_rdonly)
9274                                 regs[BPF_REG_0].type |= MEM_RDONLY;
9275
9276                         /* Ensures we don't access the memory after a release_reference() */
9277                         if (meta.ref_obj_id)
9278                                 regs[BPF_REG_0].ref_obj_id = meta.ref_obj_id;
9279                 } else {
9280                         mark_reg_known_zero(env, regs, BPF_REG_0);
9281                         regs[BPF_REG_0].btf = desc_btf;
9282                         regs[BPF_REG_0].type = PTR_TO_BTF_ID;
9283                         regs[BPF_REG_0].btf_id = ptr_type_id;
9284                 }
9285
9286                 if (is_kfunc_ret_null(&meta)) {
9287                         regs[BPF_REG_0].type |= PTR_MAYBE_NULL;
9288                         /* For mark_ptr_or_null_reg, see 93c230e3f5bd6 */
9289                         regs[BPF_REG_0].id = ++env->id_gen;
9290                 }
9291                 mark_btf_func_reg_size(env, BPF_REG_0, sizeof(void *));
9292                 if (is_kfunc_acquire(&meta)) {
9293                         int id = acquire_reference_state(env, insn_idx);
9294
9295                         if (id < 0)
9296                                 return id;
9297                         if (is_kfunc_ret_null(&meta))
9298                                 regs[BPF_REG_0].id = id;
9299                         regs[BPF_REG_0].ref_obj_id = id;
9300                 }
9301                 if (reg_may_point_to_spin_lock(&regs[BPF_REG_0]) && !regs[BPF_REG_0].id)
9302                         regs[BPF_REG_0].id = ++env->id_gen;
9303         } /* else { add_kfunc_call() ensures it is btf_type_is_void(t) } */
9304
9305         nargs = btf_type_vlen(func_proto);
9306         args = (const struct btf_param *)(func_proto + 1);
9307         for (i = 0; i < nargs; i++) {
9308                 u32 regno = i + 1;
9309
9310                 t = btf_type_skip_modifiers(desc_btf, args[i].type, NULL);
9311                 if (btf_type_is_ptr(t))
9312                         mark_btf_func_reg_size(env, regno, sizeof(void *));
9313                 else
9314                         /* scalar. ensured by btf_check_kfunc_arg_match() */
9315                         mark_btf_func_reg_size(env, regno, t->size);
9316         }
9317
9318         return 0;
9319 }
9320
9321 static bool signed_add_overflows(s64 a, s64 b)
9322 {
9323         /* Do the add in u64, where overflow is well-defined */
9324         s64 res = (s64)((u64)a + (u64)b);
9325
9326         if (b < 0)
9327                 return res > a;
9328         return res < a;
9329 }
9330
9331 static bool signed_add32_overflows(s32 a, s32 b)
9332 {
9333         /* Do the add in u32, where overflow is well-defined */
9334         s32 res = (s32)((u32)a + (u32)b);
9335
9336         if (b < 0)
9337                 return res > a;
9338         return res < a;
9339 }
9340
9341 static bool signed_sub_overflows(s64 a, s64 b)
9342 {
9343         /* Do the sub in u64, where overflow is well-defined */
9344         s64 res = (s64)((u64)a - (u64)b);
9345
9346         if (b < 0)
9347                 return res < a;
9348         return res > a;
9349 }
9350
9351 static bool signed_sub32_overflows(s32 a, s32 b)
9352 {
9353         /* Do the sub in u32, where overflow is well-defined */
9354         s32 res = (s32)((u32)a - (u32)b);
9355
9356         if (b < 0)
9357                 return res < a;
9358         return res > a;
9359 }
9360
9361 static bool check_reg_sane_offset(struct bpf_verifier_env *env,
9362                                   const struct bpf_reg_state *reg,
9363                                   enum bpf_reg_type type)
9364 {
9365         bool known = tnum_is_const(reg->var_off);
9366         s64 val = reg->var_off.value;
9367         s64 smin = reg->smin_value;
9368
9369         if (known && (val >= BPF_MAX_VAR_OFF || val <= -BPF_MAX_VAR_OFF)) {
9370                 verbose(env, "math between %s pointer and %lld is not allowed\n",
9371                         reg_type_str(env, type), val);
9372                 return false;
9373         }
9374
9375         if (reg->off >= BPF_MAX_VAR_OFF || reg->off <= -BPF_MAX_VAR_OFF) {
9376                 verbose(env, "%s pointer offset %d is not allowed\n",
9377                         reg_type_str(env, type), reg->off);
9378                 return false;
9379         }
9380
9381         if (smin == S64_MIN) {
9382                 verbose(env, "math between %s pointer and register with unbounded min value is not allowed\n",
9383                         reg_type_str(env, type));
9384                 return false;
9385         }
9386
9387         if (smin >= BPF_MAX_VAR_OFF || smin <= -BPF_MAX_VAR_OFF) {
9388                 verbose(env, "value %lld makes %s pointer be out of bounds\n",
9389                         smin, reg_type_str(env, type));
9390                 return false;
9391         }
9392
9393         return true;
9394 }
9395
9396 enum {
9397         REASON_BOUNDS   = -1,
9398         REASON_TYPE     = -2,
9399         REASON_PATHS    = -3,
9400         REASON_LIMIT    = -4,
9401         REASON_STACK    = -5,
9402 };
9403
9404 static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg,
9405                               u32 *alu_limit, bool mask_to_left)
9406 {
9407         u32 max = 0, ptr_limit = 0;
9408
9409         switch (ptr_reg->type) {
9410         case PTR_TO_STACK:
9411                 /* Offset 0 is out-of-bounds, but acceptable start for the
9412                  * left direction, see BPF_REG_FP. Also, unknown scalar
9413                  * offset where we would need to deal with min/max bounds is
9414                  * currently prohibited for unprivileged.
9415                  */
9416                 max = MAX_BPF_STACK + mask_to_left;
9417                 ptr_limit = -(ptr_reg->var_off.value + ptr_reg->off);
9418                 break;
9419         case PTR_TO_MAP_VALUE:
9420                 max = ptr_reg->map_ptr->value_size;
9421                 ptr_limit = (mask_to_left ?
9422                              ptr_reg->smin_value :
9423                              ptr_reg->umax_value) + ptr_reg->off;
9424                 break;
9425         default:
9426                 return REASON_TYPE;
9427         }
9428
9429         if (ptr_limit >= max)
9430                 return REASON_LIMIT;
9431         *alu_limit = ptr_limit;
9432         return 0;
9433 }
9434
9435 static bool can_skip_alu_sanitation(const struct bpf_verifier_env *env,
9436                                     const struct bpf_insn *insn)
9437 {
9438         return env->bypass_spec_v1 || BPF_SRC(insn->code) == BPF_K;
9439 }
9440
9441 static int update_alu_sanitation_state(struct bpf_insn_aux_data *aux,
9442                                        u32 alu_state, u32 alu_limit)
9443 {
9444         /* If we arrived here from different branches with different
9445          * state or limits to sanitize, then this won't work.
9446          */
9447         if (aux->alu_state &&
9448             (aux->alu_state != alu_state ||
9449              aux->alu_limit != alu_limit))
9450                 return REASON_PATHS;
9451
9452         /* Corresponding fixup done in do_misc_fixups(). */
9453         aux->alu_state = alu_state;
9454         aux->alu_limit = alu_limit;
9455         return 0;
9456 }
9457
9458 static int sanitize_val_alu(struct bpf_verifier_env *env,
9459                             struct bpf_insn *insn)
9460 {
9461         struct bpf_insn_aux_data *aux = cur_aux(env);
9462
9463         if (can_skip_alu_sanitation(env, insn))
9464                 return 0;
9465
9466         return update_alu_sanitation_state(aux, BPF_ALU_NON_POINTER, 0);
9467 }
9468
9469 static bool sanitize_needed(u8 opcode)
9470 {
9471         return opcode == BPF_ADD || opcode == BPF_SUB;
9472 }
9473
9474 struct bpf_sanitize_info {
9475         struct bpf_insn_aux_data aux;
9476         bool mask_to_left;
9477 };
9478
9479 static struct bpf_verifier_state *
9480 sanitize_speculative_path(struct bpf_verifier_env *env,
9481                           const struct bpf_insn *insn,
9482                           u32 next_idx, u32 curr_idx)
9483 {
9484         struct bpf_verifier_state *branch;
9485         struct bpf_reg_state *regs;
9486
9487         branch = push_stack(env, next_idx, curr_idx, true);
9488         if (branch && insn) {
9489                 regs = branch->frame[branch->curframe]->regs;
9490                 if (BPF_SRC(insn->code) == BPF_K) {
9491                         mark_reg_unknown(env, regs, insn->dst_reg);
9492                 } else if (BPF_SRC(insn->code) == BPF_X) {
9493                         mark_reg_unknown(env, regs, insn->dst_reg);
9494                         mark_reg_unknown(env, regs, insn->src_reg);
9495                 }
9496         }
9497         return branch;
9498 }
9499
9500 static int sanitize_ptr_alu(struct bpf_verifier_env *env,
9501                             struct bpf_insn *insn,
9502                             const struct bpf_reg_state *ptr_reg,
9503                             const struct bpf_reg_state *off_reg,
9504                             struct bpf_reg_state *dst_reg,
9505                             struct bpf_sanitize_info *info,
9506                             const bool commit_window)
9507 {
9508         struct bpf_insn_aux_data *aux = commit_window ? cur_aux(env) : &info->aux;
9509         struct bpf_verifier_state *vstate = env->cur_state;
9510         bool off_is_imm = tnum_is_const(off_reg->var_off);
9511         bool off_is_neg = off_reg->smin_value < 0;
9512         bool ptr_is_dst_reg = ptr_reg == dst_reg;
9513         u8 opcode = BPF_OP(insn->code);
9514         u32 alu_state, alu_limit;
9515         struct bpf_reg_state tmp;
9516         bool ret;
9517         int err;
9518
9519         if (can_skip_alu_sanitation(env, insn))
9520                 return 0;
9521
9522         /* We already marked aux for masking from non-speculative
9523          * paths, thus we got here in the first place. We only care
9524          * to explore bad access from here.
9525          */
9526         if (vstate->speculative)
9527                 goto do_sim;
9528
9529         if (!commit_window) {
9530                 if (!tnum_is_const(off_reg->var_off) &&
9531                     (off_reg->smin_value < 0) != (off_reg->smax_value < 0))
9532                         return REASON_BOUNDS;
9533
9534                 info->mask_to_left = (opcode == BPF_ADD &&  off_is_neg) ||
9535                                      (opcode == BPF_SUB && !off_is_neg);
9536         }
9537
9538         err = retrieve_ptr_limit(ptr_reg, &alu_limit, info->mask_to_left);
9539         if (err < 0)
9540                 return err;
9541
9542         if (commit_window) {
9543                 /* In commit phase we narrow the masking window based on
9544                  * the observed pointer move after the simulated operation.
9545                  */
9546                 alu_state = info->aux.alu_state;
9547                 alu_limit = abs(info->aux.alu_limit - alu_limit);
9548         } else {
9549                 alu_state  = off_is_neg ? BPF_ALU_NEG_VALUE : 0;
9550                 alu_state |= off_is_imm ? BPF_ALU_IMMEDIATE : 0;
9551                 alu_state |= ptr_is_dst_reg ?
9552                              BPF_ALU_SANITIZE_SRC : BPF_ALU_SANITIZE_DST;
9553
9554                 /* Limit pruning on unknown scalars to enable deep search for
9555                  * potential masking differences from other program paths.
9556                  */
9557                 if (!off_is_imm)
9558                         env->explore_alu_limits = true;
9559         }
9560
9561         err = update_alu_sanitation_state(aux, alu_state, alu_limit);
9562         if (err < 0)
9563                 return err;
9564 do_sim:
9565         /* If we're in commit phase, we're done here given we already
9566          * pushed the truncated dst_reg into the speculative verification
9567          * stack.
9568          *
9569          * Also, when register is a known constant, we rewrite register-based
9570          * operation to immediate-based, and thus do not need masking (and as
9571          * a consequence, do not need to simulate the zero-truncation either).
9572          */
9573         if (commit_window || off_is_imm)
9574                 return 0;
9575
9576         /* Simulate and find potential out-of-bounds access under
9577          * speculative execution from truncation as a result of
9578          * masking when off was not within expected range. If off
9579          * sits in dst, then we temporarily need to move ptr there
9580          * to simulate dst (== 0) +/-= ptr. Needed, for example,
9581          * for cases where we use K-based arithmetic in one direction
9582          * and truncated reg-based in the other in order to explore
9583          * bad access.
9584          */
9585         if (!ptr_is_dst_reg) {
9586                 tmp = *dst_reg;
9587                 *dst_reg = *ptr_reg;
9588         }
9589         ret = sanitize_speculative_path(env, NULL, env->insn_idx + 1,
9590                                         env->insn_idx);
9591         if (!ptr_is_dst_reg && ret)
9592                 *dst_reg = tmp;
9593         return !ret ? REASON_STACK : 0;
9594 }
9595
9596 static void sanitize_mark_insn_seen(struct bpf_verifier_env *env)
9597 {
9598         struct bpf_verifier_state *vstate = env->cur_state;
9599
9600         /* If we simulate paths under speculation, we don't update the
9601          * insn as 'seen' such that when we verify unreachable paths in
9602          * the non-speculative domain, sanitize_dead_code() can still
9603          * rewrite/sanitize them.
9604          */
9605         if (!vstate->speculative)
9606                 env->insn_aux_data[env->insn_idx].seen = env->pass_cnt;
9607 }
9608
9609 static int sanitize_err(struct bpf_verifier_env *env,
9610                         const struct bpf_insn *insn, int reason,
9611                         const struct bpf_reg_state *off_reg,
9612                         const struct bpf_reg_state *dst_reg)
9613 {
9614         static const char *err = "pointer arithmetic with it prohibited for !root";
9615         const char *op = BPF_OP(insn->code) == BPF_ADD ? "add" : "sub";
9616         u32 dst = insn->dst_reg, src = insn->src_reg;
9617
9618         switch (reason) {
9619         case REASON_BOUNDS:
9620                 verbose(env, "R%d has unknown scalar with mixed signed bounds, %s\n",
9621                         off_reg == dst_reg ? dst : src, err);
9622                 break;
9623         case REASON_TYPE:
9624                 verbose(env, "R%d has pointer with unsupported alu operation, %s\n",
9625                         off_reg == dst_reg ? src : dst, err);
9626                 break;
9627         case REASON_PATHS:
9628                 verbose(env, "R%d tried to %s from different maps, paths or scalars, %s\n",
9629                         dst, op, err);
9630                 break;
9631         case REASON_LIMIT:
9632                 verbose(env, "R%d tried to %s beyond pointer bounds, %s\n",
9633                         dst, op, err);
9634                 break;
9635         case REASON_STACK:
9636                 verbose(env, "R%d could not be pushed for speculative verification, %s\n",
9637                         dst, err);
9638                 break;
9639         default:
9640                 verbose(env, "verifier internal error: unknown reason (%d)\n",
9641                         reason);
9642                 break;
9643         }
9644
9645         return -EACCES;
9646 }
9647
9648 /* check that stack access falls within stack limits and that 'reg' doesn't
9649  * have a variable offset.
9650  *
9651  * Variable offset is prohibited for unprivileged mode for simplicity since it
9652  * requires corresponding support in Spectre masking for stack ALU.  See also
9653  * retrieve_ptr_limit().
9654  *
9655  *
9656  * 'off' includes 'reg->off'.
9657  */
9658 static int check_stack_access_for_ptr_arithmetic(
9659                                 struct bpf_verifier_env *env,
9660                                 int regno,
9661                                 const struct bpf_reg_state *reg,
9662                                 int off)
9663 {
9664         if (!tnum_is_const(reg->var_off)) {
9665                 char tn_buf[48];
9666
9667                 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
9668                 verbose(env, "R%d variable stack access prohibited for !root, var_off=%s off=%d\n",
9669                         regno, tn_buf, off);
9670                 return -EACCES;
9671         }
9672
9673         if (off >= 0 || off < -MAX_BPF_STACK) {
9674                 verbose(env, "R%d stack pointer arithmetic goes out of range, "
9675                         "prohibited for !root; off=%d\n", regno, off);
9676                 return -EACCES;
9677         }
9678
9679         return 0;
9680 }
9681
9682 static int sanitize_check_bounds(struct bpf_verifier_env *env,
9683                                  const struct bpf_insn *insn,
9684                                  const struct bpf_reg_state *dst_reg)
9685 {
9686         u32 dst = insn->dst_reg;
9687
9688         /* For unprivileged we require that resulting offset must be in bounds
9689          * in order to be able to sanitize access later on.
9690          */
9691         if (env->bypass_spec_v1)
9692                 return 0;
9693
9694         switch (dst_reg->type) {
9695         case PTR_TO_STACK:
9696                 if (check_stack_access_for_ptr_arithmetic(env, dst, dst_reg,
9697                                         dst_reg->off + dst_reg->var_off.value))
9698                         return -EACCES;
9699                 break;
9700         case PTR_TO_MAP_VALUE:
9701                 if (check_map_access(env, dst, dst_reg->off, 1, false, ACCESS_HELPER)) {
9702                         verbose(env, "R%d pointer arithmetic of map value goes out of range, "
9703                                 "prohibited for !root\n", dst);
9704                         return -EACCES;
9705                 }
9706                 break;
9707         default:
9708                 break;
9709         }
9710
9711         return 0;
9712 }
9713
9714 /* Handles arithmetic on a pointer and a scalar: computes new min/max and var_off.
9715  * Caller should also handle BPF_MOV case separately.
9716  * If we return -EACCES, caller may want to try again treating pointer as a
9717  * scalar.  So we only emit a diagnostic if !env->allow_ptr_leaks.
9718  */
9719 static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
9720                                    struct bpf_insn *insn,
9721                                    const struct bpf_reg_state *ptr_reg,
9722                                    const struct bpf_reg_state *off_reg)
9723 {
9724         struct bpf_verifier_state *vstate = env->cur_state;
9725         struct bpf_func_state *state = vstate->frame[vstate->curframe];
9726         struct bpf_reg_state *regs = state->regs, *dst_reg;
9727         bool known = tnum_is_const(off_reg->var_off);
9728         s64 smin_val = off_reg->smin_value, smax_val = off_reg->smax_value,
9729             smin_ptr = ptr_reg->smin_value, smax_ptr = ptr_reg->smax_value;
9730         u64 umin_val = off_reg->umin_value, umax_val = off_reg->umax_value,
9731             umin_ptr = ptr_reg->umin_value, umax_ptr = ptr_reg->umax_value;
9732         struct bpf_sanitize_info info = {};
9733         u8 opcode = BPF_OP(insn->code);
9734         u32 dst = insn->dst_reg;
9735         int ret;
9736
9737         dst_reg = &regs[dst];
9738
9739         if ((known && (smin_val != smax_val || umin_val != umax_val)) ||
9740             smin_val > smax_val || umin_val > umax_val) {
9741                 /* Taint dst register if offset had invalid bounds derived from
9742                  * e.g. dead branches.
9743                  */
9744                 __mark_reg_unknown(env, dst_reg);
9745                 return 0;
9746         }
9747
9748         if (BPF_CLASS(insn->code) != BPF_ALU64) {
9749                 /* 32-bit ALU ops on pointers produce (meaningless) scalars */
9750                 if (opcode == BPF_SUB && env->allow_ptr_leaks) {
9751                         __mark_reg_unknown(env, dst_reg);
9752                         return 0;
9753                 }
9754
9755                 verbose(env,
9756                         "R%d 32-bit pointer arithmetic prohibited\n",
9757                         dst);
9758                 return -EACCES;
9759         }
9760
9761         if (ptr_reg->type & PTR_MAYBE_NULL) {
9762                 verbose(env, "R%d pointer arithmetic on %s prohibited, null-check it first\n",
9763                         dst, reg_type_str(env, ptr_reg->type));
9764                 return -EACCES;
9765         }
9766
9767         switch (base_type(ptr_reg->type)) {
9768         case CONST_PTR_TO_MAP:
9769                 /* smin_val represents the known value */
9770                 if (known && smin_val == 0 && opcode == BPF_ADD)
9771                         break;
9772                 fallthrough;
9773         case PTR_TO_PACKET_END:
9774         case PTR_TO_SOCKET:
9775         case PTR_TO_SOCK_COMMON:
9776         case PTR_TO_TCP_SOCK:
9777         case PTR_TO_XDP_SOCK:
9778                 verbose(env, "R%d pointer arithmetic on %s prohibited\n",
9779                         dst, reg_type_str(env, ptr_reg->type));
9780                 return -EACCES;
9781         default:
9782                 break;
9783         }
9784
9785         /* In case of 'scalar += pointer', dst_reg inherits pointer type and id.
9786          * The id may be overwritten later if we create a new variable offset.
9787          */
9788         dst_reg->type = ptr_reg->type;
9789         dst_reg->id = ptr_reg->id;
9790
9791         if (!check_reg_sane_offset(env, off_reg, ptr_reg->type) ||
9792             !check_reg_sane_offset(env, ptr_reg, ptr_reg->type))
9793                 return -EINVAL;
9794
9795         /* pointer types do not carry 32-bit bounds at the moment. */
9796         __mark_reg32_unbounded(dst_reg);
9797
9798         if (sanitize_needed(opcode)) {
9799                 ret = sanitize_ptr_alu(env, insn, ptr_reg, off_reg, dst_reg,
9800                                        &info, false);
9801                 if (ret < 0)
9802                         return sanitize_err(env, insn, ret, off_reg, dst_reg);
9803         }
9804
9805         switch (opcode) {
9806         case BPF_ADD:
9807                 /* We can take a fixed offset as long as it doesn't overflow
9808                  * the s32 'off' field
9809                  */
9810                 if (known && (ptr_reg->off + smin_val ==
9811                               (s64)(s32)(ptr_reg->off + smin_val))) {
9812                         /* pointer += K.  Accumulate it into fixed offset */
9813                         dst_reg->smin_value = smin_ptr;
9814                         dst_reg->smax_value = smax_ptr;
9815                         dst_reg->umin_value = umin_ptr;
9816                         dst_reg->umax_value = umax_ptr;
9817                         dst_reg->var_off = ptr_reg->var_off;
9818                         dst_reg->off = ptr_reg->off + smin_val;
9819                         dst_reg->raw = ptr_reg->raw;
9820                         break;
9821                 }
9822                 /* A new variable offset is created.  Note that off_reg->off
9823                  * == 0, since it's a scalar.
9824                  * dst_reg gets the pointer type and since some positive
9825                  * integer value was added to the pointer, give it a new 'id'
9826                  * if it's a PTR_TO_PACKET.
9827                  * this creates a new 'base' pointer, off_reg (variable) gets
9828                  * added into the variable offset, and we copy the fixed offset
9829                  * from ptr_reg.
9830                  */
9831                 if (signed_add_overflows(smin_ptr, smin_val) ||
9832                     signed_add_overflows(smax_ptr, smax_val)) {
9833                         dst_reg->smin_value = S64_MIN;
9834                         dst_reg->smax_value = S64_MAX;
9835                 } else {
9836                         dst_reg->smin_value = smin_ptr + smin_val;
9837                         dst_reg->smax_value = smax_ptr + smax_val;
9838                 }
9839                 if (umin_ptr + umin_val < umin_ptr ||
9840                     umax_ptr + umax_val < umax_ptr) {
9841                         dst_reg->umin_value = 0;
9842                         dst_reg->umax_value = U64_MAX;
9843                 } else {
9844                         dst_reg->umin_value = umin_ptr + umin_val;
9845                         dst_reg->umax_value = umax_ptr + umax_val;
9846                 }
9847                 dst_reg->var_off = tnum_add(ptr_reg->var_off, off_reg->var_off);
9848                 dst_reg->off = ptr_reg->off;
9849                 dst_reg->raw = ptr_reg->raw;
9850                 if (reg_is_pkt_pointer(ptr_reg)) {
9851                         dst_reg->id = ++env->id_gen;
9852                         /* something was added to pkt_ptr, set range to zero */
9853                         memset(&dst_reg->raw, 0, sizeof(dst_reg->raw));
9854                 }
9855                 break;
9856         case BPF_SUB:
9857                 if (dst_reg == off_reg) {
9858                         /* scalar -= pointer.  Creates an unknown scalar */
9859                         verbose(env, "R%d tried to subtract pointer from scalar\n",
9860                                 dst);
9861                         return -EACCES;
9862                 }
9863                 /* We don't allow subtraction from FP, because (according to
9864                  * test_verifier.c test "invalid fp arithmetic", JITs might not
9865                  * be able to deal with it.
9866                  */
9867                 if (ptr_reg->type == PTR_TO_STACK) {
9868                         verbose(env, "R%d subtraction from stack pointer prohibited\n",
9869                                 dst);
9870                         return -EACCES;
9871                 }
9872                 if (known && (ptr_reg->off - smin_val ==
9873                               (s64)(s32)(ptr_reg->off - smin_val))) {
9874                         /* pointer -= K.  Subtract it from fixed offset */
9875                         dst_reg->smin_value = smin_ptr;
9876                         dst_reg->smax_value = smax_ptr;
9877                         dst_reg->umin_value = umin_ptr;
9878                         dst_reg->umax_value = umax_ptr;
9879                         dst_reg->var_off = ptr_reg->var_off;
9880                         dst_reg->id = ptr_reg->id;
9881                         dst_reg->off = ptr_reg->off - smin_val;
9882                         dst_reg->raw = ptr_reg->raw;
9883                         break;
9884                 }
9885                 /* A new variable offset is created.  If the subtrahend is known
9886                  * nonnegative, then any reg->range we had before is still good.
9887                  */
9888                 if (signed_sub_overflows(smin_ptr, smax_val) ||
9889                     signed_sub_overflows(smax_ptr, smin_val)) {
9890                         /* Overflow possible, we know nothing */
9891                         dst_reg->smin_value = S64_MIN;
9892                         dst_reg->smax_value = S64_MAX;
9893                 } else {
9894                         dst_reg->smin_value = smin_ptr - smax_val;
9895                         dst_reg->smax_value = smax_ptr - smin_val;
9896                 }
9897                 if (umin_ptr < umax_val) {
9898                         /* Overflow possible, we know nothing */
9899                         dst_reg->umin_value = 0;
9900                         dst_reg->umax_value = U64_MAX;
9901                 } else {
9902                         /* Cannot overflow (as long as bounds are consistent) */
9903                         dst_reg->umin_value = umin_ptr - umax_val;
9904                         dst_reg->umax_value = umax_ptr - umin_val;
9905                 }
9906                 dst_reg->var_off = tnum_sub(ptr_reg->var_off, off_reg->var_off);
9907                 dst_reg->off = ptr_reg->off;
9908                 dst_reg->raw = ptr_reg->raw;
9909                 if (reg_is_pkt_pointer(ptr_reg)) {
9910                         dst_reg->id = ++env->id_gen;
9911                         /* something was added to pkt_ptr, set range to zero */
9912                         if (smin_val < 0)
9913                                 memset(&dst_reg->raw, 0, sizeof(dst_reg->raw));
9914                 }
9915                 break;
9916         case BPF_AND:
9917         case BPF_OR:
9918         case BPF_XOR:
9919                 /* bitwise ops on pointers are troublesome, prohibit. */
9920                 verbose(env, "R%d bitwise operator %s on pointer prohibited\n",
9921                         dst, bpf_alu_string[opcode >> 4]);
9922                 return -EACCES;
9923         default:
9924                 /* other operators (e.g. MUL,LSH) produce non-pointer results */
9925                 verbose(env, "R%d pointer arithmetic with %s operator prohibited\n",
9926                         dst, bpf_alu_string[opcode >> 4]);
9927                 return -EACCES;
9928         }
9929
9930         if (!check_reg_sane_offset(env, dst_reg, ptr_reg->type))
9931                 return -EINVAL;
9932         reg_bounds_sync(dst_reg);
9933         if (sanitize_check_bounds(env, insn, dst_reg) < 0)
9934                 return -EACCES;
9935         if (sanitize_needed(opcode)) {
9936                 ret = sanitize_ptr_alu(env, insn, dst_reg, off_reg, dst_reg,
9937                                        &info, true);
9938                 if (ret < 0)
9939                         return sanitize_err(env, insn, ret, off_reg, dst_reg);
9940         }
9941
9942         return 0;
9943 }
9944
9945 static void scalar32_min_max_add(struct bpf_reg_state *dst_reg,
9946                                  struct bpf_reg_state *src_reg)
9947 {
9948         s32 smin_val = src_reg->s32_min_value;
9949         s32 smax_val = src_reg->s32_max_value;
9950         u32 umin_val = src_reg->u32_min_value;
9951         u32 umax_val = src_reg->u32_max_value;
9952
9953         if (signed_add32_overflows(dst_reg->s32_min_value, smin_val) ||
9954             signed_add32_overflows(dst_reg->s32_max_value, smax_val)) {
9955                 dst_reg->s32_min_value = S32_MIN;
9956                 dst_reg->s32_max_value = S32_MAX;
9957         } else {
9958                 dst_reg->s32_min_value += smin_val;
9959                 dst_reg->s32_max_value += smax_val;
9960         }
9961         if (dst_reg->u32_min_value + umin_val < umin_val ||
9962             dst_reg->u32_max_value + umax_val < umax_val) {
9963                 dst_reg->u32_min_value = 0;
9964                 dst_reg->u32_max_value = U32_MAX;
9965         } else {
9966                 dst_reg->u32_min_value += umin_val;
9967                 dst_reg->u32_max_value += umax_val;
9968         }
9969 }
9970
9971 static void scalar_min_max_add(struct bpf_reg_state *dst_reg,
9972                                struct bpf_reg_state *src_reg)
9973 {
9974         s64 smin_val = src_reg->smin_value;
9975         s64 smax_val = src_reg->smax_value;
9976         u64 umin_val = src_reg->umin_value;
9977         u64 umax_val = src_reg->umax_value;
9978
9979         if (signed_add_overflows(dst_reg->smin_value, smin_val) ||
9980             signed_add_overflows(dst_reg->smax_value, smax_val)) {
9981                 dst_reg->smin_value = S64_MIN;
9982                 dst_reg->smax_value = S64_MAX;
9983         } else {
9984                 dst_reg->smin_value += smin_val;
9985                 dst_reg->smax_value += smax_val;
9986         }
9987         if (dst_reg->umin_value + umin_val < umin_val ||
9988             dst_reg->umax_value + umax_val < umax_val) {
9989                 dst_reg->umin_value = 0;
9990                 dst_reg->umax_value = U64_MAX;
9991         } else {
9992                 dst_reg->umin_value += umin_val;
9993                 dst_reg->umax_value += umax_val;
9994         }
9995 }
9996
9997 static void scalar32_min_max_sub(struct bpf_reg_state *dst_reg,
9998                                  struct bpf_reg_state *src_reg)
9999 {
10000         s32 smin_val = src_reg->s32_min_value;
10001         s32 smax_val = src_reg->s32_max_value;
10002         u32 umin_val = src_reg->u32_min_value;
10003         u32 umax_val = src_reg->u32_max_value;
10004
10005         if (signed_sub32_overflows(dst_reg->s32_min_value, smax_val) ||
10006             signed_sub32_overflows(dst_reg->s32_max_value, smin_val)) {
10007                 /* Overflow possible, we know nothing */
10008                 dst_reg->s32_min_value = S32_MIN;
10009                 dst_reg->s32_max_value = S32_MAX;
10010         } else {
10011                 dst_reg->s32_min_value -= smax_val;
10012                 dst_reg->s32_max_value -= smin_val;
10013         }
10014         if (dst_reg->u32_min_value < umax_val) {
10015                 /* Overflow possible, we know nothing */
10016                 dst_reg->u32_min_value = 0;
10017                 dst_reg->u32_max_value = U32_MAX;
10018         } else {
10019                 /* Cannot overflow (as long as bounds are consistent) */
10020                 dst_reg->u32_min_value -= umax_val;
10021                 dst_reg->u32_max_value -= umin_val;
10022         }
10023 }
10024
10025 static void scalar_min_max_sub(struct bpf_reg_state *dst_reg,
10026                                struct bpf_reg_state *src_reg)
10027 {
10028         s64 smin_val = src_reg->smin_value;
10029         s64 smax_val = src_reg->smax_value;
10030         u64 umin_val = src_reg->umin_value;
10031         u64 umax_val = src_reg->umax_value;
10032
10033         if (signed_sub_overflows(dst_reg->smin_value, smax_val) ||
10034             signed_sub_overflows(dst_reg->smax_value, smin_val)) {
10035                 /* Overflow possible, we know nothing */
10036                 dst_reg->smin_value = S64_MIN;
10037                 dst_reg->smax_value = S64_MAX;
10038         } else {
10039                 dst_reg->smin_value -= smax_val;
10040                 dst_reg->smax_value -= smin_val;
10041         }
10042         if (dst_reg->umin_value < umax_val) {
10043                 /* Overflow possible, we know nothing */
10044                 dst_reg->umin_value = 0;
10045                 dst_reg->umax_value = U64_MAX;
10046         } else {
10047                 /* Cannot overflow (as long as bounds are consistent) */
10048                 dst_reg->umin_value -= umax_val;
10049                 dst_reg->umax_value -= umin_val;
10050         }
10051 }
10052
10053 static void scalar32_min_max_mul(struct bpf_reg_state *dst_reg,
10054                                  struct bpf_reg_state *src_reg)
10055 {
10056         s32 smin_val = src_reg->s32_min_value;
10057         u32 umin_val = src_reg->u32_min_value;
10058         u32 umax_val = src_reg->u32_max_value;
10059
10060         if (smin_val < 0 || dst_reg->s32_min_value < 0) {
10061                 /* Ain't nobody got time to multiply that sign */
10062                 __mark_reg32_unbounded(dst_reg);
10063                 return;
10064         }
10065         /* Both values are positive, so we can work with unsigned and
10066          * copy the result to signed (unless it exceeds S32_MAX).
10067          */
10068         if (umax_val > U16_MAX || dst_reg->u32_max_value > U16_MAX) {
10069                 /* Potential overflow, we know nothing */
10070                 __mark_reg32_unbounded(dst_reg);
10071                 return;
10072         }
10073         dst_reg->u32_min_value *= umin_val;
10074         dst_reg->u32_max_value *= umax_val;
10075         if (dst_reg->u32_max_value > S32_MAX) {
10076                 /* Overflow possible, we know nothing */
10077                 dst_reg->s32_min_value = S32_MIN;
10078                 dst_reg->s32_max_value = S32_MAX;
10079         } else {
10080                 dst_reg->s32_min_value = dst_reg->u32_min_value;
10081                 dst_reg->s32_max_value = dst_reg->u32_max_value;
10082         }
10083 }
10084
10085 static void scalar_min_max_mul(struct bpf_reg_state *dst_reg,
10086                                struct bpf_reg_state *src_reg)
10087 {
10088         s64 smin_val = src_reg->smin_value;
10089         u64 umin_val = src_reg->umin_value;
10090         u64 umax_val = src_reg->umax_value;
10091
10092         if (smin_val < 0 || dst_reg->smin_value < 0) {
10093                 /* Ain't nobody got time to multiply that sign */
10094                 __mark_reg64_unbounded(dst_reg);
10095                 return;
10096         }
10097         /* Both values are positive, so we can work with unsigned and
10098          * copy the result to signed (unless it exceeds S64_MAX).
10099          */
10100         if (umax_val > U32_MAX || dst_reg->umax_value > U32_MAX) {
10101                 /* Potential overflow, we know nothing */
10102                 __mark_reg64_unbounded(dst_reg);
10103                 return;
10104         }
10105         dst_reg->umin_value *= umin_val;
10106         dst_reg->umax_value *= umax_val;
10107         if (dst_reg->umax_value > S64_MAX) {
10108                 /* Overflow possible, we know nothing */
10109                 dst_reg->smin_value = S64_MIN;
10110                 dst_reg->smax_value = S64_MAX;
10111         } else {
10112                 dst_reg->smin_value = dst_reg->umin_value;
10113                 dst_reg->smax_value = dst_reg->umax_value;
10114         }
10115 }
10116
10117 static void scalar32_min_max_and(struct bpf_reg_state *dst_reg,
10118                                  struct bpf_reg_state *src_reg)
10119 {
10120         bool src_known = tnum_subreg_is_const(src_reg->var_off);
10121         bool dst_known = tnum_subreg_is_const(dst_reg->var_off);
10122         struct tnum var32_off = tnum_subreg(dst_reg->var_off);
10123         s32 smin_val = src_reg->s32_min_value;
10124         u32 umax_val = src_reg->u32_max_value;
10125
10126         if (src_known && dst_known) {
10127                 __mark_reg32_known(dst_reg, var32_off.value);
10128                 return;
10129         }
10130
10131         /* We get our minimum from the var_off, since that's inherently
10132          * bitwise.  Our maximum is the minimum of the operands' maxima.
10133          */
10134         dst_reg->u32_min_value = var32_off.value;
10135         dst_reg->u32_max_value = min(dst_reg->u32_max_value, umax_val);
10136         if (dst_reg->s32_min_value < 0 || smin_val < 0) {
10137                 /* Lose signed bounds when ANDing negative numbers,
10138                  * ain't nobody got time for that.
10139                  */
10140                 dst_reg->s32_min_value = S32_MIN;
10141                 dst_reg->s32_max_value = S32_MAX;
10142         } else {
10143                 /* ANDing two positives gives a positive, so safe to
10144                  * cast result into s64.
10145                  */
10146                 dst_reg->s32_min_value = dst_reg->u32_min_value;
10147                 dst_reg->s32_max_value = dst_reg->u32_max_value;
10148         }
10149 }
10150
10151 static void scalar_min_max_and(struct bpf_reg_state *dst_reg,
10152                                struct bpf_reg_state *src_reg)
10153 {
10154         bool src_known = tnum_is_const(src_reg->var_off);
10155         bool dst_known = tnum_is_const(dst_reg->var_off);
10156         s64 smin_val = src_reg->smin_value;
10157         u64 umax_val = src_reg->umax_value;
10158
10159         if (src_known && dst_known) {
10160                 __mark_reg_known(dst_reg, dst_reg->var_off.value);
10161                 return;
10162         }
10163
10164         /* We get our minimum from the var_off, since that's inherently
10165          * bitwise.  Our maximum is the minimum of the operands' maxima.
10166          */
10167         dst_reg->umin_value = dst_reg->var_off.value;
10168         dst_reg->umax_value = min(dst_reg->umax_value, umax_val);
10169         if (dst_reg->smin_value < 0 || smin_val < 0) {
10170                 /* Lose signed bounds when ANDing negative numbers,
10171                  * ain't nobody got time for that.
10172                  */
10173                 dst_reg->smin_value = S64_MIN;
10174                 dst_reg->smax_value = S64_MAX;
10175         } else {
10176                 /* ANDing two positives gives a positive, so safe to
10177                  * cast result into s64.
10178                  */
10179                 dst_reg->smin_value = dst_reg->umin_value;
10180                 dst_reg->smax_value = dst_reg->umax_value;
10181         }
10182         /* We may learn something more from the var_off */
10183         __update_reg_bounds(dst_reg);
10184 }
10185
10186 static void scalar32_min_max_or(struct bpf_reg_state *dst_reg,
10187                                 struct bpf_reg_state *src_reg)
10188 {
10189         bool src_known = tnum_subreg_is_const(src_reg->var_off);
10190         bool dst_known = tnum_subreg_is_const(dst_reg->var_off);
10191         struct tnum var32_off = tnum_subreg(dst_reg->var_off);
10192         s32 smin_val = src_reg->s32_min_value;
10193         u32 umin_val = src_reg->u32_min_value;
10194
10195         if (src_known && dst_known) {
10196                 __mark_reg32_known(dst_reg, var32_off.value);
10197                 return;
10198         }
10199
10200         /* We get our maximum from the var_off, and our minimum is the
10201          * maximum of the operands' minima
10202          */
10203         dst_reg->u32_min_value = max(dst_reg->u32_min_value, umin_val);
10204         dst_reg->u32_max_value = var32_off.value | var32_off.mask;
10205         if (dst_reg->s32_min_value < 0 || smin_val < 0) {
10206                 /* Lose signed bounds when ORing negative numbers,
10207                  * ain't nobody got time for that.
10208                  */
10209                 dst_reg->s32_min_value = S32_MIN;
10210                 dst_reg->s32_max_value = S32_MAX;
10211         } else {
10212                 /* ORing two positives gives a positive, so safe to
10213                  * cast result into s64.
10214                  */
10215                 dst_reg->s32_min_value = dst_reg->u32_min_value;
10216                 dst_reg->s32_max_value = dst_reg->u32_max_value;
10217         }
10218 }
10219
10220 static void scalar_min_max_or(struct bpf_reg_state *dst_reg,
10221                               struct bpf_reg_state *src_reg)
10222 {
10223         bool src_known = tnum_is_const(src_reg->var_off);
10224         bool dst_known = tnum_is_const(dst_reg->var_off);
10225         s64 smin_val = src_reg->smin_value;
10226         u64 umin_val = src_reg->umin_value;
10227
10228         if (src_known && dst_known) {
10229                 __mark_reg_known(dst_reg, dst_reg->var_off.value);
10230                 return;
10231         }
10232
10233         /* We get our maximum from the var_off, and our minimum is the
10234          * maximum of the operands' minima
10235          */
10236         dst_reg->umin_value = max(dst_reg->umin_value, umin_val);
10237         dst_reg->umax_value = dst_reg->var_off.value | dst_reg->var_off.mask;
10238         if (dst_reg->smin_value < 0 || smin_val < 0) {
10239                 /* Lose signed bounds when ORing negative numbers,
10240                  * ain't nobody got time for that.
10241                  */
10242                 dst_reg->smin_value = S64_MIN;
10243                 dst_reg->smax_value = S64_MAX;
10244         } else {
10245                 /* ORing two positives gives a positive, so safe to
10246                  * cast result into s64.
10247                  */
10248                 dst_reg->smin_value = dst_reg->umin_value;
10249                 dst_reg->smax_value = dst_reg->umax_value;
10250         }
10251         /* We may learn something more from the var_off */
10252         __update_reg_bounds(dst_reg);
10253 }
10254
10255 static void scalar32_min_max_xor(struct bpf_reg_state *dst_reg,
10256                                  struct bpf_reg_state *src_reg)
10257 {
10258         bool src_known = tnum_subreg_is_const(src_reg->var_off);
10259         bool dst_known = tnum_subreg_is_const(dst_reg->var_off);
10260         struct tnum var32_off = tnum_subreg(dst_reg->var_off);
10261         s32 smin_val = src_reg->s32_min_value;
10262
10263         if (src_known && dst_known) {
10264                 __mark_reg32_known(dst_reg, var32_off.value);
10265                 return;
10266         }
10267
10268         /* We get both minimum and maximum from the var32_off. */
10269         dst_reg->u32_min_value = var32_off.value;
10270         dst_reg->u32_max_value = var32_off.value | var32_off.mask;
10271
10272         if (dst_reg->s32_min_value >= 0 && smin_val >= 0) {
10273                 /* XORing two positive sign numbers gives a positive,
10274                  * so safe to cast u32 result into s32.
10275                  */
10276                 dst_reg->s32_min_value = dst_reg->u32_min_value;
10277                 dst_reg->s32_max_value = dst_reg->u32_max_value;
10278         } else {
10279                 dst_reg->s32_min_value = S32_MIN;
10280                 dst_reg->s32_max_value = S32_MAX;
10281         }
10282 }
10283
10284 static void scalar_min_max_xor(struct bpf_reg_state *dst_reg,
10285                                struct bpf_reg_state *src_reg)
10286 {
10287         bool src_known = tnum_is_const(src_reg->var_off);
10288         bool dst_known = tnum_is_const(dst_reg->var_off);
10289         s64 smin_val = src_reg->smin_value;
10290
10291         if (src_known && dst_known) {
10292                 /* dst_reg->var_off.value has been updated earlier */
10293                 __mark_reg_known(dst_reg, dst_reg->var_off.value);
10294                 return;
10295         }
10296
10297         /* We get both minimum and maximum from the var_off. */
10298         dst_reg->umin_value = dst_reg->var_off.value;
10299         dst_reg->umax_value = dst_reg->var_off.value | dst_reg->var_off.mask;
10300
10301         if (dst_reg->smin_value >= 0 && smin_val >= 0) {
10302                 /* XORing two positive sign numbers gives a positive,
10303                  * so safe to cast u64 result into s64.
10304                  */
10305                 dst_reg->smin_value = dst_reg->umin_value;
10306                 dst_reg->smax_value = dst_reg->umax_value;
10307         } else {
10308                 dst_reg->smin_value = S64_MIN;
10309                 dst_reg->smax_value = S64_MAX;
10310         }
10311
10312         __update_reg_bounds(dst_reg);
10313 }
10314
10315 static void __scalar32_min_max_lsh(struct bpf_reg_state *dst_reg,
10316                                    u64 umin_val, u64 umax_val)
10317 {
10318         /* We lose all sign bit information (except what we can pick
10319          * up from var_off)
10320          */
10321         dst_reg->s32_min_value = S32_MIN;
10322         dst_reg->s32_max_value = S32_MAX;
10323         /* If we might shift our top bit out, then we know nothing */
10324         if (umax_val > 31 || dst_reg->u32_max_value > 1ULL << (31 - umax_val)) {
10325                 dst_reg->u32_min_value = 0;
10326                 dst_reg->u32_max_value = U32_MAX;
10327         } else {
10328                 dst_reg->u32_min_value <<= umin_val;
10329                 dst_reg->u32_max_value <<= umax_val;
10330         }
10331 }
10332
10333 static void scalar32_min_max_lsh(struct bpf_reg_state *dst_reg,
10334                                  struct bpf_reg_state *src_reg)
10335 {
10336         u32 umax_val = src_reg->u32_max_value;
10337         u32 umin_val = src_reg->u32_min_value;
10338         /* u32 alu operation will zext upper bits */
10339         struct tnum subreg = tnum_subreg(dst_reg->var_off);
10340
10341         __scalar32_min_max_lsh(dst_reg, umin_val, umax_val);
10342         dst_reg->var_off = tnum_subreg(tnum_lshift(subreg, umin_val));
10343         /* Not required but being careful mark reg64 bounds as unknown so
10344          * that we are forced to pick them up from tnum and zext later and
10345          * if some path skips this step we are still safe.
10346          */
10347         __mark_reg64_unbounded(dst_reg);
10348         __update_reg32_bounds(dst_reg);
10349 }
10350
10351 static void __scalar64_min_max_lsh(struct bpf_reg_state *dst_reg,
10352                                    u64 umin_val, u64 umax_val)
10353 {
10354         /* Special case <<32 because it is a common compiler pattern to sign
10355          * extend subreg by doing <<32 s>>32. In this case if 32bit bounds are
10356          * positive we know this shift will also be positive so we can track
10357          * bounds correctly. Otherwise we lose all sign bit information except
10358          * what we can pick up from var_off. Perhaps we can generalize this
10359          * later to shifts of any length.
10360          */
10361         if (umin_val == 32 && umax_val == 32 && dst_reg->s32_max_value >= 0)
10362                 dst_reg->smax_value = (s64)dst_reg->s32_max_value << 32;
10363         else
10364                 dst_reg->smax_value = S64_MAX;
10365
10366         if (umin_val == 32 && umax_val == 32 && dst_reg->s32_min_value >= 0)
10367                 dst_reg->smin_value = (s64)dst_reg->s32_min_value << 32;
10368         else
10369                 dst_reg->smin_value = S64_MIN;
10370
10371         /* If we might shift our top bit out, then we know nothing */
10372         if (dst_reg->umax_value > 1ULL << (63 - umax_val)) {
10373                 dst_reg->umin_value = 0;
10374                 dst_reg->umax_value = U64_MAX;
10375         } else {
10376                 dst_reg->umin_value <<= umin_val;
10377                 dst_reg->umax_value <<= umax_val;
10378         }
10379 }
10380
10381 static void scalar_min_max_lsh(struct bpf_reg_state *dst_reg,
10382                                struct bpf_reg_state *src_reg)
10383 {
10384         u64 umax_val = src_reg->umax_value;
10385         u64 umin_val = src_reg->umin_value;
10386
10387         /* scalar64 calc uses 32bit unshifted bounds so must be called first */
10388         __scalar64_min_max_lsh(dst_reg, umin_val, umax_val);
10389         __scalar32_min_max_lsh(dst_reg, umin_val, umax_val);
10390
10391         dst_reg->var_off = tnum_lshift(dst_reg->var_off, umin_val);
10392         /* We may learn something more from the var_off */
10393         __update_reg_bounds(dst_reg);
10394 }
10395
10396 static void scalar32_min_max_rsh(struct bpf_reg_state *dst_reg,
10397                                  struct bpf_reg_state *src_reg)
10398 {
10399         struct tnum subreg = tnum_subreg(dst_reg->var_off);
10400         u32 umax_val = src_reg->u32_max_value;
10401         u32 umin_val = src_reg->u32_min_value;
10402
10403         /* BPF_RSH is an unsigned shift.  If the value in dst_reg might
10404          * be negative, then either:
10405          * 1) src_reg might be zero, so the sign bit of the result is
10406          *    unknown, so we lose our signed bounds
10407          * 2) it's known negative, thus the unsigned bounds capture the
10408          *    signed bounds
10409          * 3) the signed bounds cross zero, so they tell us nothing
10410          *    about the result
10411          * If the value in dst_reg is known nonnegative, then again the
10412          * unsigned bounds capture the signed bounds.
10413          * Thus, in all cases it suffices to blow away our signed bounds
10414          * and rely on inferring new ones from the unsigned bounds and
10415          * var_off of the result.
10416          */
10417         dst_reg->s32_min_value = S32_MIN;
10418         dst_reg->s32_max_value = S32_MAX;
10419
10420         dst_reg->var_off = tnum_rshift(subreg, umin_val);
10421         dst_reg->u32_min_value >>= umax_val;
10422         dst_reg->u32_max_value >>= umin_val;
10423
10424         __mark_reg64_unbounded(dst_reg);
10425         __update_reg32_bounds(dst_reg);
10426 }
10427
10428 static void scalar_min_max_rsh(struct bpf_reg_state *dst_reg,
10429                                struct bpf_reg_state *src_reg)
10430 {
10431         u64 umax_val = src_reg->umax_value;
10432         u64 umin_val = src_reg->umin_value;
10433
10434         /* BPF_RSH is an unsigned shift.  If the value in dst_reg might
10435          * be negative, then either:
10436          * 1) src_reg might be zero, so the sign bit of the result is
10437          *    unknown, so we lose our signed bounds
10438          * 2) it's known negative, thus the unsigned bounds capture the
10439          *    signed bounds
10440          * 3) the signed bounds cross zero, so they tell us nothing
10441          *    about the result
10442          * If the value in dst_reg is known nonnegative, then again the
10443          * unsigned bounds capture the signed bounds.
10444          * Thus, in all cases it suffices to blow away our signed bounds
10445          * and rely on inferring new ones from the unsigned bounds and
10446          * var_off of the result.
10447          */
10448         dst_reg->smin_value = S64_MIN;
10449         dst_reg->smax_value = S64_MAX;
10450         dst_reg->var_off = tnum_rshift(dst_reg->var_off, umin_val);
10451         dst_reg->umin_value >>= umax_val;
10452         dst_reg->umax_value >>= umin_val;
10453
10454         /* Its not easy to operate on alu32 bounds here because it depends
10455          * on bits being shifted in. Take easy way out and mark unbounded
10456          * so we can recalculate later from tnum.
10457          */
10458         __mark_reg32_unbounded(dst_reg);
10459         __update_reg_bounds(dst_reg);
10460 }
10461
10462 static void scalar32_min_max_arsh(struct bpf_reg_state *dst_reg,
10463                                   struct bpf_reg_state *src_reg)
10464 {
10465         u64 umin_val = src_reg->u32_min_value;
10466
10467         /* Upon reaching here, src_known is true and
10468          * umax_val is equal to umin_val.
10469          */
10470         dst_reg->s32_min_value = (u32)(((s32)dst_reg->s32_min_value) >> umin_val);
10471         dst_reg->s32_max_value = (u32)(((s32)dst_reg->s32_max_value) >> umin_val);
10472
10473         dst_reg->var_off = tnum_arshift(tnum_subreg(dst_reg->var_off), umin_val, 32);
10474
10475         /* blow away the dst_reg umin_value/umax_value and rely on
10476          * dst_reg var_off to refine the result.
10477          */
10478         dst_reg->u32_min_value = 0;
10479         dst_reg->u32_max_value = U32_MAX;
10480
10481         __mark_reg64_unbounded(dst_reg);
10482         __update_reg32_bounds(dst_reg);
10483 }
10484
10485 static void scalar_min_max_arsh(struct bpf_reg_state *dst_reg,
10486                                 struct bpf_reg_state *src_reg)
10487 {
10488         u64 umin_val = src_reg->umin_value;
10489
10490         /* Upon reaching here, src_known is true and umax_val is equal
10491          * to umin_val.
10492          */
10493         dst_reg->smin_value >>= umin_val;
10494         dst_reg->smax_value >>= umin_val;
10495
10496         dst_reg->var_off = tnum_arshift(dst_reg->var_off, umin_val, 64);
10497
10498         /* blow away the dst_reg umin_value/umax_value and rely on
10499          * dst_reg var_off to refine the result.
10500          */
10501         dst_reg->umin_value = 0;
10502         dst_reg->umax_value = U64_MAX;
10503
10504         /* Its not easy to operate on alu32 bounds here because it depends
10505          * on bits being shifted in from upper 32-bits. Take easy way out
10506          * and mark unbounded so we can recalculate later from tnum.
10507          */
10508         __mark_reg32_unbounded(dst_reg);
10509         __update_reg_bounds(dst_reg);
10510 }
10511
10512 /* WARNING: This function does calculations on 64-bit values, but the actual
10513  * execution may occur on 32-bit values. Therefore, things like bitshifts
10514  * need extra checks in the 32-bit case.
10515  */
10516 static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
10517                                       struct bpf_insn *insn,
10518                                       struct bpf_reg_state *dst_reg,
10519                                       struct bpf_reg_state src_reg)
10520 {
10521         struct bpf_reg_state *regs = cur_regs(env);
10522         u8 opcode = BPF_OP(insn->code);
10523         bool src_known;
10524         s64 smin_val, smax_val;
10525         u64 umin_val, umax_val;
10526         s32 s32_min_val, s32_max_val;
10527         u32 u32_min_val, u32_max_val;
10528         u64 insn_bitness = (BPF_CLASS(insn->code) == BPF_ALU64) ? 64 : 32;
10529         bool alu32 = (BPF_CLASS(insn->code) != BPF_ALU64);
10530         int ret;
10531
10532         smin_val = src_reg.smin_value;
10533         smax_val = src_reg.smax_value;
10534         umin_val = src_reg.umin_value;
10535         umax_val = src_reg.umax_value;
10536
10537         s32_min_val = src_reg.s32_min_value;
10538         s32_max_val = src_reg.s32_max_value;
10539         u32_min_val = src_reg.u32_min_value;
10540         u32_max_val = src_reg.u32_max_value;
10541
10542         if (alu32) {
10543                 src_known = tnum_subreg_is_const(src_reg.var_off);
10544                 if ((src_known &&
10545                      (s32_min_val != s32_max_val || u32_min_val != u32_max_val)) ||
10546                     s32_min_val > s32_max_val || u32_min_val > u32_max_val) {
10547                         /* Taint dst register if offset had invalid bounds
10548                          * derived from e.g. dead branches.
10549                          */
10550                         __mark_reg_unknown(env, dst_reg);
10551                         return 0;
10552                 }
10553         } else {
10554                 src_known = tnum_is_const(src_reg.var_off);
10555                 if ((src_known &&
10556                      (smin_val != smax_val || umin_val != umax_val)) ||
10557                     smin_val > smax_val || umin_val > umax_val) {
10558                         /* Taint dst register if offset had invalid bounds
10559                          * derived from e.g. dead branches.
10560                          */
10561                         __mark_reg_unknown(env, dst_reg);
10562                         return 0;
10563                 }
10564         }
10565
10566         if (!src_known &&
10567             opcode != BPF_ADD && opcode != BPF_SUB && opcode != BPF_AND) {
10568                 __mark_reg_unknown(env, dst_reg);
10569                 return 0;
10570         }
10571
10572         if (sanitize_needed(opcode)) {
10573                 ret = sanitize_val_alu(env, insn);
10574                 if (ret < 0)
10575                         return sanitize_err(env, insn, ret, NULL, NULL);
10576         }
10577
10578         /* Calculate sign/unsigned bounds and tnum for alu32 and alu64 bit ops.
10579          * There are two classes of instructions: The first class we track both
10580          * alu32 and alu64 sign/unsigned bounds independently this provides the
10581          * greatest amount of precision when alu operations are mixed with jmp32
10582          * operations. These operations are BPF_ADD, BPF_SUB, BPF_MUL, BPF_ADD,
10583          * and BPF_OR. This is possible because these ops have fairly easy to
10584          * understand and calculate behavior in both 32-bit and 64-bit alu ops.
10585          * See alu32 verifier tests for examples. The second class of
10586          * operations, BPF_LSH, BPF_RSH, and BPF_ARSH, however are not so easy
10587          * with regards to tracking sign/unsigned bounds because the bits may
10588          * cross subreg boundaries in the alu64 case. When this happens we mark
10589          * the reg unbounded in the subreg bound space and use the resulting
10590          * tnum to calculate an approximation of the sign/unsigned bounds.
10591          */
10592         switch (opcode) {
10593         case BPF_ADD:
10594                 scalar32_min_max_add(dst_reg, &src_reg);
10595                 scalar_min_max_add(dst_reg, &src_reg);
10596                 dst_reg->var_off = tnum_add(dst_reg->var_off, src_reg.var_off);
10597                 break;
10598         case BPF_SUB:
10599                 scalar32_min_max_sub(dst_reg, &src_reg);
10600                 scalar_min_max_sub(dst_reg, &src_reg);
10601                 dst_reg->var_off = tnum_sub(dst_reg->var_off, src_reg.var_off);
10602                 break;
10603         case BPF_MUL:
10604                 dst_reg->var_off = tnum_mul(dst_reg->var_off, src_reg.var_off);
10605                 scalar32_min_max_mul(dst_reg, &src_reg);
10606                 scalar_min_max_mul(dst_reg, &src_reg);
10607                 break;
10608         case BPF_AND:
10609                 dst_reg->var_off = tnum_and(dst_reg->var_off, src_reg.var_off);
10610                 scalar32_min_max_and(dst_reg, &src_reg);
10611                 scalar_min_max_and(dst_reg, &src_reg);
10612                 break;
10613         case BPF_OR:
10614                 dst_reg->var_off = tnum_or(dst_reg->var_off, src_reg.var_off);
10615                 scalar32_min_max_or(dst_reg, &src_reg);
10616                 scalar_min_max_or(dst_reg, &src_reg);
10617                 break;
10618         case BPF_XOR:
10619                 dst_reg->var_off = tnum_xor(dst_reg->var_off, src_reg.var_off);
10620                 scalar32_min_max_xor(dst_reg, &src_reg);
10621                 scalar_min_max_xor(dst_reg, &src_reg);
10622                 break;
10623         case BPF_LSH:
10624                 if (umax_val >= insn_bitness) {
10625                         /* Shifts greater than 31 or 63 are undefined.
10626                          * This includes shifts by a negative number.
10627                          */
10628                         mark_reg_unknown(env, regs, insn->dst_reg);
10629                         break;
10630                 }
10631                 if (alu32)
10632                         scalar32_min_max_lsh(dst_reg, &src_reg);
10633                 else
10634                         scalar_min_max_lsh(dst_reg, &src_reg);
10635                 break;
10636         case BPF_RSH:
10637                 if (umax_val >= insn_bitness) {
10638                         /* Shifts greater than 31 or 63 are undefined.
10639                          * This includes shifts by a negative number.
10640                          */
10641                         mark_reg_unknown(env, regs, insn->dst_reg);
10642                         break;
10643                 }
10644                 if (alu32)
10645                         scalar32_min_max_rsh(dst_reg, &src_reg);
10646                 else
10647                         scalar_min_max_rsh(dst_reg, &src_reg);
10648                 break;
10649         case BPF_ARSH:
10650                 if (umax_val >= insn_bitness) {
10651                         /* Shifts greater than 31 or 63 are undefined.
10652                          * This includes shifts by a negative number.
10653                          */
10654                         mark_reg_unknown(env, regs, insn->dst_reg);
10655                         break;
10656                 }
10657                 if (alu32)
10658                         scalar32_min_max_arsh(dst_reg, &src_reg);
10659                 else
10660                         scalar_min_max_arsh(dst_reg, &src_reg);
10661                 break;
10662         default:
10663                 mark_reg_unknown(env, regs, insn->dst_reg);
10664                 break;
10665         }
10666
10667         /* ALU32 ops are zero extended into 64bit register */
10668         if (alu32)
10669                 zext_32_to_64(dst_reg);
10670         reg_bounds_sync(dst_reg);
10671         return 0;
10672 }
10673
10674 /* Handles ALU ops other than BPF_END, BPF_NEG and BPF_MOV: computes new min/max
10675  * and var_off.
10676  */
10677 static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
10678                                    struct bpf_insn *insn)
10679 {
10680         struct bpf_verifier_state *vstate = env->cur_state;
10681         struct bpf_func_state *state = vstate->frame[vstate->curframe];
10682         struct bpf_reg_state *regs = state->regs, *dst_reg, *src_reg;
10683         struct bpf_reg_state *ptr_reg = NULL, off_reg = {0};
10684         u8 opcode = BPF_OP(insn->code);
10685         int err;
10686
10687         dst_reg = &regs[insn->dst_reg];
10688         src_reg = NULL;
10689         if (dst_reg->type != SCALAR_VALUE)
10690                 ptr_reg = dst_reg;
10691         else
10692                 /* Make sure ID is cleared otherwise dst_reg min/max could be
10693                  * incorrectly propagated into other registers by find_equal_scalars()
10694                  */
10695                 dst_reg->id = 0;
10696         if (BPF_SRC(insn->code) == BPF_X) {
10697                 src_reg = &regs[insn->src_reg];
10698                 if (src_reg->type != SCALAR_VALUE) {
10699                         if (dst_reg->type != SCALAR_VALUE) {
10700                                 /* Combining two pointers by any ALU op yields
10701                                  * an arbitrary scalar. Disallow all math except
10702                                  * pointer subtraction
10703                                  */
10704                                 if (opcode == BPF_SUB && env->allow_ptr_leaks) {
10705                                         mark_reg_unknown(env, regs, insn->dst_reg);
10706                                         return 0;
10707                                 }
10708                                 verbose(env, "R%d pointer %s pointer prohibited\n",
10709                                         insn->dst_reg,
10710                                         bpf_alu_string[opcode >> 4]);
10711                                 return -EACCES;
10712                         } else {
10713                                 /* scalar += pointer
10714                                  * This is legal, but we have to reverse our
10715                                  * src/dest handling in computing the range
10716                                  */
10717                                 err = mark_chain_precision(env, insn->dst_reg);
10718                                 if (err)
10719                                         return err;
10720                                 return adjust_ptr_min_max_vals(env, insn,
10721                                                                src_reg, dst_reg);
10722                         }
10723                 } else if (ptr_reg) {
10724                         /* pointer += scalar */
10725                         err = mark_chain_precision(env, insn->src_reg);
10726                         if (err)
10727                                 return err;
10728                         return adjust_ptr_min_max_vals(env, insn,
10729                                                        dst_reg, src_reg);
10730                 } else if (dst_reg->precise) {
10731                         /* if dst_reg is precise, src_reg should be precise as well */
10732                         err = mark_chain_precision(env, insn->src_reg);
10733                         if (err)
10734                                 return err;
10735                 }
10736         } else {
10737                 /* Pretend the src is a reg with a known value, since we only
10738                  * need to be able to read from this state.
10739                  */
10740                 off_reg.type = SCALAR_VALUE;
10741                 __mark_reg_known(&off_reg, insn->imm);
10742                 src_reg = &off_reg;
10743                 if (ptr_reg) /* pointer += K */
10744                         return adjust_ptr_min_max_vals(env, insn,
10745                                                        ptr_reg, src_reg);
10746         }
10747
10748         /* Got here implies adding two SCALAR_VALUEs */
10749         if (WARN_ON_ONCE(ptr_reg)) {
10750                 print_verifier_state(env, state, true);
10751                 verbose(env, "verifier internal error: unexpected ptr_reg\n");
10752                 return -EINVAL;
10753         }
10754         if (WARN_ON(!src_reg)) {
10755                 print_verifier_state(env, state, true);
10756                 verbose(env, "verifier internal error: no src_reg\n");
10757                 return -EINVAL;
10758         }
10759         return adjust_scalar_min_max_vals(env, insn, dst_reg, *src_reg);
10760 }
10761
10762 /* check validity of 32-bit and 64-bit arithmetic operations */
10763 static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
10764 {
10765         struct bpf_reg_state *regs = cur_regs(env);
10766         u8 opcode = BPF_OP(insn->code);
10767         int err;
10768
10769         if (opcode == BPF_END || opcode == BPF_NEG) {
10770                 if (opcode == BPF_NEG) {
10771                         if (BPF_SRC(insn->code) != BPF_K ||
10772                             insn->src_reg != BPF_REG_0 ||
10773                             insn->off != 0 || insn->imm != 0) {
10774                                 verbose(env, "BPF_NEG uses reserved fields\n");
10775                                 return -EINVAL;
10776                         }
10777                 } else {
10778                         if (insn->src_reg != BPF_REG_0 || insn->off != 0 ||
10779                             (insn->imm != 16 && insn->imm != 32 && insn->imm != 64) ||
10780                             BPF_CLASS(insn->code) == BPF_ALU64) {
10781                                 verbose(env, "BPF_END uses reserved fields\n");
10782                                 return -EINVAL;
10783                         }
10784                 }
10785
10786                 /* check src operand */
10787                 err = check_reg_arg(env, insn->dst_reg, SRC_OP);
10788                 if (err)
10789                         return err;
10790
10791                 if (is_pointer_value(env, insn->dst_reg)) {
10792                         verbose(env, "R%d pointer arithmetic prohibited\n",
10793                                 insn->dst_reg);
10794                         return -EACCES;
10795                 }
10796
10797                 /* check dest operand */
10798                 err = check_reg_arg(env, insn->dst_reg, DST_OP);
10799                 if (err)
10800                         return err;
10801
10802         } else if (opcode == BPF_MOV) {
10803
10804                 if (BPF_SRC(insn->code) == BPF_X) {
10805                         if (insn->imm != 0 || insn->off != 0) {
10806                                 verbose(env, "BPF_MOV uses reserved fields\n");
10807                                 return -EINVAL;
10808                         }
10809
10810                         /* check src operand */
10811                         err = check_reg_arg(env, insn->src_reg, SRC_OP);
10812                         if (err)
10813                                 return err;
10814                 } else {
10815                         if (insn->src_reg != BPF_REG_0 || insn->off != 0) {
10816                                 verbose(env, "BPF_MOV uses reserved fields\n");
10817                                 return -EINVAL;
10818                         }
10819                 }
10820
10821                 /* check dest operand, mark as required later */
10822                 err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
10823                 if (err)
10824                         return err;
10825
10826                 if (BPF_SRC(insn->code) == BPF_X) {
10827                         struct bpf_reg_state *src_reg = regs + insn->src_reg;
10828                         struct bpf_reg_state *dst_reg = regs + insn->dst_reg;
10829
10830                         if (BPF_CLASS(insn->code) == BPF_ALU64) {
10831                                 /* case: R1 = R2
10832                                  * copy register state to dest reg
10833                                  */
10834                                 if (src_reg->type == SCALAR_VALUE && !src_reg->id)
10835                                         /* Assign src and dst registers the same ID
10836                                          * that will be used by find_equal_scalars()
10837                                          * to propagate min/max range.
10838                                          */
10839                                         src_reg->id = ++env->id_gen;
10840                                 *dst_reg = *src_reg;
10841                                 dst_reg->live |= REG_LIVE_WRITTEN;
10842                                 dst_reg->subreg_def = DEF_NOT_SUBREG;
10843                         } else {
10844                                 /* R1 = (u32) R2 */
10845                                 if (is_pointer_value(env, insn->src_reg)) {
10846                                         verbose(env,
10847                                                 "R%d partial copy of pointer\n",
10848                                                 insn->src_reg);
10849                                         return -EACCES;
10850                                 } else if (src_reg->type == SCALAR_VALUE) {
10851                                         *dst_reg = *src_reg;
10852                                         /* Make sure ID is cleared otherwise
10853                                          * dst_reg min/max could be incorrectly
10854                                          * propagated into src_reg by find_equal_scalars()
10855                                          */
10856                                         dst_reg->id = 0;
10857                                         dst_reg->live |= REG_LIVE_WRITTEN;
10858                                         dst_reg->subreg_def = env->insn_idx + 1;
10859                                 } else {
10860                                         mark_reg_unknown(env, regs,
10861                                                          insn->dst_reg);
10862                                 }
10863                                 zext_32_to_64(dst_reg);
10864                                 reg_bounds_sync(dst_reg);
10865                         }
10866                 } else {
10867                         /* case: R = imm
10868                          * remember the value we stored into this reg
10869                          */
10870                         /* clear any state __mark_reg_known doesn't set */
10871                         mark_reg_unknown(env, regs, insn->dst_reg);
10872                         regs[insn->dst_reg].type = SCALAR_VALUE;
10873                         if (BPF_CLASS(insn->code) == BPF_ALU64) {
10874                                 __mark_reg_known(regs + insn->dst_reg,
10875                                                  insn->imm);
10876                         } else {
10877                                 __mark_reg_known(regs + insn->dst_reg,
10878                                                  (u32)insn->imm);
10879                         }
10880                 }
10881
10882         } else if (opcode > BPF_END) {
10883                 verbose(env, "invalid BPF_ALU opcode %x\n", opcode);
10884                 return -EINVAL;
10885
10886         } else {        /* all other ALU ops: and, sub, xor, add, ... */
10887
10888                 if (BPF_SRC(insn->code) == BPF_X) {
10889                         if (insn->imm != 0 || insn->off != 0) {
10890                                 verbose(env, "BPF_ALU uses reserved fields\n");
10891                                 return -EINVAL;
10892                         }
10893                         /* check src1 operand */
10894                         err = check_reg_arg(env, insn->src_reg, SRC_OP);
10895                         if (err)
10896                                 return err;
10897                 } else {
10898                         if (insn->src_reg != BPF_REG_0 || insn->off != 0) {
10899                                 verbose(env, "BPF_ALU uses reserved fields\n");
10900                                 return -EINVAL;
10901                         }
10902                 }
10903
10904                 /* check src2 operand */
10905                 err = check_reg_arg(env, insn->dst_reg, SRC_OP);
10906                 if (err)
10907                         return err;
10908
10909                 if ((opcode == BPF_MOD || opcode == BPF_DIV) &&
10910                     BPF_SRC(insn->code) == BPF_K && insn->imm == 0) {
10911                         verbose(env, "div by zero\n");
10912                         return -EINVAL;
10913                 }
10914
10915                 if ((opcode == BPF_LSH || opcode == BPF_RSH ||
10916                      opcode == BPF_ARSH) && BPF_SRC(insn->code) == BPF_K) {
10917                         int size = BPF_CLASS(insn->code) == BPF_ALU64 ? 64 : 32;
10918
10919                         if (insn->imm < 0 || insn->imm >= size) {
10920                                 verbose(env, "invalid shift %d\n", insn->imm);
10921                                 return -EINVAL;
10922                         }
10923                 }
10924
10925                 /* check dest operand */
10926                 err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
10927                 if (err)
10928                         return err;
10929
10930                 return adjust_reg_min_max_vals(env, insn);
10931         }
10932
10933         return 0;
10934 }
10935
10936 static void find_good_pkt_pointers(struct bpf_verifier_state *vstate,
10937                                    struct bpf_reg_state *dst_reg,
10938                                    enum bpf_reg_type type,
10939                                    bool range_right_open)
10940 {
10941         struct bpf_func_state *state;
10942         struct bpf_reg_state *reg;
10943         int new_range;
10944
10945         if (dst_reg->off < 0 ||
10946             (dst_reg->off == 0 && range_right_open))
10947                 /* This doesn't give us any range */
10948                 return;
10949
10950         if (dst_reg->umax_value > MAX_PACKET_OFF ||
10951             dst_reg->umax_value + dst_reg->off > MAX_PACKET_OFF)
10952                 /* Risk of overflow.  For instance, ptr + (1<<63) may be less
10953                  * than pkt_end, but that's because it's also less than pkt.
10954                  */
10955                 return;
10956
10957         new_range = dst_reg->off;
10958         if (range_right_open)
10959                 new_range++;
10960
10961         /* Examples for register markings:
10962          *
10963          * pkt_data in dst register:
10964          *
10965          *   r2 = r3;
10966          *   r2 += 8;
10967          *   if (r2 > pkt_end) goto <handle exception>
10968          *   <access okay>
10969          *
10970          *   r2 = r3;
10971          *   r2 += 8;
10972          *   if (r2 < pkt_end) goto <access okay>
10973          *   <handle exception>
10974          *
10975          *   Where:
10976          *     r2 == dst_reg, pkt_end == src_reg
10977          *     r2=pkt(id=n,off=8,r=0)
10978          *     r3=pkt(id=n,off=0,r=0)
10979          *
10980          * pkt_data in src register:
10981          *
10982          *   r2 = r3;
10983          *   r2 += 8;
10984          *   if (pkt_end >= r2) goto <access okay>
10985          *   <handle exception>
10986          *
10987          *   r2 = r3;
10988          *   r2 += 8;
10989          *   if (pkt_end <= r2) goto <handle exception>
10990          *   <access okay>
10991          *
10992          *   Where:
10993          *     pkt_end == dst_reg, r2 == src_reg
10994          *     r2=pkt(id=n,off=8,r=0)
10995          *     r3=pkt(id=n,off=0,r=0)
10996          *
10997          * Find register r3 and mark its range as r3=pkt(id=n,off=0,r=8)
10998          * or r3=pkt(id=n,off=0,r=8-1), so that range of bytes [r3, r3 + 8)
10999          * and [r3, r3 + 8-1) respectively is safe to access depending on
11000          * the check.
11001          */
11002
11003         /* If our ids match, then we must have the same max_value.  And we
11004          * don't care about the other reg's fixed offset, since if it's too big
11005          * the range won't allow anything.
11006          * dst_reg->off is known < MAX_PACKET_OFF, therefore it fits in a u16.
11007          */
11008         bpf_for_each_reg_in_vstate(vstate, state, reg, ({
11009                 if (reg->type == type && reg->id == dst_reg->id)
11010                         /* keep the maximum range already checked */
11011                         reg->range = max(reg->range, new_range);
11012         }));
11013 }
11014
11015 static int is_branch32_taken(struct bpf_reg_state *reg, u32 val, u8 opcode)
11016 {
11017         struct tnum subreg = tnum_subreg(reg->var_off);
11018         s32 sval = (s32)val;
11019
11020         switch (opcode) {
11021         case BPF_JEQ:
11022                 if (tnum_is_const(subreg))
11023                         return !!tnum_equals_const(subreg, val);
11024                 break;
11025         case BPF_JNE:
11026                 if (tnum_is_const(subreg))
11027                         return !tnum_equals_const(subreg, val);
11028                 break;
11029         case BPF_JSET:
11030                 if ((~subreg.mask & subreg.value) & val)
11031                         return 1;
11032                 if (!((subreg.mask | subreg.value) & val))
11033                         return 0;
11034                 break;
11035         case BPF_JGT:
11036                 if (reg->u32_min_value > val)
11037                         return 1;
11038                 else if (reg->u32_max_value <= val)
11039                         return 0;
11040                 break;
11041         case BPF_JSGT:
11042                 if (reg->s32_min_value > sval)
11043                         return 1;
11044                 else if (reg->s32_max_value <= sval)
11045                         return 0;
11046                 break;
11047         case BPF_JLT:
11048                 if (reg->u32_max_value < val)
11049                         return 1;
11050                 else if (reg->u32_min_value >= val)
11051                         return 0;
11052                 break;
11053         case BPF_JSLT:
11054                 if (reg->s32_max_value < sval)
11055                         return 1;
11056                 else if (reg->s32_min_value >= sval)
11057                         return 0;
11058                 break;
11059         case BPF_JGE:
11060                 if (reg->u32_min_value >= val)
11061                         return 1;
11062                 else if (reg->u32_max_value < val)
11063                         return 0;
11064                 break;
11065         case BPF_JSGE:
11066                 if (reg->s32_min_value >= sval)
11067                         return 1;
11068                 else if (reg->s32_max_value < sval)
11069                         return 0;
11070                 break;
11071         case BPF_JLE:
11072                 if (reg->u32_max_value <= val)
11073                         return 1;
11074                 else if (reg->u32_min_value > val)
11075                         return 0;
11076                 break;
11077         case BPF_JSLE:
11078                 if (reg->s32_max_value <= sval)
11079                         return 1;
11080                 else if (reg->s32_min_value > sval)
11081                         return 0;
11082                 break;
11083         }
11084
11085         return -1;
11086 }
11087
11088
11089 static int is_branch64_taken(struct bpf_reg_state *reg, u64 val, u8 opcode)
11090 {
11091         s64 sval = (s64)val;
11092
11093         switch (opcode) {
11094         case BPF_JEQ:
11095                 if (tnum_is_const(reg->var_off))
11096                         return !!tnum_equals_const(reg->var_off, val);
11097                 break;
11098         case BPF_JNE:
11099                 if (tnum_is_const(reg->var_off))
11100                         return !tnum_equals_const(reg->var_off, val);
11101                 break;
11102         case BPF_JSET:
11103                 if ((~reg->var_off.mask & reg->var_off.value) & val)
11104                         return 1;
11105                 if (!((reg->var_off.mask | reg->var_off.value) & val))
11106                         return 0;
11107                 break;
11108         case BPF_JGT:
11109                 if (reg->umin_value > val)
11110                         return 1;
11111                 else if (reg->umax_value <= val)
11112                         return 0;
11113                 break;
11114         case BPF_JSGT:
11115                 if (reg->smin_value > sval)
11116                         return 1;
11117                 else if (reg->smax_value <= sval)
11118                         return 0;
11119                 break;
11120         case BPF_JLT:
11121                 if (reg->umax_value < val)
11122                         return 1;
11123                 else if (reg->umin_value >= val)
11124                         return 0;
11125                 break;
11126         case BPF_JSLT:
11127                 if (reg->smax_value < sval)
11128                         return 1;
11129                 else if (reg->smin_value >= sval)
11130                         return 0;
11131                 break;
11132         case BPF_JGE:
11133                 if (reg->umin_value >= val)
11134                         return 1;
11135                 else if (reg->umax_value < val)
11136                         return 0;
11137                 break;
11138         case BPF_JSGE:
11139                 if (reg->smin_value >= sval)
11140                         return 1;
11141                 else if (reg->smax_value < sval)
11142                         return 0;
11143                 break;
11144         case BPF_JLE:
11145                 if (reg->umax_value <= val)
11146                         return 1;
11147                 else if (reg->umin_value > val)
11148                         return 0;
11149                 break;
11150         case BPF_JSLE:
11151                 if (reg->smax_value <= sval)
11152                         return 1;
11153                 else if (reg->smin_value > sval)
11154                         return 0;
11155                 break;
11156         }
11157
11158         return -1;
11159 }
11160
11161 /* compute branch direction of the expression "if (reg opcode val) goto target;"
11162  * and return:
11163  *  1 - branch will be taken and "goto target" will be executed
11164  *  0 - branch will not be taken and fall-through to next insn
11165  * -1 - unknown. Example: "if (reg < 5)" is unknown when register value
11166  *      range [0,10]
11167  */
11168 static int is_branch_taken(struct bpf_reg_state *reg, u64 val, u8 opcode,
11169                            bool is_jmp32)
11170 {
11171         if (__is_pointer_value(false, reg)) {
11172                 if (!reg_type_not_null(reg->type))
11173                         return -1;
11174
11175                 /* If pointer is valid tests against zero will fail so we can
11176                  * use this to direct branch taken.
11177                  */
11178                 if (val != 0)
11179                         return -1;
11180
11181                 switch (opcode) {
11182                 case BPF_JEQ:
11183                         return 0;
11184                 case BPF_JNE:
11185                         return 1;
11186                 default:
11187                         return -1;
11188                 }
11189         }
11190
11191         if (is_jmp32)
11192                 return is_branch32_taken(reg, val, opcode);
11193         return is_branch64_taken(reg, val, opcode);
11194 }
11195
11196 static int flip_opcode(u32 opcode)
11197 {
11198         /* How can we transform "a <op> b" into "b <op> a"? */
11199         static const u8 opcode_flip[16] = {
11200                 /* these stay the same */
11201                 [BPF_JEQ  >> 4] = BPF_JEQ,
11202                 [BPF_JNE  >> 4] = BPF_JNE,
11203                 [BPF_JSET >> 4] = BPF_JSET,
11204                 /* these swap "lesser" and "greater" (L and G in the opcodes) */
11205                 [BPF_JGE  >> 4] = BPF_JLE,
11206                 [BPF_JGT  >> 4] = BPF_JLT,
11207                 [BPF_JLE  >> 4] = BPF_JGE,
11208                 [BPF_JLT  >> 4] = BPF_JGT,
11209                 [BPF_JSGE >> 4] = BPF_JSLE,
11210                 [BPF_JSGT >> 4] = BPF_JSLT,
11211                 [BPF_JSLE >> 4] = BPF_JSGE,
11212                 [BPF_JSLT >> 4] = BPF_JSGT
11213         };
11214         return opcode_flip[opcode >> 4];
11215 }
11216
11217 static int is_pkt_ptr_branch_taken(struct bpf_reg_state *dst_reg,
11218                                    struct bpf_reg_state *src_reg,
11219                                    u8 opcode)
11220 {
11221         struct bpf_reg_state *pkt;
11222
11223         if (src_reg->type == PTR_TO_PACKET_END) {
11224                 pkt = dst_reg;
11225         } else if (dst_reg->type == PTR_TO_PACKET_END) {
11226                 pkt = src_reg;
11227                 opcode = flip_opcode(opcode);
11228         } else {
11229                 return -1;
11230         }
11231
11232         if (pkt->range >= 0)
11233                 return -1;
11234
11235         switch (opcode) {
11236         case BPF_JLE:
11237                 /* pkt <= pkt_end */
11238                 fallthrough;
11239         case BPF_JGT:
11240                 /* pkt > pkt_end */
11241                 if (pkt->range == BEYOND_PKT_END)
11242                         /* pkt has at last one extra byte beyond pkt_end */
11243                         return opcode == BPF_JGT;
11244                 break;
11245         case BPF_JLT:
11246                 /* pkt < pkt_end */
11247                 fallthrough;
11248         case BPF_JGE:
11249                 /* pkt >= pkt_end */
11250                 if (pkt->range == BEYOND_PKT_END || pkt->range == AT_PKT_END)
11251                         return opcode == BPF_JGE;
11252                 break;
11253         }
11254         return -1;
11255 }
11256
11257 /* Adjusts the register min/max values in the case that the dst_reg is the
11258  * variable register that we are working on, and src_reg is a constant or we're
11259  * simply doing a BPF_K check.
11260  * In JEQ/JNE cases we also adjust the var_off values.
11261  */
11262 static void reg_set_min_max(struct bpf_reg_state *true_reg,
11263                             struct bpf_reg_state *false_reg,
11264                             u64 val, u32 val32,
11265                             u8 opcode, bool is_jmp32)
11266 {
11267         struct tnum false_32off = tnum_subreg(false_reg->var_off);
11268         struct tnum false_64off = false_reg->var_off;
11269         struct tnum true_32off = tnum_subreg(true_reg->var_off);
11270         struct tnum true_64off = true_reg->var_off;
11271         s64 sval = (s64)val;
11272         s32 sval32 = (s32)val32;
11273
11274         /* If the dst_reg is a pointer, we can't learn anything about its
11275          * variable offset from the compare (unless src_reg were a pointer into
11276          * the same object, but we don't bother with that.
11277          * Since false_reg and true_reg have the same type by construction, we
11278          * only need to check one of them for pointerness.
11279          */
11280         if (__is_pointer_value(false, false_reg))
11281                 return;
11282
11283         switch (opcode) {
11284         /* JEQ/JNE comparison doesn't change the register equivalence.
11285          *
11286          * r1 = r2;
11287          * if (r1 == 42) goto label;
11288          * ...
11289          * label: // here both r1 and r2 are known to be 42.
11290          *
11291          * Hence when marking register as known preserve it's ID.
11292          */
11293         case BPF_JEQ:
11294                 if (is_jmp32) {
11295                         __mark_reg32_known(true_reg, val32);
11296                         true_32off = tnum_subreg(true_reg->var_off);
11297                 } else {
11298                         ___mark_reg_known(true_reg, val);
11299                         true_64off = true_reg->var_off;
11300                 }
11301                 break;
11302         case BPF_JNE:
11303                 if (is_jmp32) {
11304                         __mark_reg32_known(false_reg, val32);
11305                         false_32off = tnum_subreg(false_reg->var_off);
11306                 } else {
11307                         ___mark_reg_known(false_reg, val);
11308                         false_64off = false_reg->var_off;
11309                 }
11310                 break;
11311         case BPF_JSET:
11312                 if (is_jmp32) {
11313                         false_32off = tnum_and(false_32off, tnum_const(~val32));
11314                         if (is_power_of_2(val32))
11315                                 true_32off = tnum_or(true_32off,
11316                                                      tnum_const(val32));
11317                 } else {
11318                         false_64off = tnum_and(false_64off, tnum_const(~val));
11319                         if (is_power_of_2(val))
11320                                 true_64off = tnum_or(true_64off,
11321                                                      tnum_const(val));
11322                 }
11323                 break;
11324         case BPF_JGE:
11325         case BPF_JGT:
11326         {
11327                 if (is_jmp32) {
11328                         u32 false_umax = opcode == BPF_JGT ? val32  : val32 - 1;
11329                         u32 true_umin = opcode == BPF_JGT ? val32 + 1 : val32;
11330
11331                         false_reg->u32_max_value = min(false_reg->u32_max_value,
11332                                                        false_umax);
11333                         true_reg->u32_min_value = max(true_reg->u32_min_value,
11334                                                       true_umin);
11335                 } else {
11336                         u64 false_umax = opcode == BPF_JGT ? val    : val - 1;
11337                         u64 true_umin = opcode == BPF_JGT ? val + 1 : val;
11338
11339                         false_reg->umax_value = min(false_reg->umax_value, false_umax);
11340                         true_reg->umin_value = max(true_reg->umin_value, true_umin);
11341                 }
11342                 break;
11343         }
11344         case BPF_JSGE:
11345         case BPF_JSGT:
11346         {
11347                 if (is_jmp32) {
11348                         s32 false_smax = opcode == BPF_JSGT ? sval32    : sval32 - 1;
11349                         s32 true_smin = opcode == BPF_JSGT ? sval32 + 1 : sval32;
11350
11351                         false_reg->s32_max_value = min(false_reg->s32_max_value, false_smax);
11352                         true_reg->s32_min_value = max(true_reg->s32_min_value, true_smin);
11353                 } else {
11354                         s64 false_smax = opcode == BPF_JSGT ? sval    : sval - 1;
11355                         s64 true_smin = opcode == BPF_JSGT ? sval + 1 : sval;
11356
11357                         false_reg->smax_value = min(false_reg->smax_value, false_smax);
11358                         true_reg->smin_value = max(true_reg->smin_value, true_smin);
11359                 }
11360                 break;
11361         }
11362         case BPF_JLE:
11363         case BPF_JLT:
11364         {
11365                 if (is_jmp32) {
11366                         u32 false_umin = opcode == BPF_JLT ? val32  : val32 + 1;
11367                         u32 true_umax = opcode == BPF_JLT ? val32 - 1 : val32;
11368
11369                         false_reg->u32_min_value = max(false_reg->u32_min_value,
11370                                                        false_umin);
11371                         true_reg->u32_max_value = min(true_reg->u32_max_value,
11372                                                       true_umax);
11373                 } else {
11374                         u64 false_umin = opcode == BPF_JLT ? val    : val + 1;
11375                         u64 true_umax = opcode == BPF_JLT ? val - 1 : val;
11376
11377                         false_reg->umin_value = max(false_reg->umin_value, false_umin);
11378                         true_reg->umax_value = min(true_reg->umax_value, true_umax);
11379                 }
11380                 break;
11381         }
11382         case BPF_JSLE:
11383         case BPF_JSLT:
11384         {
11385                 if (is_jmp32) {
11386                         s32 false_smin = opcode == BPF_JSLT ? sval32    : sval32 + 1;
11387                         s32 true_smax = opcode == BPF_JSLT ? sval32 - 1 : sval32;
11388
11389                         false_reg->s32_min_value = max(false_reg->s32_min_value, false_smin);
11390                         true_reg->s32_max_value = min(true_reg->s32_max_value, true_smax);
11391                 } else {
11392                         s64 false_smin = opcode == BPF_JSLT ? sval    : sval + 1;
11393                         s64 true_smax = opcode == BPF_JSLT ? sval - 1 : sval;
11394
11395                         false_reg->smin_value = max(false_reg->smin_value, false_smin);
11396                         true_reg->smax_value = min(true_reg->smax_value, true_smax);
11397                 }
11398                 break;
11399         }
11400         default:
11401                 return;
11402         }
11403
11404         if (is_jmp32) {
11405                 false_reg->var_off = tnum_or(tnum_clear_subreg(false_64off),
11406                                              tnum_subreg(false_32off));
11407                 true_reg->var_off = tnum_or(tnum_clear_subreg(true_64off),
11408                                             tnum_subreg(true_32off));
11409                 __reg_combine_32_into_64(false_reg);
11410                 __reg_combine_32_into_64(true_reg);
11411         } else {
11412                 false_reg->var_off = false_64off;
11413                 true_reg->var_off = true_64off;
11414                 __reg_combine_64_into_32(false_reg);
11415                 __reg_combine_64_into_32(true_reg);
11416         }
11417 }
11418
11419 /* Same as above, but for the case that dst_reg holds a constant and src_reg is
11420  * the variable reg.
11421  */
11422 static void reg_set_min_max_inv(struct bpf_reg_state *true_reg,
11423                                 struct bpf_reg_state *false_reg,
11424                                 u64 val, u32 val32,
11425                                 u8 opcode, bool is_jmp32)
11426 {
11427         opcode = flip_opcode(opcode);
11428         /* This uses zero as "not present in table"; luckily the zero opcode,
11429          * BPF_JA, can't get here.
11430          */
11431         if (opcode)
11432                 reg_set_min_max(true_reg, false_reg, val, val32, opcode, is_jmp32);
11433 }
11434
11435 /* Regs are known to be equal, so intersect their min/max/var_off */
11436 static void __reg_combine_min_max(struct bpf_reg_state *src_reg,
11437                                   struct bpf_reg_state *dst_reg)
11438 {
11439         src_reg->umin_value = dst_reg->umin_value = max(src_reg->umin_value,
11440                                                         dst_reg->umin_value);
11441         src_reg->umax_value = dst_reg->umax_value = min(src_reg->umax_value,
11442                                                         dst_reg->umax_value);
11443         src_reg->smin_value = dst_reg->smin_value = max(src_reg->smin_value,
11444                                                         dst_reg->smin_value);
11445         src_reg->smax_value = dst_reg->smax_value = min(src_reg->smax_value,
11446                                                         dst_reg->smax_value);
11447         src_reg->var_off = dst_reg->var_off = tnum_intersect(src_reg->var_off,
11448                                                              dst_reg->var_off);
11449         reg_bounds_sync(src_reg);
11450         reg_bounds_sync(dst_reg);
11451 }
11452
11453 static void reg_combine_min_max(struct bpf_reg_state *true_src,
11454                                 struct bpf_reg_state *true_dst,
11455                                 struct bpf_reg_state *false_src,
11456                                 struct bpf_reg_state *false_dst,
11457                                 u8 opcode)
11458 {
11459         switch (opcode) {
11460         case BPF_JEQ:
11461                 __reg_combine_min_max(true_src, true_dst);
11462                 break;
11463         case BPF_JNE:
11464                 __reg_combine_min_max(false_src, false_dst);
11465                 break;
11466         }
11467 }
11468
11469 static void mark_ptr_or_null_reg(struct bpf_func_state *state,
11470                                  struct bpf_reg_state *reg, u32 id,
11471                                  bool is_null)
11472 {
11473         if (type_may_be_null(reg->type) && reg->id == id &&
11474             (is_rcu_reg(reg) || !WARN_ON_ONCE(!reg->id))) {
11475                 /* Old offset (both fixed and variable parts) should have been
11476                  * known-zero, because we don't allow pointer arithmetic on
11477                  * pointers that might be NULL. If we see this happening, don't
11478                  * convert the register.
11479                  *
11480                  * But in some cases, some helpers that return local kptrs
11481                  * advance offset for the returned pointer. In those cases, it
11482                  * is fine to expect to see reg->off.
11483                  */
11484                 if (WARN_ON_ONCE(reg->smin_value || reg->smax_value || !tnum_equals_const(reg->var_off, 0)))
11485                         return;
11486                 if (reg->type != (PTR_TO_BTF_ID | MEM_ALLOC | PTR_MAYBE_NULL) && WARN_ON_ONCE(reg->off))
11487                         return;
11488                 if (is_null) {
11489                         reg->type = SCALAR_VALUE;
11490                         /* We don't need id and ref_obj_id from this point
11491                          * onwards anymore, thus we should better reset it,
11492                          * so that state pruning has chances to take effect.
11493                          */
11494                         reg->id = 0;
11495                         reg->ref_obj_id = 0;
11496
11497                         return;
11498                 }
11499
11500                 mark_ptr_not_null_reg(reg);
11501
11502                 if (!reg_may_point_to_spin_lock(reg)) {
11503                         /* For not-NULL ptr, reg->ref_obj_id will be reset
11504                          * in release_reference().
11505                          *
11506                          * reg->id is still used by spin_lock ptr. Other
11507                          * than spin_lock ptr type, reg->id can be reset.
11508                          */
11509                         reg->id = 0;
11510                 }
11511         }
11512 }
11513
11514 /* The logic is similar to find_good_pkt_pointers(), both could eventually
11515  * be folded together at some point.
11516  */
11517 static void mark_ptr_or_null_regs(struct bpf_verifier_state *vstate, u32 regno,
11518                                   bool is_null)
11519 {
11520         struct bpf_func_state *state = vstate->frame[vstate->curframe];
11521         struct bpf_reg_state *regs = state->regs, *reg;
11522         u32 ref_obj_id = regs[regno].ref_obj_id;
11523         u32 id = regs[regno].id;
11524
11525         if (ref_obj_id && ref_obj_id == id && is_null)
11526                 /* regs[regno] is in the " == NULL" branch.
11527                  * No one could have freed the reference state before
11528                  * doing the NULL check.
11529                  */
11530                 WARN_ON_ONCE(release_reference_state(state, id));
11531
11532         bpf_for_each_reg_in_vstate(vstate, state, reg, ({
11533                 mark_ptr_or_null_reg(state, reg, id, is_null);
11534         }));
11535 }
11536
11537 static bool try_match_pkt_pointers(const struct bpf_insn *insn,
11538                                    struct bpf_reg_state *dst_reg,
11539                                    struct bpf_reg_state *src_reg,
11540                                    struct bpf_verifier_state *this_branch,
11541                                    struct bpf_verifier_state *other_branch)
11542 {
11543         if (BPF_SRC(insn->code) != BPF_X)
11544                 return false;
11545
11546         /* Pointers are always 64-bit. */
11547         if (BPF_CLASS(insn->code) == BPF_JMP32)
11548                 return false;
11549
11550         switch (BPF_OP(insn->code)) {
11551         case BPF_JGT:
11552                 if ((dst_reg->type == PTR_TO_PACKET &&
11553                      src_reg->type == PTR_TO_PACKET_END) ||
11554                     (dst_reg->type == PTR_TO_PACKET_META &&
11555                      reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
11556                         /* pkt_data' > pkt_end, pkt_meta' > pkt_data */
11557                         find_good_pkt_pointers(this_branch, dst_reg,
11558                                                dst_reg->type, false);
11559                         mark_pkt_end(other_branch, insn->dst_reg, true);
11560                 } else if ((dst_reg->type == PTR_TO_PACKET_END &&
11561                             src_reg->type == PTR_TO_PACKET) ||
11562                            (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
11563                             src_reg->type == PTR_TO_PACKET_META)) {
11564                         /* pkt_end > pkt_data', pkt_data > pkt_meta' */
11565                         find_good_pkt_pointers(other_branch, src_reg,
11566                                                src_reg->type, true);
11567                         mark_pkt_end(this_branch, insn->src_reg, false);
11568                 } else {
11569                         return false;
11570                 }
11571                 break;
11572         case BPF_JLT:
11573                 if ((dst_reg->type == PTR_TO_PACKET &&
11574                      src_reg->type == PTR_TO_PACKET_END) ||
11575                     (dst_reg->type == PTR_TO_PACKET_META &&
11576                      reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
11577                         /* pkt_data' < pkt_end, pkt_meta' < pkt_data */
11578                         find_good_pkt_pointers(other_branch, dst_reg,
11579                                                dst_reg->type, true);
11580                         mark_pkt_end(this_branch, insn->dst_reg, false);
11581                 } else if ((dst_reg->type == PTR_TO_PACKET_END &&
11582                             src_reg->type == PTR_TO_PACKET) ||
11583                            (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
11584                             src_reg->type == PTR_TO_PACKET_META)) {
11585                         /* pkt_end < pkt_data', pkt_data > pkt_meta' */
11586                         find_good_pkt_pointers(this_branch, src_reg,
11587                                                src_reg->type, false);
11588                         mark_pkt_end(other_branch, insn->src_reg, true);
11589                 } else {
11590                         return false;
11591                 }
11592                 break;
11593         case BPF_JGE:
11594                 if ((dst_reg->type == PTR_TO_PACKET &&
11595                      src_reg->type == PTR_TO_PACKET_END) ||
11596                     (dst_reg->type == PTR_TO_PACKET_META &&
11597                      reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
11598                         /* pkt_data' >= pkt_end, pkt_meta' >= pkt_data */
11599                         find_good_pkt_pointers(this_branch, dst_reg,
11600                                                dst_reg->type, true);
11601                         mark_pkt_end(other_branch, insn->dst_reg, false);
11602                 } else if ((dst_reg->type == PTR_TO_PACKET_END &&
11603                             src_reg->type == PTR_TO_PACKET) ||
11604                            (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
11605                             src_reg->type == PTR_TO_PACKET_META)) {
11606                         /* pkt_end >= pkt_data', pkt_data >= pkt_meta' */
11607                         find_good_pkt_pointers(other_branch, src_reg,
11608                                                src_reg->type, false);
11609                         mark_pkt_end(this_branch, insn->src_reg, true);
11610                 } else {
11611                         return false;
11612                 }
11613                 break;
11614         case BPF_JLE:
11615                 if ((dst_reg->type == PTR_TO_PACKET &&
11616                      src_reg->type == PTR_TO_PACKET_END) ||
11617                     (dst_reg->type == PTR_TO_PACKET_META &&
11618                      reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
11619                         /* pkt_data' <= pkt_end, pkt_meta' <= pkt_data */
11620                         find_good_pkt_pointers(other_branch, dst_reg,
11621                                                dst_reg->type, false);
11622                         mark_pkt_end(this_branch, insn->dst_reg, true);
11623                 } else if ((dst_reg->type == PTR_TO_PACKET_END &&
11624                             src_reg->type == PTR_TO_PACKET) ||
11625                            (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
11626                             src_reg->type == PTR_TO_PACKET_META)) {
11627                         /* pkt_end <= pkt_data', pkt_data <= pkt_meta' */
11628                         find_good_pkt_pointers(this_branch, src_reg,
11629                                                src_reg->type, true);
11630                         mark_pkt_end(other_branch, insn->src_reg, false);
11631                 } else {
11632                         return false;
11633                 }
11634                 break;
11635         default:
11636                 return false;
11637         }
11638
11639         return true;
11640 }
11641
11642 static void find_equal_scalars(struct bpf_verifier_state *vstate,
11643                                struct bpf_reg_state *known_reg)
11644 {
11645         struct bpf_func_state *state;
11646         struct bpf_reg_state *reg;
11647
11648         bpf_for_each_reg_in_vstate(vstate, state, reg, ({
11649                 if (reg->type == SCALAR_VALUE && reg->id == known_reg->id)
11650                         *reg = *known_reg;
11651         }));
11652 }
11653
11654 static int check_cond_jmp_op(struct bpf_verifier_env *env,
11655                              struct bpf_insn *insn, int *insn_idx)
11656 {
11657         struct bpf_verifier_state *this_branch = env->cur_state;
11658         struct bpf_verifier_state *other_branch;
11659         struct bpf_reg_state *regs = this_branch->frame[this_branch->curframe]->regs;
11660         struct bpf_reg_state *dst_reg, *other_branch_regs, *src_reg = NULL;
11661         struct bpf_reg_state *eq_branch_regs;
11662         u8 opcode = BPF_OP(insn->code);
11663         bool is_jmp32;
11664         int pred = -1;
11665         int err;
11666
11667         /* Only conditional jumps are expected to reach here. */
11668         if (opcode == BPF_JA || opcode > BPF_JSLE) {
11669                 verbose(env, "invalid BPF_JMP/JMP32 opcode %x\n", opcode);
11670                 return -EINVAL;
11671         }
11672
11673         if (BPF_SRC(insn->code) == BPF_X) {
11674                 if (insn->imm != 0) {
11675                         verbose(env, "BPF_JMP/JMP32 uses reserved fields\n");
11676                         return -EINVAL;
11677                 }
11678
11679                 /* check src1 operand */
11680                 err = check_reg_arg(env, insn->src_reg, SRC_OP);
11681                 if (err)
11682                         return err;
11683
11684                 if (is_pointer_value(env, insn->src_reg)) {
11685                         verbose(env, "R%d pointer comparison prohibited\n",
11686                                 insn->src_reg);
11687                         return -EACCES;
11688                 }
11689                 src_reg = &regs[insn->src_reg];
11690         } else {
11691                 if (insn->src_reg != BPF_REG_0) {
11692                         verbose(env, "BPF_JMP/JMP32 uses reserved fields\n");
11693                         return -EINVAL;
11694                 }
11695         }
11696
11697         /* check src2 operand */
11698         err = check_reg_arg(env, insn->dst_reg, SRC_OP);
11699         if (err)
11700                 return err;
11701
11702         dst_reg = &regs[insn->dst_reg];
11703         is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32;
11704
11705         if (BPF_SRC(insn->code) == BPF_K) {
11706                 pred = is_branch_taken(dst_reg, insn->imm, opcode, is_jmp32);
11707         } else if (src_reg->type == SCALAR_VALUE &&
11708                    is_jmp32 && tnum_is_const(tnum_subreg(src_reg->var_off))) {
11709                 pred = is_branch_taken(dst_reg,
11710                                        tnum_subreg(src_reg->var_off).value,
11711                                        opcode,
11712                                        is_jmp32);
11713         } else if (src_reg->type == SCALAR_VALUE &&
11714                    !is_jmp32 && tnum_is_const(src_reg->var_off)) {
11715                 pred = is_branch_taken(dst_reg,
11716                                        src_reg->var_off.value,
11717                                        opcode,
11718                                        is_jmp32);
11719         } else if (reg_is_pkt_pointer_any(dst_reg) &&
11720                    reg_is_pkt_pointer_any(src_reg) &&
11721                    !is_jmp32) {
11722                 pred = is_pkt_ptr_branch_taken(dst_reg, src_reg, opcode);
11723         }
11724
11725         if (pred >= 0) {
11726                 /* If we get here with a dst_reg pointer type it is because
11727                  * above is_branch_taken() special cased the 0 comparison.
11728                  */
11729                 if (!__is_pointer_value(false, dst_reg))
11730                         err = mark_chain_precision(env, insn->dst_reg);
11731                 if (BPF_SRC(insn->code) == BPF_X && !err &&
11732                     !__is_pointer_value(false, src_reg))
11733                         err = mark_chain_precision(env, insn->src_reg);
11734                 if (err)
11735                         return err;
11736         }
11737
11738         if (pred == 1) {
11739                 /* Only follow the goto, ignore fall-through. If needed, push
11740                  * the fall-through branch for simulation under speculative
11741                  * execution.
11742                  */
11743                 if (!env->bypass_spec_v1 &&
11744                     !sanitize_speculative_path(env, insn, *insn_idx + 1,
11745                                                *insn_idx))
11746                         return -EFAULT;
11747                 *insn_idx += insn->off;
11748                 return 0;
11749         } else if (pred == 0) {
11750                 /* Only follow the fall-through branch, since that's where the
11751                  * program will go. If needed, push the goto branch for
11752                  * simulation under speculative execution.
11753                  */
11754                 if (!env->bypass_spec_v1 &&
11755                     !sanitize_speculative_path(env, insn,
11756                                                *insn_idx + insn->off + 1,
11757                                                *insn_idx))
11758                         return -EFAULT;
11759                 return 0;
11760         }
11761
11762         other_branch = push_stack(env, *insn_idx + insn->off + 1, *insn_idx,
11763                                   false);
11764         if (!other_branch)
11765                 return -EFAULT;
11766         other_branch_regs = other_branch->frame[other_branch->curframe]->regs;
11767
11768         /* detect if we are comparing against a constant value so we can adjust
11769          * our min/max values for our dst register.
11770          * this is only legit if both are scalars (or pointers to the same
11771          * object, I suppose, see the PTR_MAYBE_NULL related if block below),
11772          * because otherwise the different base pointers mean the offsets aren't
11773          * comparable.
11774          */
11775         if (BPF_SRC(insn->code) == BPF_X) {
11776                 struct bpf_reg_state *src_reg = &regs[insn->src_reg];
11777
11778                 if (dst_reg->type == SCALAR_VALUE &&
11779                     src_reg->type == SCALAR_VALUE) {
11780                         if (tnum_is_const(src_reg->var_off) ||
11781                             (is_jmp32 &&
11782                              tnum_is_const(tnum_subreg(src_reg->var_off))))
11783                                 reg_set_min_max(&other_branch_regs[insn->dst_reg],
11784                                                 dst_reg,
11785                                                 src_reg->var_off.value,
11786                                                 tnum_subreg(src_reg->var_off).value,
11787                                                 opcode, is_jmp32);
11788                         else if (tnum_is_const(dst_reg->var_off) ||
11789                                  (is_jmp32 &&
11790                                   tnum_is_const(tnum_subreg(dst_reg->var_off))))
11791                                 reg_set_min_max_inv(&other_branch_regs[insn->src_reg],
11792                                                     src_reg,
11793                                                     dst_reg->var_off.value,
11794                                                     tnum_subreg(dst_reg->var_off).value,
11795                                                     opcode, is_jmp32);
11796                         else if (!is_jmp32 &&
11797                                  (opcode == BPF_JEQ || opcode == BPF_JNE))
11798                                 /* Comparing for equality, we can combine knowledge */
11799                                 reg_combine_min_max(&other_branch_regs[insn->src_reg],
11800                                                     &other_branch_regs[insn->dst_reg],
11801                                                     src_reg, dst_reg, opcode);
11802                         if (src_reg->id &&
11803                             !WARN_ON_ONCE(src_reg->id != other_branch_regs[insn->src_reg].id)) {
11804                                 find_equal_scalars(this_branch, src_reg);
11805                                 find_equal_scalars(other_branch, &other_branch_regs[insn->src_reg]);
11806                         }
11807
11808                 }
11809         } else if (dst_reg->type == SCALAR_VALUE) {
11810                 reg_set_min_max(&other_branch_regs[insn->dst_reg],
11811                                         dst_reg, insn->imm, (u32)insn->imm,
11812                                         opcode, is_jmp32);
11813         }
11814
11815         if (dst_reg->type == SCALAR_VALUE && dst_reg->id &&
11816             !WARN_ON_ONCE(dst_reg->id != other_branch_regs[insn->dst_reg].id)) {
11817                 find_equal_scalars(this_branch, dst_reg);
11818                 find_equal_scalars(other_branch, &other_branch_regs[insn->dst_reg]);
11819         }
11820
11821         /* if one pointer register is compared to another pointer
11822          * register check if PTR_MAYBE_NULL could be lifted.
11823          * E.g. register A - maybe null
11824          *      register B - not null
11825          * for JNE A, B, ... - A is not null in the false branch;
11826          * for JEQ A, B, ... - A is not null in the true branch.
11827          *
11828          * Since PTR_TO_BTF_ID points to a kernel struct that does
11829          * not need to be null checked by the BPF program, i.e.,
11830          * could be null even without PTR_MAYBE_NULL marking, so
11831          * only propagate nullness when neither reg is that type.
11832          */
11833         if (!is_jmp32 && BPF_SRC(insn->code) == BPF_X &&
11834             __is_pointer_value(false, src_reg) && __is_pointer_value(false, dst_reg) &&
11835             type_may_be_null(src_reg->type) != type_may_be_null(dst_reg->type) &&
11836             base_type(src_reg->type) != PTR_TO_BTF_ID &&
11837             base_type(dst_reg->type) != PTR_TO_BTF_ID) {
11838                 eq_branch_regs = NULL;
11839                 switch (opcode) {
11840                 case BPF_JEQ:
11841                         eq_branch_regs = other_branch_regs;
11842                         break;
11843                 case BPF_JNE:
11844                         eq_branch_regs = regs;
11845                         break;
11846                 default:
11847                         /* do nothing */
11848                         break;
11849                 }
11850                 if (eq_branch_regs) {
11851                         if (type_may_be_null(src_reg->type))
11852                                 mark_ptr_not_null_reg(&eq_branch_regs[insn->src_reg]);
11853                         else
11854                                 mark_ptr_not_null_reg(&eq_branch_regs[insn->dst_reg]);
11855                 }
11856         }
11857
11858         /* detect if R == 0 where R is returned from bpf_map_lookup_elem().
11859          * NOTE: these optimizations below are related with pointer comparison
11860          *       which will never be JMP32.
11861          */
11862         if (!is_jmp32 && BPF_SRC(insn->code) == BPF_K &&
11863             insn->imm == 0 && (opcode == BPF_JEQ || opcode == BPF_JNE) &&
11864             type_may_be_null(dst_reg->type)) {
11865                 /* Mark all identical registers in each branch as either
11866                  * safe or unknown depending R == 0 or R != 0 conditional.
11867                  */
11868                 mark_ptr_or_null_regs(this_branch, insn->dst_reg,
11869                                       opcode == BPF_JNE);
11870                 mark_ptr_or_null_regs(other_branch, insn->dst_reg,
11871                                       opcode == BPF_JEQ);
11872         } else if (!try_match_pkt_pointers(insn, dst_reg, &regs[insn->src_reg],
11873                                            this_branch, other_branch) &&
11874                    is_pointer_value(env, insn->dst_reg)) {
11875                 verbose(env, "R%d pointer comparison prohibited\n",
11876                         insn->dst_reg);
11877                 return -EACCES;
11878         }
11879         if (env->log.level & BPF_LOG_LEVEL)
11880                 print_insn_state(env, this_branch->frame[this_branch->curframe]);
11881         return 0;
11882 }
11883
11884 /* verify BPF_LD_IMM64 instruction */
11885 static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
11886 {
11887         struct bpf_insn_aux_data *aux = cur_aux(env);
11888         struct bpf_reg_state *regs = cur_regs(env);
11889         struct bpf_reg_state *dst_reg;
11890         struct bpf_map *map;
11891         int err;
11892
11893         if (BPF_SIZE(insn->code) != BPF_DW) {
11894                 verbose(env, "invalid BPF_LD_IMM insn\n");
11895                 return -EINVAL;
11896         }
11897         if (insn->off != 0) {
11898                 verbose(env, "BPF_LD_IMM64 uses reserved fields\n");
11899                 return -EINVAL;
11900         }
11901
11902         err = check_reg_arg(env, insn->dst_reg, DST_OP);
11903         if (err)
11904                 return err;
11905
11906         dst_reg = &regs[insn->dst_reg];
11907         if (insn->src_reg == 0) {
11908                 u64 imm = ((u64)(insn + 1)->imm << 32) | (u32)insn->imm;
11909
11910                 dst_reg->type = SCALAR_VALUE;
11911                 __mark_reg_known(&regs[insn->dst_reg], imm);
11912                 return 0;
11913         }
11914
11915         /* All special src_reg cases are listed below. From this point onwards
11916          * we either succeed and assign a corresponding dst_reg->type after
11917          * zeroing the offset, or fail and reject the program.
11918          */
11919         mark_reg_known_zero(env, regs, insn->dst_reg);
11920
11921         if (insn->src_reg == BPF_PSEUDO_BTF_ID) {
11922                 dst_reg->type = aux->btf_var.reg_type;
11923                 switch (base_type(dst_reg->type)) {
11924                 case PTR_TO_MEM:
11925                         dst_reg->mem_size = aux->btf_var.mem_size;
11926                         break;
11927                 case PTR_TO_BTF_ID:
11928                         dst_reg->btf = aux->btf_var.btf;
11929                         dst_reg->btf_id = aux->btf_var.btf_id;
11930                         break;
11931                 default:
11932                         verbose(env, "bpf verifier is misconfigured\n");
11933                         return -EFAULT;
11934                 }
11935                 return 0;
11936         }
11937
11938         if (insn->src_reg == BPF_PSEUDO_FUNC) {
11939                 struct bpf_prog_aux *aux = env->prog->aux;
11940                 u32 subprogno = find_subprog(env,
11941                                              env->insn_idx + insn->imm + 1);
11942
11943                 if (!aux->func_info) {
11944                         verbose(env, "missing btf func_info\n");
11945                         return -EINVAL;
11946                 }
11947                 if (aux->func_info_aux[subprogno].linkage != BTF_FUNC_STATIC) {
11948                         verbose(env, "callback function not static\n");
11949                         return -EINVAL;
11950                 }
11951
11952                 dst_reg->type = PTR_TO_FUNC;
11953                 dst_reg->subprogno = subprogno;
11954                 return 0;
11955         }
11956
11957         map = env->used_maps[aux->map_index];
11958         dst_reg->map_ptr = map;
11959
11960         if (insn->src_reg == BPF_PSEUDO_MAP_VALUE ||
11961             insn->src_reg == BPF_PSEUDO_MAP_IDX_VALUE) {
11962                 dst_reg->type = PTR_TO_MAP_VALUE;
11963                 dst_reg->off = aux->map_off;
11964                 WARN_ON_ONCE(map->max_entries != 1);
11965                 /* We want reg->id to be same (0) as map_value is not distinct */
11966         } else if (insn->src_reg == BPF_PSEUDO_MAP_FD ||
11967                    insn->src_reg == BPF_PSEUDO_MAP_IDX) {
11968                 dst_reg->type = CONST_PTR_TO_MAP;
11969         } else {
11970                 verbose(env, "bpf verifier is misconfigured\n");
11971                 return -EINVAL;
11972         }
11973
11974         return 0;
11975 }
11976
11977 static bool may_access_skb(enum bpf_prog_type type)
11978 {
11979         switch (type) {
11980         case BPF_PROG_TYPE_SOCKET_FILTER:
11981         case BPF_PROG_TYPE_SCHED_CLS:
11982         case BPF_PROG_TYPE_SCHED_ACT:
11983                 return true;
11984         default:
11985                 return false;
11986         }
11987 }
11988
11989 /* verify safety of LD_ABS|LD_IND instructions:
11990  * - they can only appear in the programs where ctx == skb
11991  * - since they are wrappers of function calls, they scratch R1-R5 registers,
11992  *   preserve R6-R9, and store return value into R0
11993  *
11994  * Implicit input:
11995  *   ctx == skb == R6 == CTX
11996  *
11997  * Explicit input:
11998  *   SRC == any register
11999  *   IMM == 32-bit immediate
12000  *
12001  * Output:
12002  *   R0 - 8/16/32-bit skb data converted to cpu endianness
12003  */
12004 static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
12005 {
12006         struct bpf_reg_state *regs = cur_regs(env);
12007         static const int ctx_reg = BPF_REG_6;
12008         u8 mode = BPF_MODE(insn->code);
12009         int i, err;
12010
12011         if (!may_access_skb(resolve_prog_type(env->prog))) {
12012                 verbose(env, "BPF_LD_[ABS|IND] instructions not allowed for this program type\n");
12013                 return -EINVAL;
12014         }
12015
12016         if (!env->ops->gen_ld_abs) {
12017                 verbose(env, "bpf verifier is misconfigured\n");
12018                 return -EINVAL;
12019         }
12020
12021         if (insn->dst_reg != BPF_REG_0 || insn->off != 0 ||
12022             BPF_SIZE(insn->code) == BPF_DW ||
12023             (mode == BPF_ABS && insn->src_reg != BPF_REG_0)) {
12024                 verbose(env, "BPF_LD_[ABS|IND] uses reserved fields\n");
12025                 return -EINVAL;
12026         }
12027
12028         /* check whether implicit source operand (register R6) is readable */
12029         err = check_reg_arg(env, ctx_reg, SRC_OP);
12030         if (err)
12031                 return err;
12032
12033         /* Disallow usage of BPF_LD_[ABS|IND] with reference tracking, as
12034          * gen_ld_abs() may terminate the program at runtime, leading to
12035          * reference leak.
12036          */
12037         err = check_reference_leak(env);
12038         if (err) {
12039                 verbose(env, "BPF_LD_[ABS|IND] cannot be mixed with socket references\n");
12040                 return err;
12041         }
12042
12043         if (env->cur_state->active_lock.ptr) {
12044                 verbose(env, "BPF_LD_[ABS|IND] cannot be used inside bpf_spin_lock-ed region\n");
12045                 return -EINVAL;
12046         }
12047
12048         if (env->cur_state->active_rcu_lock) {
12049                 verbose(env, "BPF_LD_[ABS|IND] cannot be used inside bpf_rcu_read_lock-ed region\n");
12050                 return -EINVAL;
12051         }
12052
12053         if (regs[ctx_reg].type != PTR_TO_CTX) {
12054                 verbose(env,
12055                         "at the time of BPF_LD_ABS|IND R6 != pointer to skb\n");
12056                 return -EINVAL;
12057         }
12058
12059         if (mode == BPF_IND) {
12060                 /* check explicit source operand */
12061                 err = check_reg_arg(env, insn->src_reg, SRC_OP);
12062                 if (err)
12063                         return err;
12064         }
12065
12066         err = check_ptr_off_reg(env, &regs[ctx_reg], ctx_reg);
12067         if (err < 0)
12068                 return err;
12069
12070         /* reset caller saved regs to unreadable */
12071         for (i = 0; i < CALLER_SAVED_REGS; i++) {
12072                 mark_reg_not_init(env, regs, caller_saved[i]);
12073                 check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
12074         }
12075
12076         /* mark destination R0 register as readable, since it contains
12077          * the value fetched from the packet.
12078          * Already marked as written above.
12079          */
12080         mark_reg_unknown(env, regs, BPF_REG_0);
12081         /* ld_abs load up to 32-bit skb data. */
12082         regs[BPF_REG_0].subreg_def = env->insn_idx + 1;
12083         return 0;
12084 }
12085
12086 static int check_return_code(struct bpf_verifier_env *env)
12087 {
12088         struct tnum enforce_attach_type_range = tnum_unknown;
12089         const struct bpf_prog *prog = env->prog;
12090         struct bpf_reg_state *reg;
12091         struct tnum range = tnum_range(0, 1);
12092         enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
12093         int err;
12094         struct bpf_func_state *frame = env->cur_state->frame[0];
12095         const bool is_subprog = frame->subprogno;
12096
12097         /* LSM and struct_ops func-ptr's return type could be "void" */
12098         if (!is_subprog) {
12099                 switch (prog_type) {
12100                 case BPF_PROG_TYPE_LSM:
12101                         if (prog->expected_attach_type == BPF_LSM_CGROUP)
12102                                 /* See below, can be 0 or 0-1 depending on hook. */
12103                                 break;
12104                         fallthrough;
12105                 case BPF_PROG_TYPE_STRUCT_OPS:
12106                         if (!prog->aux->attach_func_proto->type)
12107                                 return 0;
12108                         break;
12109                 default:
12110                         break;
12111                 }
12112         }
12113
12114         /* eBPF calling convention is such that R0 is used
12115          * to return the value from eBPF program.
12116          * Make sure that it's readable at this time
12117          * of bpf_exit, which means that program wrote
12118          * something into it earlier
12119          */
12120         err = check_reg_arg(env, BPF_REG_0, SRC_OP);
12121         if (err)
12122                 return err;
12123
12124         if (is_pointer_value(env, BPF_REG_0)) {
12125                 verbose(env, "R0 leaks addr as return value\n");
12126                 return -EACCES;
12127         }
12128
12129         reg = cur_regs(env) + BPF_REG_0;
12130
12131         if (frame->in_async_callback_fn) {
12132                 /* enforce return zero from async callbacks like timer */
12133                 if (reg->type != SCALAR_VALUE) {
12134                         verbose(env, "In async callback the register R0 is not a known value (%s)\n",
12135                                 reg_type_str(env, reg->type));
12136                         return -EINVAL;
12137                 }
12138
12139                 if (!tnum_in(tnum_const(0), reg->var_off)) {
12140                         verbose_invalid_scalar(env, reg, &range, "async callback", "R0");
12141                         return -EINVAL;
12142                 }
12143                 return 0;
12144         }
12145
12146         if (is_subprog) {
12147                 if (reg->type != SCALAR_VALUE) {
12148                         verbose(env, "At subprogram exit the register R0 is not a scalar value (%s)\n",
12149                                 reg_type_str(env, reg->type));
12150                         return -EINVAL;
12151                 }
12152                 return 0;
12153         }
12154
12155         switch (prog_type) {
12156         case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
12157                 if (env->prog->expected_attach_type == BPF_CGROUP_UDP4_RECVMSG ||
12158                     env->prog->expected_attach_type == BPF_CGROUP_UDP6_RECVMSG ||
12159                     env->prog->expected_attach_type == BPF_CGROUP_INET4_GETPEERNAME ||
12160                     env->prog->expected_attach_type == BPF_CGROUP_INET6_GETPEERNAME ||
12161                     env->prog->expected_attach_type == BPF_CGROUP_INET4_GETSOCKNAME ||
12162                     env->prog->expected_attach_type == BPF_CGROUP_INET6_GETSOCKNAME)
12163                         range = tnum_range(1, 1);
12164                 if (env->prog->expected_attach_type == BPF_CGROUP_INET4_BIND ||
12165                     env->prog->expected_attach_type == BPF_CGROUP_INET6_BIND)
12166                         range = tnum_range(0, 3);
12167                 break;
12168         case BPF_PROG_TYPE_CGROUP_SKB:
12169                 if (env->prog->expected_attach_type == BPF_CGROUP_INET_EGRESS) {
12170                         range = tnum_range(0, 3);
12171                         enforce_attach_type_range = tnum_range(2, 3);
12172                 }
12173                 break;
12174         case BPF_PROG_TYPE_CGROUP_SOCK:
12175         case BPF_PROG_TYPE_SOCK_OPS:
12176         case BPF_PROG_TYPE_CGROUP_DEVICE:
12177         case BPF_PROG_TYPE_CGROUP_SYSCTL:
12178         case BPF_PROG_TYPE_CGROUP_SOCKOPT:
12179                 break;
12180         case BPF_PROG_TYPE_RAW_TRACEPOINT:
12181                 if (!env->prog->aux->attach_btf_id)
12182                         return 0;
12183                 range = tnum_const(0);
12184                 break;
12185         case BPF_PROG_TYPE_TRACING:
12186                 switch (env->prog->expected_attach_type) {
12187                 case BPF_TRACE_FENTRY:
12188                 case BPF_TRACE_FEXIT:
12189                         range = tnum_const(0);
12190                         break;
12191                 case BPF_TRACE_RAW_TP:
12192                 case BPF_MODIFY_RETURN:
12193                         return 0;
12194                 case BPF_TRACE_ITER:
12195                         break;
12196                 default:
12197                         return -ENOTSUPP;
12198                 }
12199                 break;
12200         case BPF_PROG_TYPE_SK_LOOKUP:
12201                 range = tnum_range(SK_DROP, SK_PASS);
12202                 break;
12203
12204         case BPF_PROG_TYPE_LSM:
12205                 if (env->prog->expected_attach_type != BPF_LSM_CGROUP) {
12206                         /* Regular BPF_PROG_TYPE_LSM programs can return
12207                          * any value.
12208                          */
12209                         return 0;
12210                 }
12211                 if (!env->prog->aux->attach_func_proto->type) {
12212                         /* Make sure programs that attach to void
12213                          * hooks don't try to modify return value.
12214                          */
12215                         range = tnum_range(1, 1);
12216                 }
12217                 break;
12218
12219         case BPF_PROG_TYPE_EXT:
12220                 /* freplace program can return anything as its return value
12221                  * depends on the to-be-replaced kernel func or bpf program.
12222                  */
12223         default:
12224                 return 0;
12225         }
12226
12227         if (reg->type != SCALAR_VALUE) {
12228                 verbose(env, "At program exit the register R0 is not a known value (%s)\n",
12229                         reg_type_str(env, reg->type));
12230                 return -EINVAL;
12231         }
12232
12233         if (!tnum_in(range, reg->var_off)) {
12234                 verbose_invalid_scalar(env, reg, &range, "program exit", "R0");
12235                 if (prog->expected_attach_type == BPF_LSM_CGROUP &&
12236                     prog_type == BPF_PROG_TYPE_LSM &&
12237                     !prog->aux->attach_func_proto->type)
12238                         verbose(env, "Note, BPF_LSM_CGROUP that attach to void LSM hooks can't modify return value!\n");
12239                 return -EINVAL;
12240         }
12241
12242         if (!tnum_is_unknown(enforce_attach_type_range) &&
12243             tnum_in(enforce_attach_type_range, reg->var_off))
12244                 env->prog->enforce_expected_attach_type = 1;
12245         return 0;
12246 }
12247
12248 /* non-recursive DFS pseudo code
12249  * 1  procedure DFS-iterative(G,v):
12250  * 2      label v as discovered
12251  * 3      let S be a stack
12252  * 4      S.push(v)
12253  * 5      while S is not empty
12254  * 6            t <- S.peek()
12255  * 7            if t is what we're looking for:
12256  * 8                return t
12257  * 9            for all edges e in G.adjacentEdges(t) do
12258  * 10               if edge e is already labelled
12259  * 11                   continue with the next edge
12260  * 12               w <- G.adjacentVertex(t,e)
12261  * 13               if vertex w is not discovered and not explored
12262  * 14                   label e as tree-edge
12263  * 15                   label w as discovered
12264  * 16                   S.push(w)
12265  * 17                   continue at 5
12266  * 18               else if vertex w is discovered
12267  * 19                   label e as back-edge
12268  * 20               else
12269  * 21                   // vertex w is explored
12270  * 22                   label e as forward- or cross-edge
12271  * 23           label t as explored
12272  * 24           S.pop()
12273  *
12274  * convention:
12275  * 0x10 - discovered
12276  * 0x11 - discovered and fall-through edge labelled
12277  * 0x12 - discovered and fall-through and branch edges labelled
12278  * 0x20 - explored
12279  */
12280
12281 enum {
12282         DISCOVERED = 0x10,
12283         EXPLORED = 0x20,
12284         FALLTHROUGH = 1,
12285         BRANCH = 2,
12286 };
12287
12288 static u32 state_htab_size(struct bpf_verifier_env *env)
12289 {
12290         return env->prog->len;
12291 }
12292
12293 static struct bpf_verifier_state_list **explored_state(
12294                                         struct bpf_verifier_env *env,
12295                                         int idx)
12296 {
12297         struct bpf_verifier_state *cur = env->cur_state;
12298         struct bpf_func_state *state = cur->frame[cur->curframe];
12299
12300         return &env->explored_states[(idx ^ state->callsite) % state_htab_size(env)];
12301 }
12302
12303 static void mark_prune_point(struct bpf_verifier_env *env, int idx)
12304 {
12305         env->insn_aux_data[idx].prune_point = true;
12306 }
12307
12308 static bool is_prune_point(struct bpf_verifier_env *env, int insn_idx)
12309 {
12310         return env->insn_aux_data[insn_idx].prune_point;
12311 }
12312
12313 enum {
12314         DONE_EXPLORING = 0,
12315         KEEP_EXPLORING = 1,
12316 };
12317
12318 /* t, w, e - match pseudo-code above:
12319  * t - index of current instruction
12320  * w - next instruction
12321  * e - edge
12322  */
12323 static int push_insn(int t, int w, int e, struct bpf_verifier_env *env,
12324                      bool loop_ok)
12325 {
12326         int *insn_stack = env->cfg.insn_stack;
12327         int *insn_state = env->cfg.insn_state;
12328
12329         if (e == FALLTHROUGH && insn_state[t] >= (DISCOVERED | FALLTHROUGH))
12330                 return DONE_EXPLORING;
12331
12332         if (e == BRANCH && insn_state[t] >= (DISCOVERED | BRANCH))
12333                 return DONE_EXPLORING;
12334
12335         if (w < 0 || w >= env->prog->len) {
12336                 verbose_linfo(env, t, "%d: ", t);
12337                 verbose(env, "jump out of range from insn %d to %d\n", t, w);
12338                 return -EINVAL;
12339         }
12340
12341         if (e == BRANCH) {
12342                 /* mark branch target for state pruning */
12343                 mark_prune_point(env, w);
12344                 mark_jmp_point(env, w);
12345         }
12346
12347         if (insn_state[w] == 0) {
12348                 /* tree-edge */
12349                 insn_state[t] = DISCOVERED | e;
12350                 insn_state[w] = DISCOVERED;
12351                 if (env->cfg.cur_stack >= env->prog->len)
12352                         return -E2BIG;
12353                 insn_stack[env->cfg.cur_stack++] = w;
12354                 return KEEP_EXPLORING;
12355         } else if ((insn_state[w] & 0xF0) == DISCOVERED) {
12356                 if (loop_ok && env->bpf_capable)
12357                         return DONE_EXPLORING;
12358                 verbose_linfo(env, t, "%d: ", t);
12359                 verbose_linfo(env, w, "%d: ", w);
12360                 verbose(env, "back-edge from insn %d to %d\n", t, w);
12361                 return -EINVAL;
12362         } else if (insn_state[w] == EXPLORED) {
12363                 /* forward- or cross-edge */
12364                 insn_state[t] = DISCOVERED | e;
12365         } else {
12366                 verbose(env, "insn state internal bug\n");
12367                 return -EFAULT;
12368         }
12369         return DONE_EXPLORING;
12370 }
12371
12372 static int visit_func_call_insn(int t, struct bpf_insn *insns,
12373                                 struct bpf_verifier_env *env,
12374                                 bool visit_callee)
12375 {
12376         int ret;
12377
12378         ret = push_insn(t, t + 1, FALLTHROUGH, env, false);
12379         if (ret)
12380                 return ret;
12381
12382         mark_prune_point(env, t + 1);
12383         /* when we exit from subprog, we need to record non-linear history */
12384         mark_jmp_point(env, t + 1);
12385
12386         if (visit_callee) {
12387                 mark_prune_point(env, t);
12388                 ret = push_insn(t, t + insns[t].imm + 1, BRANCH, env,
12389                                 /* It's ok to allow recursion from CFG point of
12390                                  * view. __check_func_call() will do the actual
12391                                  * check.
12392                                  */
12393                                 bpf_pseudo_func(insns + t));
12394         }
12395         return ret;
12396 }
12397
12398 /* Visits the instruction at index t and returns one of the following:
12399  *  < 0 - an error occurred
12400  *  DONE_EXPLORING - the instruction was fully explored
12401  *  KEEP_EXPLORING - there is still work to be done before it is fully explored
12402  */
12403 static int visit_insn(int t, struct bpf_verifier_env *env)
12404 {
12405         struct bpf_insn *insns = env->prog->insnsi;
12406         int ret;
12407
12408         if (bpf_pseudo_func(insns + t))
12409                 return visit_func_call_insn(t, insns, env, true);
12410
12411         /* All non-branch instructions have a single fall-through edge. */
12412         if (BPF_CLASS(insns[t].code) != BPF_JMP &&
12413             BPF_CLASS(insns[t].code) != BPF_JMP32)
12414                 return push_insn(t, t + 1, FALLTHROUGH, env, false);
12415
12416         switch (BPF_OP(insns[t].code)) {
12417         case BPF_EXIT:
12418                 return DONE_EXPLORING;
12419
12420         case BPF_CALL:
12421                 if (insns[t].imm == BPF_FUNC_timer_set_callback)
12422                         /* Mark this call insn as a prune point to trigger
12423                          * is_state_visited() check before call itself is
12424                          * processed by __check_func_call(). Otherwise new
12425                          * async state will be pushed for further exploration.
12426                          */
12427                         mark_prune_point(env, t);
12428                 return visit_func_call_insn(t, insns, env,
12429                                             insns[t].src_reg == BPF_PSEUDO_CALL);
12430
12431         case BPF_JA:
12432                 if (BPF_SRC(insns[t].code) != BPF_K)
12433                         return -EINVAL;
12434
12435                 /* unconditional jump with single edge */
12436                 ret = push_insn(t, t + insns[t].off + 1, FALLTHROUGH, env,
12437                                 true);
12438                 if (ret)
12439                         return ret;
12440
12441                 mark_prune_point(env, t + insns[t].off + 1);
12442                 mark_jmp_point(env, t + insns[t].off + 1);
12443
12444                 return ret;
12445
12446         default:
12447                 /* conditional jump with two edges */
12448                 mark_prune_point(env, t);
12449
12450                 ret = push_insn(t, t + 1, FALLTHROUGH, env, true);
12451                 if (ret)
12452                         return ret;
12453
12454                 return push_insn(t, t + insns[t].off + 1, BRANCH, env, true);
12455         }
12456 }
12457
12458 /* non-recursive depth-first-search to detect loops in BPF program
12459  * loop == back-edge in directed graph
12460  */
12461 static int check_cfg(struct bpf_verifier_env *env)
12462 {
12463         int insn_cnt = env->prog->len;
12464         int *insn_stack, *insn_state;
12465         int ret = 0;
12466         int i;
12467
12468         insn_state = env->cfg.insn_state = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
12469         if (!insn_state)
12470                 return -ENOMEM;
12471
12472         insn_stack = env->cfg.insn_stack = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
12473         if (!insn_stack) {
12474                 kvfree(insn_state);
12475                 return -ENOMEM;
12476         }
12477
12478         insn_state[0] = DISCOVERED; /* mark 1st insn as discovered */
12479         insn_stack[0] = 0; /* 0 is the first instruction */
12480         env->cfg.cur_stack = 1;
12481
12482         while (env->cfg.cur_stack > 0) {
12483                 int t = insn_stack[env->cfg.cur_stack - 1];
12484
12485                 ret = visit_insn(t, env);
12486                 switch (ret) {
12487                 case DONE_EXPLORING:
12488                         insn_state[t] = EXPLORED;
12489                         env->cfg.cur_stack--;
12490                         break;
12491                 case KEEP_EXPLORING:
12492                         break;
12493                 default:
12494                         if (ret > 0) {
12495                                 verbose(env, "visit_insn internal bug\n");
12496                                 ret = -EFAULT;
12497                         }
12498                         goto err_free;
12499                 }
12500         }
12501
12502         if (env->cfg.cur_stack < 0) {
12503                 verbose(env, "pop stack internal bug\n");
12504                 ret = -EFAULT;
12505                 goto err_free;
12506         }
12507
12508         for (i = 0; i < insn_cnt; i++) {
12509                 if (insn_state[i] != EXPLORED) {
12510                         verbose(env, "unreachable insn %d\n", i);
12511                         ret = -EINVAL;
12512                         goto err_free;
12513                 }
12514         }
12515         ret = 0; /* cfg looks good */
12516
12517 err_free:
12518         kvfree(insn_state);
12519         kvfree(insn_stack);
12520         env->cfg.insn_state = env->cfg.insn_stack = NULL;
12521         return ret;
12522 }
12523
12524 static int check_abnormal_return(struct bpf_verifier_env *env)
12525 {
12526         int i;
12527
12528         for (i = 1; i < env->subprog_cnt; i++) {
12529                 if (env->subprog_info[i].has_ld_abs) {
12530                         verbose(env, "LD_ABS is not allowed in subprogs without BTF\n");
12531                         return -EINVAL;
12532                 }
12533                 if (env->subprog_info[i].has_tail_call) {
12534                         verbose(env, "tail_call is not allowed in subprogs without BTF\n");
12535                         return -EINVAL;
12536                 }
12537         }
12538         return 0;
12539 }
12540
12541 /* The minimum supported BTF func info size */
12542 #define MIN_BPF_FUNCINFO_SIZE   8
12543 #define MAX_FUNCINFO_REC_SIZE   252
12544
12545 static int check_btf_func(struct bpf_verifier_env *env,
12546                           const union bpf_attr *attr,
12547                           bpfptr_t uattr)
12548 {
12549         const struct btf_type *type, *func_proto, *ret_type;
12550         u32 i, nfuncs, urec_size, min_size;
12551         u32 krec_size = sizeof(struct bpf_func_info);
12552         struct bpf_func_info *krecord;
12553         struct bpf_func_info_aux *info_aux = NULL;
12554         struct bpf_prog *prog;
12555         const struct btf *btf;
12556         bpfptr_t urecord;
12557         u32 prev_offset = 0;
12558         bool scalar_return;
12559         int ret = -ENOMEM;
12560
12561         nfuncs = attr->func_info_cnt;
12562         if (!nfuncs) {
12563                 if (check_abnormal_return(env))
12564                         return -EINVAL;
12565                 return 0;
12566         }
12567
12568         if (nfuncs != env->subprog_cnt) {
12569                 verbose(env, "number of funcs in func_info doesn't match number of subprogs\n");
12570                 return -EINVAL;
12571         }
12572
12573         urec_size = attr->func_info_rec_size;
12574         if (urec_size < MIN_BPF_FUNCINFO_SIZE ||
12575             urec_size > MAX_FUNCINFO_REC_SIZE ||
12576             urec_size % sizeof(u32)) {
12577                 verbose(env, "invalid func info rec size %u\n", urec_size);
12578                 return -EINVAL;
12579         }
12580
12581         prog = env->prog;
12582         btf = prog->aux->btf;
12583
12584         urecord = make_bpfptr(attr->func_info, uattr.is_kernel);
12585         min_size = min_t(u32, krec_size, urec_size);
12586
12587         krecord = kvcalloc(nfuncs, krec_size, GFP_KERNEL | __GFP_NOWARN);
12588         if (!krecord)
12589                 return -ENOMEM;
12590         info_aux = kcalloc(nfuncs, sizeof(*info_aux), GFP_KERNEL | __GFP_NOWARN);
12591         if (!info_aux)
12592                 goto err_free;
12593
12594         for (i = 0; i < nfuncs; i++) {
12595                 ret = bpf_check_uarg_tail_zero(urecord, krec_size, urec_size);
12596                 if (ret) {
12597                         if (ret == -E2BIG) {
12598                                 verbose(env, "nonzero tailing record in func info");
12599                                 /* set the size kernel expects so loader can zero
12600                                  * out the rest of the record.
12601                                  */
12602                                 if (copy_to_bpfptr_offset(uattr,
12603                                                           offsetof(union bpf_attr, func_info_rec_size),
12604                                                           &min_size, sizeof(min_size)))
12605                                         ret = -EFAULT;
12606                         }
12607                         goto err_free;
12608                 }
12609
12610                 if (copy_from_bpfptr(&krecord[i], urecord, min_size)) {
12611                         ret = -EFAULT;
12612                         goto err_free;
12613                 }
12614
12615                 /* check insn_off */
12616                 ret = -EINVAL;
12617                 if (i == 0) {
12618                         if (krecord[i].insn_off) {
12619                                 verbose(env,
12620                                         "nonzero insn_off %u for the first func info record",
12621                                         krecord[i].insn_off);
12622                                 goto err_free;
12623                         }
12624                 } else if (krecord[i].insn_off <= prev_offset) {
12625                         verbose(env,
12626                                 "same or smaller insn offset (%u) than previous func info record (%u)",
12627                                 krecord[i].insn_off, prev_offset);
12628                         goto err_free;
12629                 }
12630
12631                 if (env->subprog_info[i].start != krecord[i].insn_off) {
12632                         verbose(env, "func_info BTF section doesn't match subprog layout in BPF program\n");
12633                         goto err_free;
12634                 }
12635
12636                 /* check type_id */
12637                 type = btf_type_by_id(btf, krecord[i].type_id);
12638                 if (!type || !btf_type_is_func(type)) {
12639                         verbose(env, "invalid type id %d in func info",
12640                                 krecord[i].type_id);
12641                         goto err_free;
12642                 }
12643                 info_aux[i].linkage = BTF_INFO_VLEN(type->info);
12644
12645                 func_proto = btf_type_by_id(btf, type->type);
12646                 if (unlikely(!func_proto || !btf_type_is_func_proto(func_proto)))
12647                         /* btf_func_check() already verified it during BTF load */
12648                         goto err_free;
12649                 ret_type = btf_type_skip_modifiers(btf, func_proto->type, NULL);
12650                 scalar_return =
12651                         btf_type_is_small_int(ret_type) || btf_is_any_enum(ret_type);
12652                 if (i && !scalar_return && env->subprog_info[i].has_ld_abs) {
12653                         verbose(env, "LD_ABS is only allowed in functions that return 'int'.\n");
12654                         goto err_free;
12655                 }
12656                 if (i && !scalar_return && env->subprog_info[i].has_tail_call) {
12657                         verbose(env, "tail_call is only allowed in functions that return 'int'.\n");
12658                         goto err_free;
12659                 }
12660
12661                 prev_offset = krecord[i].insn_off;
12662                 bpfptr_add(&urecord, urec_size);
12663         }
12664
12665         prog->aux->func_info = krecord;
12666         prog->aux->func_info_cnt = nfuncs;
12667         prog->aux->func_info_aux = info_aux;
12668         return 0;
12669
12670 err_free:
12671         kvfree(krecord);
12672         kfree(info_aux);
12673         return ret;
12674 }
12675
12676 static void adjust_btf_func(struct bpf_verifier_env *env)
12677 {
12678         struct bpf_prog_aux *aux = env->prog->aux;
12679         int i;
12680
12681         if (!aux->func_info)
12682                 return;
12683
12684         for (i = 0; i < env->subprog_cnt; i++)
12685                 aux->func_info[i].insn_off = env->subprog_info[i].start;
12686 }
12687
12688 #define MIN_BPF_LINEINFO_SIZE   offsetofend(struct bpf_line_info, line_col)
12689 #define MAX_LINEINFO_REC_SIZE   MAX_FUNCINFO_REC_SIZE
12690
12691 static int check_btf_line(struct bpf_verifier_env *env,
12692                           const union bpf_attr *attr,
12693                           bpfptr_t uattr)
12694 {
12695         u32 i, s, nr_linfo, ncopy, expected_size, rec_size, prev_offset = 0;
12696         struct bpf_subprog_info *sub;
12697         struct bpf_line_info *linfo;
12698         struct bpf_prog *prog;
12699         const struct btf *btf;
12700         bpfptr_t ulinfo;
12701         int err;
12702
12703         nr_linfo = attr->line_info_cnt;
12704         if (!nr_linfo)
12705                 return 0;
12706         if (nr_linfo > INT_MAX / sizeof(struct bpf_line_info))
12707                 return -EINVAL;
12708
12709         rec_size = attr->line_info_rec_size;
12710         if (rec_size < MIN_BPF_LINEINFO_SIZE ||
12711             rec_size > MAX_LINEINFO_REC_SIZE ||
12712             rec_size & (sizeof(u32) - 1))
12713                 return -EINVAL;
12714
12715         /* Need to zero it in case the userspace may
12716          * pass in a smaller bpf_line_info object.
12717          */
12718         linfo = kvcalloc(nr_linfo, sizeof(struct bpf_line_info),
12719                          GFP_KERNEL | __GFP_NOWARN);
12720         if (!linfo)
12721                 return -ENOMEM;
12722
12723         prog = env->prog;
12724         btf = prog->aux->btf;
12725
12726         s = 0;
12727         sub = env->subprog_info;
12728         ulinfo = make_bpfptr(attr->line_info, uattr.is_kernel);
12729         expected_size = sizeof(struct bpf_line_info);
12730         ncopy = min_t(u32, expected_size, rec_size);
12731         for (i = 0; i < nr_linfo; i++) {
12732                 err = bpf_check_uarg_tail_zero(ulinfo, expected_size, rec_size);
12733                 if (err) {
12734                         if (err == -E2BIG) {
12735                                 verbose(env, "nonzero tailing record in line_info");
12736                                 if (copy_to_bpfptr_offset(uattr,
12737                                                           offsetof(union bpf_attr, line_info_rec_size),
12738                                                           &expected_size, sizeof(expected_size)))
12739                                         err = -EFAULT;
12740                         }
12741                         goto err_free;
12742                 }
12743
12744                 if (copy_from_bpfptr(&linfo[i], ulinfo, ncopy)) {
12745                         err = -EFAULT;
12746                         goto err_free;
12747                 }
12748
12749                 /*
12750                  * Check insn_off to ensure
12751                  * 1) strictly increasing AND
12752                  * 2) bounded by prog->len
12753                  *
12754                  * The linfo[0].insn_off == 0 check logically falls into
12755                  * the later "missing bpf_line_info for func..." case
12756                  * because the first linfo[0].insn_off must be the
12757                  * first sub also and the first sub must have
12758                  * subprog_info[0].start == 0.
12759                  */
12760                 if ((i && linfo[i].insn_off <= prev_offset) ||
12761                     linfo[i].insn_off >= prog->len) {
12762                         verbose(env, "Invalid line_info[%u].insn_off:%u (prev_offset:%u prog->len:%u)\n",
12763                                 i, linfo[i].insn_off, prev_offset,
12764                                 prog->len);
12765                         err = -EINVAL;
12766                         goto err_free;
12767                 }
12768
12769                 if (!prog->insnsi[linfo[i].insn_off].code) {
12770                         verbose(env,
12771                                 "Invalid insn code at line_info[%u].insn_off\n",
12772                                 i);
12773                         err = -EINVAL;
12774                         goto err_free;
12775                 }
12776
12777                 if (!btf_name_by_offset(btf, linfo[i].line_off) ||
12778                     !btf_name_by_offset(btf, linfo[i].file_name_off)) {
12779                         verbose(env, "Invalid line_info[%u].line_off or .file_name_off\n", i);
12780                         err = -EINVAL;
12781                         goto err_free;
12782                 }
12783
12784                 if (s != env->subprog_cnt) {
12785                         if (linfo[i].insn_off == sub[s].start) {
12786                                 sub[s].linfo_idx = i;
12787                                 s++;
12788                         } else if (sub[s].start < linfo[i].insn_off) {
12789                                 verbose(env, "missing bpf_line_info for func#%u\n", s);
12790                                 err = -EINVAL;
12791                                 goto err_free;
12792                         }
12793                 }
12794
12795                 prev_offset = linfo[i].insn_off;
12796                 bpfptr_add(&ulinfo, rec_size);
12797         }
12798
12799         if (s != env->subprog_cnt) {
12800                 verbose(env, "missing bpf_line_info for %u funcs starting from func#%u\n",
12801                         env->subprog_cnt - s, s);
12802                 err = -EINVAL;
12803                 goto err_free;
12804         }
12805
12806         prog->aux->linfo = linfo;
12807         prog->aux->nr_linfo = nr_linfo;
12808
12809         return 0;
12810
12811 err_free:
12812         kvfree(linfo);
12813         return err;
12814 }
12815
12816 #define MIN_CORE_RELO_SIZE      sizeof(struct bpf_core_relo)
12817 #define MAX_CORE_RELO_SIZE      MAX_FUNCINFO_REC_SIZE
12818
12819 static int check_core_relo(struct bpf_verifier_env *env,
12820                            const union bpf_attr *attr,
12821                            bpfptr_t uattr)
12822 {
12823         u32 i, nr_core_relo, ncopy, expected_size, rec_size;
12824         struct bpf_core_relo core_relo = {};
12825         struct bpf_prog *prog = env->prog;
12826         const struct btf *btf = prog->aux->btf;
12827         struct bpf_core_ctx ctx = {
12828                 .log = &env->log,
12829                 .btf = btf,
12830         };
12831         bpfptr_t u_core_relo;
12832         int err;
12833
12834         nr_core_relo = attr->core_relo_cnt;
12835         if (!nr_core_relo)
12836                 return 0;
12837         if (nr_core_relo > INT_MAX / sizeof(struct bpf_core_relo))
12838                 return -EINVAL;
12839
12840         rec_size = attr->core_relo_rec_size;
12841         if (rec_size < MIN_CORE_RELO_SIZE ||
12842             rec_size > MAX_CORE_RELO_SIZE ||
12843             rec_size % sizeof(u32))
12844                 return -EINVAL;
12845
12846         u_core_relo = make_bpfptr(attr->core_relos, uattr.is_kernel);
12847         expected_size = sizeof(struct bpf_core_relo);
12848         ncopy = min_t(u32, expected_size, rec_size);
12849
12850         /* Unlike func_info and line_info, copy and apply each CO-RE
12851          * relocation record one at a time.
12852          */
12853         for (i = 0; i < nr_core_relo; i++) {
12854                 /* future proofing when sizeof(bpf_core_relo) changes */
12855                 err = bpf_check_uarg_tail_zero(u_core_relo, expected_size, rec_size);
12856                 if (err) {
12857                         if (err == -E2BIG) {
12858                                 verbose(env, "nonzero tailing record in core_relo");
12859                                 if (copy_to_bpfptr_offset(uattr,
12860                                                           offsetof(union bpf_attr, core_relo_rec_size),
12861                                                           &expected_size, sizeof(expected_size)))
12862                                         err = -EFAULT;
12863                         }
12864                         break;
12865                 }
12866
12867                 if (copy_from_bpfptr(&core_relo, u_core_relo, ncopy)) {
12868                         err = -EFAULT;
12869                         break;
12870                 }
12871
12872                 if (core_relo.insn_off % 8 || core_relo.insn_off / 8 >= prog->len) {
12873                         verbose(env, "Invalid core_relo[%u].insn_off:%u prog->len:%u\n",
12874                                 i, core_relo.insn_off, prog->len);
12875                         err = -EINVAL;
12876                         break;
12877                 }
12878
12879                 err = bpf_core_apply(&ctx, &core_relo, i,
12880                                      &prog->insnsi[core_relo.insn_off / 8]);
12881                 if (err)
12882                         break;
12883                 bpfptr_add(&u_core_relo, rec_size);
12884         }
12885         return err;
12886 }
12887
12888 static int check_btf_info(struct bpf_verifier_env *env,
12889                           const union bpf_attr *attr,
12890                           bpfptr_t uattr)
12891 {
12892         struct btf *btf;
12893         int err;
12894
12895         if (!attr->func_info_cnt && !attr->line_info_cnt) {
12896                 if (check_abnormal_return(env))
12897                         return -EINVAL;
12898                 return 0;
12899         }
12900
12901         btf = btf_get_by_fd(attr->prog_btf_fd);
12902         if (IS_ERR(btf))
12903                 return PTR_ERR(btf);
12904         if (btf_is_kernel(btf)) {
12905                 btf_put(btf);
12906                 return -EACCES;
12907         }
12908         env->prog->aux->btf = btf;
12909
12910         err = check_btf_func(env, attr, uattr);
12911         if (err)
12912                 return err;
12913
12914         err = check_btf_line(env, attr, uattr);
12915         if (err)
12916                 return err;
12917
12918         err = check_core_relo(env, attr, uattr);
12919         if (err)
12920                 return err;
12921
12922         return 0;
12923 }
12924
12925 /* check %cur's range satisfies %old's */
12926 static bool range_within(struct bpf_reg_state *old,
12927                          struct bpf_reg_state *cur)
12928 {
12929         return old->umin_value <= cur->umin_value &&
12930                old->umax_value >= cur->umax_value &&
12931                old->smin_value <= cur->smin_value &&
12932                old->smax_value >= cur->smax_value &&
12933                old->u32_min_value <= cur->u32_min_value &&
12934                old->u32_max_value >= cur->u32_max_value &&
12935                old->s32_min_value <= cur->s32_min_value &&
12936                old->s32_max_value >= cur->s32_max_value;
12937 }
12938
12939 /* If in the old state two registers had the same id, then they need to have
12940  * the same id in the new state as well.  But that id could be different from
12941  * the old state, so we need to track the mapping from old to new ids.
12942  * Once we have seen that, say, a reg with old id 5 had new id 9, any subsequent
12943  * regs with old id 5 must also have new id 9 for the new state to be safe.  But
12944  * regs with a different old id could still have new id 9, we don't care about
12945  * that.
12946  * So we look through our idmap to see if this old id has been seen before.  If
12947  * so, we require the new id to match; otherwise, we add the id pair to the map.
12948  */
12949 static bool check_ids(u32 old_id, u32 cur_id, struct bpf_id_pair *idmap)
12950 {
12951         unsigned int i;
12952
12953         for (i = 0; i < BPF_ID_MAP_SIZE; i++) {
12954                 if (!idmap[i].old) {
12955                         /* Reached an empty slot; haven't seen this id before */
12956                         idmap[i].old = old_id;
12957                         idmap[i].cur = cur_id;
12958                         return true;
12959                 }
12960                 if (idmap[i].old == old_id)
12961                         return idmap[i].cur == cur_id;
12962         }
12963         /* We ran out of idmap slots, which should be impossible */
12964         WARN_ON_ONCE(1);
12965         return false;
12966 }
12967
12968 static void clean_func_state(struct bpf_verifier_env *env,
12969                              struct bpf_func_state *st)
12970 {
12971         enum bpf_reg_liveness live;
12972         int i, j;
12973
12974         for (i = 0; i < BPF_REG_FP; i++) {
12975                 live = st->regs[i].live;
12976                 /* liveness must not touch this register anymore */
12977                 st->regs[i].live |= REG_LIVE_DONE;
12978                 if (!(live & REG_LIVE_READ))
12979                         /* since the register is unused, clear its state
12980                          * to make further comparison simpler
12981                          */
12982                         __mark_reg_not_init(env, &st->regs[i]);
12983         }
12984
12985         for (i = 0; i < st->allocated_stack / BPF_REG_SIZE; i++) {
12986                 live = st->stack[i].spilled_ptr.live;
12987                 /* liveness must not touch this stack slot anymore */
12988                 st->stack[i].spilled_ptr.live |= REG_LIVE_DONE;
12989                 if (!(live & REG_LIVE_READ)) {
12990                         __mark_reg_not_init(env, &st->stack[i].spilled_ptr);
12991                         for (j = 0; j < BPF_REG_SIZE; j++)
12992                                 st->stack[i].slot_type[j] = STACK_INVALID;
12993                 }
12994         }
12995 }
12996
12997 static void clean_verifier_state(struct bpf_verifier_env *env,
12998                                  struct bpf_verifier_state *st)
12999 {
13000         int i;
13001
13002         if (st->frame[0]->regs[0].live & REG_LIVE_DONE)
13003                 /* all regs in this state in all frames were already marked */
13004                 return;
13005
13006         for (i = 0; i <= st->curframe; i++)
13007                 clean_func_state(env, st->frame[i]);
13008 }
13009
13010 /* the parentage chains form a tree.
13011  * the verifier states are added to state lists at given insn and
13012  * pushed into state stack for future exploration.
13013  * when the verifier reaches bpf_exit insn some of the verifer states
13014  * stored in the state lists have their final liveness state already,
13015  * but a lot of states will get revised from liveness point of view when
13016  * the verifier explores other branches.
13017  * Example:
13018  * 1: r0 = 1
13019  * 2: if r1 == 100 goto pc+1
13020  * 3: r0 = 2
13021  * 4: exit
13022  * when the verifier reaches exit insn the register r0 in the state list of
13023  * insn 2 will be seen as !REG_LIVE_READ. Then the verifier pops the other_branch
13024  * of insn 2 and goes exploring further. At the insn 4 it will walk the
13025  * parentage chain from insn 4 into insn 2 and will mark r0 as REG_LIVE_READ.
13026  *
13027  * Since the verifier pushes the branch states as it sees them while exploring
13028  * the program the condition of walking the branch instruction for the second
13029  * time means that all states below this branch were already explored and
13030  * their final liveness marks are already propagated.
13031  * Hence when the verifier completes the search of state list in is_state_visited()
13032  * we can call this clean_live_states() function to mark all liveness states
13033  * as REG_LIVE_DONE to indicate that 'parent' pointers of 'struct bpf_reg_state'
13034  * will not be used.
13035  * This function also clears the registers and stack for states that !READ
13036  * to simplify state merging.
13037  *
13038  * Important note here that walking the same branch instruction in the callee
13039  * doesn't meant that the states are DONE. The verifier has to compare
13040  * the callsites
13041  */
13042 static void clean_live_states(struct bpf_verifier_env *env, int insn,
13043                               struct bpf_verifier_state *cur)
13044 {
13045         struct bpf_verifier_state_list *sl;
13046         int i;
13047
13048         sl = *explored_state(env, insn);
13049         while (sl) {
13050                 if (sl->state.branches)
13051                         goto next;
13052                 if (sl->state.insn_idx != insn ||
13053                     sl->state.curframe != cur->curframe)
13054                         goto next;
13055                 for (i = 0; i <= cur->curframe; i++)
13056                         if (sl->state.frame[i]->callsite != cur->frame[i]->callsite)
13057                                 goto next;
13058                 clean_verifier_state(env, &sl->state);
13059 next:
13060                 sl = sl->next;
13061         }
13062 }
13063
13064 /* Returns true if (rold safe implies rcur safe) */
13065 static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold,
13066                     struct bpf_reg_state *rcur, struct bpf_id_pair *idmap)
13067 {
13068         bool equal;
13069
13070         if (!(rold->live & REG_LIVE_READ))
13071                 /* explored state didn't use this */
13072                 return true;
13073
13074         equal = memcmp(rold, rcur, offsetof(struct bpf_reg_state, parent)) == 0;
13075
13076         if (rold->type == NOT_INIT)
13077                 /* explored state can't have used this */
13078                 return true;
13079         if (rcur->type == NOT_INIT)
13080                 return false;
13081         switch (base_type(rold->type)) {
13082         case SCALAR_VALUE:
13083                 if (equal)
13084                         return true;
13085                 if (env->explore_alu_limits)
13086                         return false;
13087                 if (rcur->type == SCALAR_VALUE) {
13088                         if (!rold->precise)
13089                                 return true;
13090                         /* new val must satisfy old val knowledge */
13091                         return range_within(rold, rcur) &&
13092                                tnum_in(rold->var_off, rcur->var_off);
13093                 } else {
13094                         /* We're trying to use a pointer in place of a scalar.
13095                          * Even if the scalar was unbounded, this could lead to
13096                          * pointer leaks because scalars are allowed to leak
13097                          * while pointers are not. We could make this safe in
13098                          * special cases if root is calling us, but it's
13099                          * probably not worth the hassle.
13100                          */
13101                         return false;
13102                 }
13103         case PTR_TO_MAP_KEY:
13104         case PTR_TO_MAP_VALUE:
13105                 /* a PTR_TO_MAP_VALUE could be safe to use as a
13106                  * PTR_TO_MAP_VALUE_OR_NULL into the same map.
13107                  * However, if the old PTR_TO_MAP_VALUE_OR_NULL then got NULL-
13108                  * checked, doing so could have affected others with the same
13109                  * id, and we can't check for that because we lost the id when
13110                  * we converted to a PTR_TO_MAP_VALUE.
13111                  */
13112                 if (type_may_be_null(rold->type)) {
13113                         if (!type_may_be_null(rcur->type))
13114                                 return false;
13115                         if (memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)))
13116                                 return false;
13117                         /* Check our ids match any regs they're supposed to */
13118                         return check_ids(rold->id, rcur->id, idmap);
13119                 }
13120
13121                 /* If the new min/max/var_off satisfy the old ones and
13122                  * everything else matches, we are OK.
13123                  * 'id' is not compared, since it's only used for maps with
13124                  * bpf_spin_lock inside map element and in such cases if
13125                  * the rest of the prog is valid for one map element then
13126                  * it's valid for all map elements regardless of the key
13127                  * used in bpf_map_lookup()
13128                  */
13129                 return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 &&
13130                        range_within(rold, rcur) &&
13131                        tnum_in(rold->var_off, rcur->var_off) &&
13132                        check_ids(rold->id, rcur->id, idmap);
13133         case PTR_TO_PACKET_META:
13134         case PTR_TO_PACKET:
13135                 if (rcur->type != rold->type)
13136                         return false;
13137                 /* We must have at least as much range as the old ptr
13138                  * did, so that any accesses which were safe before are
13139                  * still safe.  This is true even if old range < old off,
13140                  * since someone could have accessed through (ptr - k), or
13141                  * even done ptr -= k in a register, to get a safe access.
13142                  */
13143                 if (rold->range > rcur->range)
13144                         return false;
13145                 /* If the offsets don't match, we can't trust our alignment;
13146                  * nor can we be sure that we won't fall out of range.
13147                  */
13148                 if (rold->off != rcur->off)
13149                         return false;
13150                 /* id relations must be preserved */
13151                 if (rold->id && !check_ids(rold->id, rcur->id, idmap))
13152                         return false;
13153                 /* new val must satisfy old val knowledge */
13154                 return range_within(rold, rcur) &&
13155                        tnum_in(rold->var_off, rcur->var_off);
13156         case PTR_TO_STACK:
13157                 /* two stack pointers are equal only if they're pointing to
13158                  * the same stack frame, since fp-8 in foo != fp-8 in bar
13159                  */
13160                 return equal && rold->frameno == rcur->frameno;
13161         default:
13162                 /* Only valid matches are exact, which memcmp() */
13163                 return equal;
13164         }
13165
13166         /* Shouldn't get here; if we do, say it's not safe */
13167         WARN_ON_ONCE(1);
13168         return false;
13169 }
13170
13171 static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old,
13172                       struct bpf_func_state *cur, struct bpf_id_pair *idmap)
13173 {
13174         int i, spi;
13175
13176         /* walk slots of the explored stack and ignore any additional
13177          * slots in the current stack, since explored(safe) state
13178          * didn't use them
13179          */
13180         for (i = 0; i < old->allocated_stack; i++) {
13181                 spi = i / BPF_REG_SIZE;
13182
13183                 if (!(old->stack[spi].spilled_ptr.live & REG_LIVE_READ)) {
13184                         i += BPF_REG_SIZE - 1;
13185                         /* explored state didn't use this */
13186                         continue;
13187                 }
13188
13189                 if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_INVALID)
13190                         continue;
13191
13192                 /* explored stack has more populated slots than current stack
13193                  * and these slots were used
13194                  */
13195                 if (i >= cur->allocated_stack)
13196                         return false;
13197
13198                 /* if old state was safe with misc data in the stack
13199                  * it will be safe with zero-initialized stack.
13200                  * The opposite is not true
13201                  */
13202                 if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_MISC &&
13203                     cur->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_ZERO)
13204                         continue;
13205                 if (old->stack[spi].slot_type[i % BPF_REG_SIZE] !=
13206                     cur->stack[spi].slot_type[i % BPF_REG_SIZE])
13207                         /* Ex: old explored (safe) state has STACK_SPILL in
13208                          * this stack slot, but current has STACK_MISC ->
13209                          * this verifier states are not equivalent,
13210                          * return false to continue verification of this path
13211                          */
13212                         return false;
13213                 if (i % BPF_REG_SIZE != BPF_REG_SIZE - 1)
13214                         continue;
13215                 if (!is_spilled_reg(&old->stack[spi]))
13216                         continue;
13217                 if (!regsafe(env, &old->stack[spi].spilled_ptr,
13218                              &cur->stack[spi].spilled_ptr, idmap))
13219                         /* when explored and current stack slot are both storing
13220                          * spilled registers, check that stored pointers types
13221                          * are the same as well.
13222                          * Ex: explored safe path could have stored
13223                          * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -8}
13224                          * but current path has stored:
13225                          * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -16}
13226                          * such verifier states are not equivalent.
13227                          * return false to continue verification of this path
13228                          */
13229                         return false;
13230         }
13231         return true;
13232 }
13233
13234 static bool refsafe(struct bpf_func_state *old, struct bpf_func_state *cur)
13235 {
13236         if (old->acquired_refs != cur->acquired_refs)
13237                 return false;
13238         return !memcmp(old->refs, cur->refs,
13239                        sizeof(*old->refs) * old->acquired_refs);
13240 }
13241
13242 /* compare two verifier states
13243  *
13244  * all states stored in state_list are known to be valid, since
13245  * verifier reached 'bpf_exit' instruction through them
13246  *
13247  * this function is called when verifier exploring different branches of
13248  * execution popped from the state stack. If it sees an old state that has
13249  * more strict register state and more strict stack state then this execution
13250  * branch doesn't need to be explored further, since verifier already
13251  * concluded that more strict state leads to valid finish.
13252  *
13253  * Therefore two states are equivalent if register state is more conservative
13254  * and explored stack state is more conservative than the current one.
13255  * Example:
13256  *       explored                   current
13257  * (slot1=INV slot2=MISC) == (slot1=MISC slot2=MISC)
13258  * (slot1=MISC slot2=MISC) != (slot1=INV slot2=MISC)
13259  *
13260  * In other words if current stack state (one being explored) has more
13261  * valid slots than old one that already passed validation, it means
13262  * the verifier can stop exploring and conclude that current state is valid too
13263  *
13264  * Similarly with registers. If explored state has register type as invalid
13265  * whereas register type in current state is meaningful, it means that
13266  * the current state will reach 'bpf_exit' instruction safely
13267  */
13268 static bool func_states_equal(struct bpf_verifier_env *env, struct bpf_func_state *old,
13269                               struct bpf_func_state *cur)
13270 {
13271         int i;
13272
13273         for (i = 0; i < MAX_BPF_REG; i++)
13274                 if (!regsafe(env, &old->regs[i], &cur->regs[i],
13275                              env->idmap_scratch))
13276                         return false;
13277
13278         if (!stacksafe(env, old, cur, env->idmap_scratch))
13279                 return false;
13280
13281         if (!refsafe(old, cur))
13282                 return false;
13283
13284         return true;
13285 }
13286
13287 static bool states_equal(struct bpf_verifier_env *env,
13288                          struct bpf_verifier_state *old,
13289                          struct bpf_verifier_state *cur)
13290 {
13291         int i;
13292
13293         if (old->curframe != cur->curframe)
13294                 return false;
13295
13296         memset(env->idmap_scratch, 0, sizeof(env->idmap_scratch));
13297
13298         /* Verification state from speculative execution simulation
13299          * must never prune a non-speculative execution one.
13300          */
13301         if (old->speculative && !cur->speculative)
13302                 return false;
13303
13304         if (old->active_lock.ptr != cur->active_lock.ptr)
13305                 return false;
13306
13307         /* Old and cur active_lock's have to be either both present
13308          * or both absent.
13309          */
13310         if (!!old->active_lock.id != !!cur->active_lock.id)
13311                 return false;
13312
13313         if (old->active_lock.id &&
13314             !check_ids(old->active_lock.id, cur->active_lock.id, env->idmap_scratch))
13315                 return false;
13316
13317         if (old->active_rcu_lock != cur->active_rcu_lock)
13318                 return false;
13319
13320         /* for states to be equal callsites have to be the same
13321          * and all frame states need to be equivalent
13322          */
13323         for (i = 0; i <= old->curframe; i++) {
13324                 if (old->frame[i]->callsite != cur->frame[i]->callsite)
13325                         return false;
13326                 if (!func_states_equal(env, old->frame[i], cur->frame[i]))
13327                         return false;
13328         }
13329         return true;
13330 }
13331
13332 /* Return 0 if no propagation happened. Return negative error code if error
13333  * happened. Otherwise, return the propagated bit.
13334  */
13335 static int propagate_liveness_reg(struct bpf_verifier_env *env,
13336                                   struct bpf_reg_state *reg,
13337                                   struct bpf_reg_state *parent_reg)
13338 {
13339         u8 parent_flag = parent_reg->live & REG_LIVE_READ;
13340         u8 flag = reg->live & REG_LIVE_READ;
13341         int err;
13342
13343         /* When comes here, read flags of PARENT_REG or REG could be any of
13344          * REG_LIVE_READ64, REG_LIVE_READ32, REG_LIVE_NONE. There is no need
13345          * of propagation if PARENT_REG has strongest REG_LIVE_READ64.
13346          */
13347         if (parent_flag == REG_LIVE_READ64 ||
13348             /* Or if there is no read flag from REG. */
13349             !flag ||
13350             /* Or if the read flag from REG is the same as PARENT_REG. */
13351             parent_flag == flag)
13352                 return 0;
13353
13354         err = mark_reg_read(env, reg, parent_reg, flag);
13355         if (err)
13356                 return err;
13357
13358         return flag;
13359 }
13360
13361 /* A write screens off any subsequent reads; but write marks come from the
13362  * straight-line code between a state and its parent.  When we arrive at an
13363  * equivalent state (jump target or such) we didn't arrive by the straight-line
13364  * code, so read marks in the state must propagate to the parent regardless
13365  * of the state's write marks. That's what 'parent == state->parent' comparison
13366  * in mark_reg_read() is for.
13367  */
13368 static int propagate_liveness(struct bpf_verifier_env *env,
13369                               const struct bpf_verifier_state *vstate,
13370                               struct bpf_verifier_state *vparent)
13371 {
13372         struct bpf_reg_state *state_reg, *parent_reg;
13373         struct bpf_func_state *state, *parent;
13374         int i, frame, err = 0;
13375
13376         if (vparent->curframe != vstate->curframe) {
13377                 WARN(1, "propagate_live: parent frame %d current frame %d\n",
13378                      vparent->curframe, vstate->curframe);
13379                 return -EFAULT;
13380         }
13381         /* Propagate read liveness of registers... */
13382         BUILD_BUG_ON(BPF_REG_FP + 1 != MAX_BPF_REG);
13383         for (frame = 0; frame <= vstate->curframe; frame++) {
13384                 parent = vparent->frame[frame];
13385                 state = vstate->frame[frame];
13386                 parent_reg = parent->regs;
13387                 state_reg = state->regs;
13388                 /* We don't need to worry about FP liveness, it's read-only */
13389                 for (i = frame < vstate->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++) {
13390                         err = propagate_liveness_reg(env, &state_reg[i],
13391                                                      &parent_reg[i]);
13392                         if (err < 0)
13393                                 return err;
13394                         if (err == REG_LIVE_READ64)
13395                                 mark_insn_zext(env, &parent_reg[i]);
13396                 }
13397
13398                 /* Propagate stack slots. */
13399                 for (i = 0; i < state->allocated_stack / BPF_REG_SIZE &&
13400                             i < parent->allocated_stack / BPF_REG_SIZE; i++) {
13401                         parent_reg = &parent->stack[i].spilled_ptr;
13402                         state_reg = &state->stack[i].spilled_ptr;
13403                         err = propagate_liveness_reg(env, state_reg,
13404                                                      parent_reg);
13405                         if (err < 0)
13406                                 return err;
13407                 }
13408         }
13409         return 0;
13410 }
13411
13412 /* find precise scalars in the previous equivalent state and
13413  * propagate them into the current state
13414  */
13415 static int propagate_precision(struct bpf_verifier_env *env,
13416                                const struct bpf_verifier_state *old)
13417 {
13418         struct bpf_reg_state *state_reg;
13419         struct bpf_func_state *state;
13420         int i, err = 0, fr;
13421
13422         for (fr = old->curframe; fr >= 0; fr--) {
13423                 state = old->frame[fr];
13424                 state_reg = state->regs;
13425                 for (i = 0; i < BPF_REG_FP; i++, state_reg++) {
13426                         if (state_reg->type != SCALAR_VALUE ||
13427                             !state_reg->precise)
13428                                 continue;
13429                         if (env->log.level & BPF_LOG_LEVEL2)
13430                                 verbose(env, "frame %d: propagating r%d\n", i, fr);
13431                         err = mark_chain_precision_frame(env, fr, i);
13432                         if (err < 0)
13433                                 return err;
13434                 }
13435
13436                 for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
13437                         if (!is_spilled_reg(&state->stack[i]))
13438                                 continue;
13439                         state_reg = &state->stack[i].spilled_ptr;
13440                         if (state_reg->type != SCALAR_VALUE ||
13441                             !state_reg->precise)
13442                                 continue;
13443                         if (env->log.level & BPF_LOG_LEVEL2)
13444                                 verbose(env, "frame %d: propagating fp%d\n",
13445                                         (-i - 1) * BPF_REG_SIZE, fr);
13446                         err = mark_chain_precision_stack_frame(env, fr, i);
13447                         if (err < 0)
13448                                 return err;
13449                 }
13450         }
13451         return 0;
13452 }
13453
13454 static bool states_maybe_looping(struct bpf_verifier_state *old,
13455                                  struct bpf_verifier_state *cur)
13456 {
13457         struct bpf_func_state *fold, *fcur;
13458         int i, fr = cur->curframe;
13459
13460         if (old->curframe != fr)
13461                 return false;
13462
13463         fold = old->frame[fr];
13464         fcur = cur->frame[fr];
13465         for (i = 0; i < MAX_BPF_REG; i++)
13466                 if (memcmp(&fold->regs[i], &fcur->regs[i],
13467                            offsetof(struct bpf_reg_state, parent)))
13468                         return false;
13469         return true;
13470 }
13471
13472
13473 static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
13474 {
13475         struct bpf_verifier_state_list *new_sl;
13476         struct bpf_verifier_state_list *sl, **pprev;
13477         struct bpf_verifier_state *cur = env->cur_state, *new;
13478         int i, j, err, states_cnt = 0;
13479         bool add_new_state = env->test_state_freq ? true : false;
13480
13481         /* bpf progs typically have pruning point every 4 instructions
13482          * http://vger.kernel.org/bpfconf2019.html#session-1
13483          * Do not add new state for future pruning if the verifier hasn't seen
13484          * at least 2 jumps and at least 8 instructions.
13485          * This heuristics helps decrease 'total_states' and 'peak_states' metric.
13486          * In tests that amounts to up to 50% reduction into total verifier
13487          * memory consumption and 20% verifier time speedup.
13488          */
13489         if (env->jmps_processed - env->prev_jmps_processed >= 2 &&
13490             env->insn_processed - env->prev_insn_processed >= 8)
13491                 add_new_state = true;
13492
13493         pprev = explored_state(env, insn_idx);
13494         sl = *pprev;
13495
13496         clean_live_states(env, insn_idx, cur);
13497
13498         while (sl) {
13499                 states_cnt++;
13500                 if (sl->state.insn_idx != insn_idx)
13501                         goto next;
13502
13503                 if (sl->state.branches) {
13504                         struct bpf_func_state *frame = sl->state.frame[sl->state.curframe];
13505
13506                         if (frame->in_async_callback_fn &&
13507                             frame->async_entry_cnt != cur->frame[cur->curframe]->async_entry_cnt) {
13508                                 /* Different async_entry_cnt means that the verifier is
13509                                  * processing another entry into async callback.
13510                                  * Seeing the same state is not an indication of infinite
13511                                  * loop or infinite recursion.
13512                                  * But finding the same state doesn't mean that it's safe
13513                                  * to stop processing the current state. The previous state
13514                                  * hasn't yet reached bpf_exit, since state.branches > 0.
13515                                  * Checking in_async_callback_fn alone is not enough either.
13516                                  * Since the verifier still needs to catch infinite loops
13517                                  * inside async callbacks.
13518                                  */
13519                         } else if (states_maybe_looping(&sl->state, cur) &&
13520                                    states_equal(env, &sl->state, cur)) {
13521                                 verbose_linfo(env, insn_idx, "; ");
13522                                 verbose(env, "infinite loop detected at insn %d\n", insn_idx);
13523                                 return -EINVAL;
13524                         }
13525                         /* if the verifier is processing a loop, avoid adding new state
13526                          * too often, since different loop iterations have distinct
13527                          * states and may not help future pruning.
13528                          * This threshold shouldn't be too low to make sure that
13529                          * a loop with large bound will be rejected quickly.
13530                          * The most abusive loop will be:
13531                          * r1 += 1
13532                          * if r1 < 1000000 goto pc-2
13533                          * 1M insn_procssed limit / 100 == 10k peak states.
13534                          * This threshold shouldn't be too high either, since states
13535                          * at the end of the loop are likely to be useful in pruning.
13536                          */
13537                         if (env->jmps_processed - env->prev_jmps_processed < 20 &&
13538                             env->insn_processed - env->prev_insn_processed < 100)
13539                                 add_new_state = false;
13540                         goto miss;
13541                 }
13542                 if (states_equal(env, &sl->state, cur)) {
13543                         sl->hit_cnt++;
13544                         /* reached equivalent register/stack state,
13545                          * prune the search.
13546                          * Registers read by the continuation are read by us.
13547                          * If we have any write marks in env->cur_state, they
13548                          * will prevent corresponding reads in the continuation
13549                          * from reaching our parent (an explored_state).  Our
13550                          * own state will get the read marks recorded, but
13551                          * they'll be immediately forgotten as we're pruning
13552                          * this state and will pop a new one.
13553                          */
13554                         err = propagate_liveness(env, &sl->state, cur);
13555
13556                         /* if previous state reached the exit with precision and
13557                          * current state is equivalent to it (except precsion marks)
13558                          * the precision needs to be propagated back in
13559                          * the current state.
13560                          */
13561                         err = err ? : push_jmp_history(env, cur);
13562                         err = err ? : propagate_precision(env, &sl->state);
13563                         if (err)
13564                                 return err;
13565                         return 1;
13566                 }
13567 miss:
13568                 /* when new state is not going to be added do not increase miss count.
13569                  * Otherwise several loop iterations will remove the state
13570                  * recorded earlier. The goal of these heuristics is to have
13571                  * states from some iterations of the loop (some in the beginning
13572                  * and some at the end) to help pruning.
13573                  */
13574                 if (add_new_state)
13575                         sl->miss_cnt++;
13576                 /* heuristic to determine whether this state is beneficial
13577                  * to keep checking from state equivalence point of view.
13578                  * Higher numbers increase max_states_per_insn and verification time,
13579                  * but do not meaningfully decrease insn_processed.
13580                  */
13581                 if (sl->miss_cnt > sl->hit_cnt * 3 + 3) {
13582                         /* the state is unlikely to be useful. Remove it to
13583                          * speed up verification
13584                          */
13585                         *pprev = sl->next;
13586                         if (sl->state.frame[0]->regs[0].live & REG_LIVE_DONE) {
13587                                 u32 br = sl->state.branches;
13588
13589                                 WARN_ONCE(br,
13590                                           "BUG live_done but branches_to_explore %d\n",
13591                                           br);
13592                                 free_verifier_state(&sl->state, false);
13593                                 kfree(sl);
13594                                 env->peak_states--;
13595                         } else {
13596                                 /* cannot free this state, since parentage chain may
13597                                  * walk it later. Add it for free_list instead to
13598                                  * be freed at the end of verification
13599                                  */
13600                                 sl->next = env->free_list;
13601                                 env->free_list = sl;
13602                         }
13603                         sl = *pprev;
13604                         continue;
13605                 }
13606 next:
13607                 pprev = &sl->next;
13608                 sl = *pprev;
13609         }
13610
13611         if (env->max_states_per_insn < states_cnt)
13612                 env->max_states_per_insn = states_cnt;
13613
13614         if (!env->bpf_capable && states_cnt > BPF_COMPLEXITY_LIMIT_STATES)
13615                 return 0;
13616
13617         if (!add_new_state)
13618                 return 0;
13619
13620         /* There were no equivalent states, remember the current one.
13621          * Technically the current state is not proven to be safe yet,
13622          * but it will either reach outer most bpf_exit (which means it's safe)
13623          * or it will be rejected. When there are no loops the verifier won't be
13624          * seeing this tuple (frame[0].callsite, frame[1].callsite, .. insn_idx)
13625          * again on the way to bpf_exit.
13626          * When looping the sl->state.branches will be > 0 and this state
13627          * will not be considered for equivalence until branches == 0.
13628          */
13629         new_sl = kzalloc(sizeof(struct bpf_verifier_state_list), GFP_KERNEL);
13630         if (!new_sl)
13631                 return -ENOMEM;
13632         env->total_states++;
13633         env->peak_states++;
13634         env->prev_jmps_processed = env->jmps_processed;
13635         env->prev_insn_processed = env->insn_processed;
13636
13637         /* forget precise markings we inherited, see __mark_chain_precision */
13638         if (env->bpf_capable)
13639                 mark_all_scalars_imprecise(env, cur);
13640
13641         /* add new state to the head of linked list */
13642         new = &new_sl->state;
13643         err = copy_verifier_state(new, cur);
13644         if (err) {
13645                 free_verifier_state(new, false);
13646                 kfree(new_sl);
13647                 return err;
13648         }
13649         new->insn_idx = insn_idx;
13650         WARN_ONCE(new->branches != 1,
13651                   "BUG is_state_visited:branches_to_explore=%d insn %d\n", new->branches, insn_idx);
13652
13653         cur->parent = new;
13654         cur->first_insn_idx = insn_idx;
13655         clear_jmp_history(cur);
13656         new_sl->next = *explored_state(env, insn_idx);
13657         *explored_state(env, insn_idx) = new_sl;
13658         /* connect new state to parentage chain. Current frame needs all
13659          * registers connected. Only r6 - r9 of the callers are alive (pushed
13660          * to the stack implicitly by JITs) so in callers' frames connect just
13661          * r6 - r9 as an optimization. Callers will have r1 - r5 connected to
13662          * the state of the call instruction (with WRITTEN set), and r0 comes
13663          * from callee with its full parentage chain, anyway.
13664          */
13665         /* clear write marks in current state: the writes we did are not writes
13666          * our child did, so they don't screen off its reads from us.
13667          * (There are no read marks in current state, because reads always mark
13668          * their parent and current state never has children yet.  Only
13669          * explored_states can get read marks.)
13670          */
13671         for (j = 0; j <= cur->curframe; j++) {
13672                 for (i = j < cur->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++)
13673                         cur->frame[j]->regs[i].parent = &new->frame[j]->regs[i];
13674                 for (i = 0; i < BPF_REG_FP; i++)
13675                         cur->frame[j]->regs[i].live = REG_LIVE_NONE;
13676         }
13677
13678         /* all stack frames are accessible from callee, clear them all */
13679         for (j = 0; j <= cur->curframe; j++) {
13680                 struct bpf_func_state *frame = cur->frame[j];
13681                 struct bpf_func_state *newframe = new->frame[j];
13682
13683                 for (i = 0; i < frame->allocated_stack / BPF_REG_SIZE; i++) {
13684                         frame->stack[i].spilled_ptr.live = REG_LIVE_NONE;
13685                         frame->stack[i].spilled_ptr.parent =
13686                                                 &newframe->stack[i].spilled_ptr;
13687                 }
13688         }
13689         return 0;
13690 }
13691
13692 /* Return true if it's OK to have the same insn return a different type. */
13693 static bool reg_type_mismatch_ok(enum bpf_reg_type type)
13694 {
13695         switch (base_type(type)) {
13696         case PTR_TO_CTX:
13697         case PTR_TO_SOCKET:
13698         case PTR_TO_SOCK_COMMON:
13699         case PTR_TO_TCP_SOCK:
13700         case PTR_TO_XDP_SOCK:
13701         case PTR_TO_BTF_ID:
13702                 return false;
13703         default:
13704                 return true;
13705         }
13706 }
13707
13708 /* If an instruction was previously used with particular pointer types, then we
13709  * need to be careful to avoid cases such as the below, where it may be ok
13710  * for one branch accessing the pointer, but not ok for the other branch:
13711  *
13712  * R1 = sock_ptr
13713  * goto X;
13714  * ...
13715  * R1 = some_other_valid_ptr;
13716  * goto X;
13717  * ...
13718  * R2 = *(u32 *)(R1 + 0);
13719  */
13720 static bool reg_type_mismatch(enum bpf_reg_type src, enum bpf_reg_type prev)
13721 {
13722         return src != prev && (!reg_type_mismatch_ok(src) ||
13723                                !reg_type_mismatch_ok(prev));
13724 }
13725
13726 static int do_check(struct bpf_verifier_env *env)
13727 {
13728         bool pop_log = !(env->log.level & BPF_LOG_LEVEL2);
13729         struct bpf_verifier_state *state = env->cur_state;
13730         struct bpf_insn *insns = env->prog->insnsi;
13731         struct bpf_reg_state *regs;
13732         int insn_cnt = env->prog->len;
13733         bool do_print_state = false;
13734         int prev_insn_idx = -1;
13735
13736         for (;;) {
13737                 struct bpf_insn *insn;
13738                 u8 class;
13739                 int err;
13740
13741                 env->prev_insn_idx = prev_insn_idx;
13742                 if (env->insn_idx >= insn_cnt) {
13743                         verbose(env, "invalid insn idx %d insn_cnt %d\n",
13744                                 env->insn_idx, insn_cnt);
13745                         return -EFAULT;
13746                 }
13747
13748                 insn = &insns[env->insn_idx];
13749                 class = BPF_CLASS(insn->code);
13750
13751                 if (++env->insn_processed > BPF_COMPLEXITY_LIMIT_INSNS) {
13752                         verbose(env,
13753                                 "BPF program is too large. Processed %d insn\n",
13754                                 env->insn_processed);
13755                         return -E2BIG;
13756                 }
13757
13758                 state->last_insn_idx = env->prev_insn_idx;
13759
13760                 if (is_prune_point(env, env->insn_idx)) {
13761                         err = is_state_visited(env, env->insn_idx);
13762                         if (err < 0)
13763                                 return err;
13764                         if (err == 1) {
13765                                 /* found equivalent state, can prune the search */
13766                                 if (env->log.level & BPF_LOG_LEVEL) {
13767                                         if (do_print_state)
13768                                                 verbose(env, "\nfrom %d to %d%s: safe\n",
13769                                                         env->prev_insn_idx, env->insn_idx,
13770                                                         env->cur_state->speculative ?
13771                                                         " (speculative execution)" : "");
13772                                         else
13773                                                 verbose(env, "%d: safe\n", env->insn_idx);
13774                                 }
13775                                 goto process_bpf_exit;
13776                         }
13777                 }
13778
13779                 if (is_jmp_point(env, env->insn_idx)) {
13780                         err = push_jmp_history(env, state);
13781                         if (err)
13782                                 return err;
13783                 }
13784
13785                 if (signal_pending(current))
13786                         return -EAGAIN;
13787
13788                 if (need_resched())
13789                         cond_resched();
13790
13791                 if (env->log.level & BPF_LOG_LEVEL2 && do_print_state) {
13792                         verbose(env, "\nfrom %d to %d%s:",
13793                                 env->prev_insn_idx, env->insn_idx,
13794                                 env->cur_state->speculative ?
13795                                 " (speculative execution)" : "");
13796                         print_verifier_state(env, state->frame[state->curframe], true);
13797                         do_print_state = false;
13798                 }
13799
13800                 if (env->log.level & BPF_LOG_LEVEL) {
13801                         const struct bpf_insn_cbs cbs = {
13802                                 .cb_call        = disasm_kfunc_name,
13803                                 .cb_print       = verbose,
13804                                 .private_data   = env,
13805                         };
13806
13807                         if (verifier_state_scratched(env))
13808                                 print_insn_state(env, state->frame[state->curframe]);
13809
13810                         verbose_linfo(env, env->insn_idx, "; ");
13811                         env->prev_log_len = env->log.len_used;
13812                         verbose(env, "%d: ", env->insn_idx);
13813                         print_bpf_insn(&cbs, insn, env->allow_ptr_leaks);
13814                         env->prev_insn_print_len = env->log.len_used - env->prev_log_len;
13815                         env->prev_log_len = env->log.len_used;
13816                 }
13817
13818                 if (bpf_prog_is_dev_bound(env->prog->aux)) {
13819                         err = bpf_prog_offload_verify_insn(env, env->insn_idx,
13820                                                            env->prev_insn_idx);
13821                         if (err)
13822                                 return err;
13823                 }
13824
13825                 regs = cur_regs(env);
13826                 sanitize_mark_insn_seen(env);
13827                 prev_insn_idx = env->insn_idx;
13828
13829                 if (class == BPF_ALU || class == BPF_ALU64) {
13830                         err = check_alu_op(env, insn);
13831                         if (err)
13832                                 return err;
13833
13834                 } else if (class == BPF_LDX) {
13835                         enum bpf_reg_type *prev_src_type, src_reg_type;
13836
13837                         /* check for reserved fields is already done */
13838
13839                         /* check src operand */
13840                         err = check_reg_arg(env, insn->src_reg, SRC_OP);
13841                         if (err)
13842                                 return err;
13843
13844                         err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
13845                         if (err)
13846                                 return err;
13847
13848                         src_reg_type = regs[insn->src_reg].type;
13849
13850                         /* check that memory (src_reg + off) is readable,
13851                          * the state of dst_reg will be updated by this func
13852                          */
13853                         err = check_mem_access(env, env->insn_idx, insn->src_reg,
13854                                                insn->off, BPF_SIZE(insn->code),
13855                                                BPF_READ, insn->dst_reg, false);
13856                         if (err)
13857                                 return err;
13858
13859                         prev_src_type = &env->insn_aux_data[env->insn_idx].ptr_type;
13860
13861                         if (*prev_src_type == NOT_INIT) {
13862                                 /* saw a valid insn
13863                                  * dst_reg = *(u32 *)(src_reg + off)
13864                                  * save type to validate intersecting paths
13865                                  */
13866                                 *prev_src_type = src_reg_type;
13867
13868                         } else if (reg_type_mismatch(src_reg_type, *prev_src_type)) {
13869                                 /* ABuser program is trying to use the same insn
13870                                  * dst_reg = *(u32*) (src_reg + off)
13871                                  * with different pointer types:
13872                                  * src_reg == ctx in one branch and
13873                                  * src_reg == stack|map in some other branch.
13874                                  * Reject it.
13875                                  */
13876                                 verbose(env, "same insn cannot be used with different pointers\n");
13877                                 return -EINVAL;
13878                         }
13879
13880                 } else if (class == BPF_STX) {
13881                         enum bpf_reg_type *prev_dst_type, dst_reg_type;
13882
13883                         if (BPF_MODE(insn->code) == BPF_ATOMIC) {
13884                                 err = check_atomic(env, env->insn_idx, insn);
13885                                 if (err)
13886                                         return err;
13887                                 env->insn_idx++;
13888                                 continue;
13889                         }
13890
13891                         if (BPF_MODE(insn->code) != BPF_MEM || insn->imm != 0) {
13892                                 verbose(env, "BPF_STX uses reserved fields\n");
13893                                 return -EINVAL;
13894                         }
13895
13896                         /* check src1 operand */
13897                         err = check_reg_arg(env, insn->src_reg, SRC_OP);
13898                         if (err)
13899                                 return err;
13900                         /* check src2 operand */
13901                         err = check_reg_arg(env, insn->dst_reg, SRC_OP);
13902                         if (err)
13903                                 return err;
13904
13905                         dst_reg_type = regs[insn->dst_reg].type;
13906
13907                         /* check that memory (dst_reg + off) is writeable */
13908                         err = check_mem_access(env, env->insn_idx, insn->dst_reg,
13909                                                insn->off, BPF_SIZE(insn->code),
13910                                                BPF_WRITE, insn->src_reg, false);
13911                         if (err)
13912                                 return err;
13913
13914                         prev_dst_type = &env->insn_aux_data[env->insn_idx].ptr_type;
13915
13916                         if (*prev_dst_type == NOT_INIT) {
13917                                 *prev_dst_type = dst_reg_type;
13918                         } else if (reg_type_mismatch(dst_reg_type, *prev_dst_type)) {
13919                                 verbose(env, "same insn cannot be used with different pointers\n");
13920                                 return -EINVAL;
13921                         }
13922
13923                 } else if (class == BPF_ST) {
13924                         if (BPF_MODE(insn->code) != BPF_MEM ||
13925                             insn->src_reg != BPF_REG_0) {
13926                                 verbose(env, "BPF_ST uses reserved fields\n");
13927                                 return -EINVAL;
13928                         }
13929                         /* check src operand */
13930                         err = check_reg_arg(env, insn->dst_reg, SRC_OP);
13931                         if (err)
13932                                 return err;
13933
13934                         if (is_ctx_reg(env, insn->dst_reg)) {
13935                                 verbose(env, "BPF_ST stores into R%d %s is not allowed\n",
13936                                         insn->dst_reg,
13937                                         reg_type_str(env, reg_state(env, insn->dst_reg)->type));
13938                                 return -EACCES;
13939                         }
13940
13941                         /* check that memory (dst_reg + off) is writeable */
13942                         err = check_mem_access(env, env->insn_idx, insn->dst_reg,
13943                                                insn->off, BPF_SIZE(insn->code),
13944                                                BPF_WRITE, -1, false);
13945                         if (err)
13946                                 return err;
13947
13948                 } else if (class == BPF_JMP || class == BPF_JMP32) {
13949                         u8 opcode = BPF_OP(insn->code);
13950
13951                         env->jmps_processed++;
13952                         if (opcode == BPF_CALL) {
13953                                 if (BPF_SRC(insn->code) != BPF_K ||
13954                                     (insn->src_reg != BPF_PSEUDO_KFUNC_CALL
13955                                      && insn->off != 0) ||
13956                                     (insn->src_reg != BPF_REG_0 &&
13957                                      insn->src_reg != BPF_PSEUDO_CALL &&
13958                                      insn->src_reg != BPF_PSEUDO_KFUNC_CALL) ||
13959                                     insn->dst_reg != BPF_REG_0 ||
13960                                     class == BPF_JMP32) {
13961                                         verbose(env, "BPF_CALL uses reserved fields\n");
13962                                         return -EINVAL;
13963                                 }
13964
13965                                 if (env->cur_state->active_lock.ptr) {
13966                                         if ((insn->src_reg == BPF_REG_0 && insn->imm != BPF_FUNC_spin_unlock) ||
13967                                             (insn->src_reg == BPF_PSEUDO_CALL) ||
13968                                             (insn->src_reg == BPF_PSEUDO_KFUNC_CALL &&
13969                                              (insn->off != 0 || !is_bpf_list_api_kfunc(insn->imm)))) {
13970                                                 verbose(env, "function calls are not allowed while holding a lock\n");
13971                                                 return -EINVAL;
13972                                         }
13973                                 }
13974                                 if (insn->src_reg == BPF_PSEUDO_CALL)
13975                                         err = check_func_call(env, insn, &env->insn_idx);
13976                                 else if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL)
13977                                         err = check_kfunc_call(env, insn, &env->insn_idx);
13978                                 else
13979                                         err = check_helper_call(env, insn, &env->insn_idx);
13980                                 if (err)
13981                                         return err;
13982                         } else if (opcode == BPF_JA) {
13983                                 if (BPF_SRC(insn->code) != BPF_K ||
13984                                     insn->imm != 0 ||
13985                                     insn->src_reg != BPF_REG_0 ||
13986                                     insn->dst_reg != BPF_REG_0 ||
13987                                     class == BPF_JMP32) {
13988                                         verbose(env, "BPF_JA uses reserved fields\n");
13989                                         return -EINVAL;
13990                                 }
13991
13992                                 env->insn_idx += insn->off + 1;
13993                                 continue;
13994
13995                         } else if (opcode == BPF_EXIT) {
13996                                 if (BPF_SRC(insn->code) != BPF_K ||
13997                                     insn->imm != 0 ||
13998                                     insn->src_reg != BPF_REG_0 ||
13999                                     insn->dst_reg != BPF_REG_0 ||
14000                                     class == BPF_JMP32) {
14001                                         verbose(env, "BPF_EXIT uses reserved fields\n");
14002                                         return -EINVAL;
14003                                 }
14004
14005                                 if (env->cur_state->active_lock.ptr) {
14006                                         verbose(env, "bpf_spin_unlock is missing\n");
14007                                         return -EINVAL;
14008                                 }
14009
14010                                 if (env->cur_state->active_rcu_lock) {
14011                                         verbose(env, "bpf_rcu_read_unlock is missing\n");
14012                                         return -EINVAL;
14013                                 }
14014
14015                                 /* We must do check_reference_leak here before
14016                                  * prepare_func_exit to handle the case when
14017                                  * state->curframe > 0, it may be a callback
14018                                  * function, for which reference_state must
14019                                  * match caller reference state when it exits.
14020                                  */
14021                                 err = check_reference_leak(env);
14022                                 if (err)
14023                                         return err;
14024
14025                                 if (state->curframe) {
14026                                         /* exit from nested function */
14027                                         err = prepare_func_exit(env, &env->insn_idx);
14028                                         if (err)
14029                                                 return err;
14030                                         do_print_state = true;
14031                                         continue;
14032                                 }
14033
14034                                 err = check_return_code(env);
14035                                 if (err)
14036                                         return err;
14037 process_bpf_exit:
14038                                 mark_verifier_state_scratched(env);
14039                                 update_branch_counts(env, env->cur_state);
14040                                 err = pop_stack(env, &prev_insn_idx,
14041                                                 &env->insn_idx, pop_log);
14042                                 if (err < 0) {
14043                                         if (err != -ENOENT)
14044                                                 return err;
14045                                         break;
14046                                 } else {
14047                                         do_print_state = true;
14048                                         continue;
14049                                 }
14050                         } else {
14051                                 err = check_cond_jmp_op(env, insn, &env->insn_idx);
14052                                 if (err)
14053                                         return err;
14054                         }
14055                 } else if (class == BPF_LD) {
14056                         u8 mode = BPF_MODE(insn->code);
14057
14058                         if (mode == BPF_ABS || mode == BPF_IND) {
14059                                 err = check_ld_abs(env, insn);
14060                                 if (err)
14061                                         return err;
14062
14063                         } else if (mode == BPF_IMM) {
14064                                 err = check_ld_imm(env, insn);
14065                                 if (err)
14066                                         return err;
14067
14068                                 env->insn_idx++;
14069                                 sanitize_mark_insn_seen(env);
14070                         } else {
14071                                 verbose(env, "invalid BPF_LD mode\n");
14072                                 return -EINVAL;
14073                         }
14074                 } else {
14075                         verbose(env, "unknown insn class %d\n", class);
14076                         return -EINVAL;
14077                 }
14078
14079                 env->insn_idx++;
14080         }
14081
14082         return 0;
14083 }
14084
14085 static int find_btf_percpu_datasec(struct btf *btf)
14086 {
14087         const struct btf_type *t;
14088         const char *tname;
14089         int i, n;
14090
14091         /*
14092          * Both vmlinux and module each have their own ".data..percpu"
14093          * DATASECs in BTF. So for module's case, we need to skip vmlinux BTF
14094          * types to look at only module's own BTF types.
14095          */
14096         n = btf_nr_types(btf);
14097         if (btf_is_module(btf))
14098                 i = btf_nr_types(btf_vmlinux);
14099         else
14100                 i = 1;
14101
14102         for(; i < n; i++) {
14103                 t = btf_type_by_id(btf, i);
14104                 if (BTF_INFO_KIND(t->info) != BTF_KIND_DATASEC)
14105                         continue;
14106
14107                 tname = btf_name_by_offset(btf, t->name_off);
14108                 if (!strcmp(tname, ".data..percpu"))
14109                         return i;
14110         }
14111
14112         return -ENOENT;
14113 }
14114
14115 /* replace pseudo btf_id with kernel symbol address */
14116 static int check_pseudo_btf_id(struct bpf_verifier_env *env,
14117                                struct bpf_insn *insn,
14118                                struct bpf_insn_aux_data *aux)
14119 {
14120         const struct btf_var_secinfo *vsi;
14121         const struct btf_type *datasec;
14122         struct btf_mod_pair *btf_mod;
14123         const struct btf_type *t;
14124         const char *sym_name;
14125         bool percpu = false;
14126         u32 type, id = insn->imm;
14127         struct btf *btf;
14128         s32 datasec_id;
14129         u64 addr;
14130         int i, btf_fd, err;
14131
14132         btf_fd = insn[1].imm;
14133         if (btf_fd) {
14134                 btf = btf_get_by_fd(btf_fd);
14135                 if (IS_ERR(btf)) {
14136                         verbose(env, "invalid module BTF object FD specified.\n");
14137                         return -EINVAL;
14138                 }
14139         } else {
14140                 if (!btf_vmlinux) {
14141                         verbose(env, "kernel is missing BTF, make sure CONFIG_DEBUG_INFO_BTF=y is specified in Kconfig.\n");
14142                         return -EINVAL;
14143                 }
14144                 btf = btf_vmlinux;
14145                 btf_get(btf);
14146         }
14147
14148         t = btf_type_by_id(btf, id);
14149         if (!t) {
14150                 verbose(env, "ldimm64 insn specifies invalid btf_id %d.\n", id);
14151                 err = -ENOENT;
14152                 goto err_put;
14153         }
14154
14155         if (!btf_type_is_var(t)) {
14156                 verbose(env, "pseudo btf_id %d in ldimm64 isn't KIND_VAR.\n", id);
14157                 err = -EINVAL;
14158                 goto err_put;
14159         }
14160
14161         sym_name = btf_name_by_offset(btf, t->name_off);
14162         addr = kallsyms_lookup_name(sym_name);
14163         if (!addr) {
14164                 verbose(env, "ldimm64 failed to find the address for kernel symbol '%s'.\n",
14165                         sym_name);
14166                 err = -ENOENT;
14167                 goto err_put;
14168         }
14169
14170         datasec_id = find_btf_percpu_datasec(btf);
14171         if (datasec_id > 0) {
14172                 datasec = btf_type_by_id(btf, datasec_id);
14173                 for_each_vsi(i, datasec, vsi) {
14174                         if (vsi->type == id) {
14175                                 percpu = true;
14176                                 break;
14177                         }
14178                 }
14179         }
14180
14181         insn[0].imm = (u32)addr;
14182         insn[1].imm = addr >> 32;
14183
14184         type = t->type;
14185         t = btf_type_skip_modifiers(btf, type, NULL);
14186         if (percpu) {
14187                 aux->btf_var.reg_type = PTR_TO_BTF_ID | MEM_PERCPU;
14188                 aux->btf_var.btf = btf;
14189                 aux->btf_var.btf_id = type;
14190         } else if (!btf_type_is_struct(t)) {
14191                 const struct btf_type *ret;
14192                 const char *tname;
14193                 u32 tsize;
14194
14195                 /* resolve the type size of ksym. */
14196                 ret = btf_resolve_size(btf, t, &tsize);
14197                 if (IS_ERR(ret)) {
14198                         tname = btf_name_by_offset(btf, t->name_off);
14199                         verbose(env, "ldimm64 unable to resolve the size of type '%s': %ld\n",
14200                                 tname, PTR_ERR(ret));
14201                         err = -EINVAL;
14202                         goto err_put;
14203                 }
14204                 aux->btf_var.reg_type = PTR_TO_MEM | MEM_RDONLY;
14205                 aux->btf_var.mem_size = tsize;
14206         } else {
14207                 aux->btf_var.reg_type = PTR_TO_BTF_ID;
14208                 aux->btf_var.btf = btf;
14209                 aux->btf_var.btf_id = type;
14210         }
14211
14212         /* check whether we recorded this BTF (and maybe module) already */
14213         for (i = 0; i < env->used_btf_cnt; i++) {
14214                 if (env->used_btfs[i].btf == btf) {
14215                         btf_put(btf);
14216                         return 0;
14217                 }
14218         }
14219
14220         if (env->used_btf_cnt >= MAX_USED_BTFS) {
14221                 err = -E2BIG;
14222                 goto err_put;
14223         }
14224
14225         btf_mod = &env->used_btfs[env->used_btf_cnt];
14226         btf_mod->btf = btf;
14227         btf_mod->module = NULL;
14228
14229         /* if we reference variables from kernel module, bump its refcount */
14230         if (btf_is_module(btf)) {
14231                 btf_mod->module = btf_try_get_module(btf);
14232                 if (!btf_mod->module) {
14233                         err = -ENXIO;
14234                         goto err_put;
14235                 }
14236         }
14237
14238         env->used_btf_cnt++;
14239
14240         return 0;
14241 err_put:
14242         btf_put(btf);
14243         return err;
14244 }
14245
14246 static bool is_tracing_prog_type(enum bpf_prog_type type)
14247 {
14248         switch (type) {
14249         case BPF_PROG_TYPE_KPROBE:
14250         case BPF_PROG_TYPE_TRACEPOINT:
14251         case BPF_PROG_TYPE_PERF_EVENT:
14252         case BPF_PROG_TYPE_RAW_TRACEPOINT:
14253         case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE:
14254                 return true;
14255         default:
14256                 return false;
14257         }
14258 }
14259
14260 static int check_map_prog_compatibility(struct bpf_verifier_env *env,
14261                                         struct bpf_map *map,
14262                                         struct bpf_prog *prog)
14263
14264 {
14265         enum bpf_prog_type prog_type = resolve_prog_type(prog);
14266
14267         if (btf_record_has_field(map->record, BPF_LIST_HEAD)) {
14268                 if (is_tracing_prog_type(prog_type)) {
14269                         verbose(env, "tracing progs cannot use bpf_list_head yet\n");
14270                         return -EINVAL;
14271                 }
14272         }
14273
14274         if (btf_record_has_field(map->record, BPF_SPIN_LOCK)) {
14275                 if (prog_type == BPF_PROG_TYPE_SOCKET_FILTER) {
14276                         verbose(env, "socket filter progs cannot use bpf_spin_lock yet\n");
14277                         return -EINVAL;
14278                 }
14279
14280                 if (is_tracing_prog_type(prog_type)) {
14281                         verbose(env, "tracing progs cannot use bpf_spin_lock yet\n");
14282                         return -EINVAL;
14283                 }
14284
14285                 if (prog->aux->sleepable) {
14286                         verbose(env, "sleepable progs cannot use bpf_spin_lock yet\n");
14287                         return -EINVAL;
14288                 }
14289         }
14290
14291         if (btf_record_has_field(map->record, BPF_TIMER)) {
14292                 if (is_tracing_prog_type(prog_type)) {
14293                         verbose(env, "tracing progs cannot use bpf_timer yet\n");
14294                         return -EINVAL;
14295                 }
14296         }
14297
14298         if ((bpf_prog_is_dev_bound(prog->aux) || bpf_map_is_dev_bound(map)) &&
14299             !bpf_offload_prog_map_match(prog, map)) {
14300                 verbose(env, "offload device mismatch between prog and map\n");
14301                 return -EINVAL;
14302         }
14303
14304         if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS) {
14305                 verbose(env, "bpf_struct_ops map cannot be used in prog\n");
14306                 return -EINVAL;
14307         }
14308
14309         if (prog->aux->sleepable)
14310                 switch (map->map_type) {
14311                 case BPF_MAP_TYPE_HASH:
14312                 case BPF_MAP_TYPE_LRU_HASH:
14313                 case BPF_MAP_TYPE_ARRAY:
14314                 case BPF_MAP_TYPE_PERCPU_HASH:
14315                 case BPF_MAP_TYPE_PERCPU_ARRAY:
14316                 case BPF_MAP_TYPE_LRU_PERCPU_HASH:
14317                 case BPF_MAP_TYPE_ARRAY_OF_MAPS:
14318                 case BPF_MAP_TYPE_HASH_OF_MAPS:
14319                 case BPF_MAP_TYPE_RINGBUF:
14320                 case BPF_MAP_TYPE_USER_RINGBUF:
14321                 case BPF_MAP_TYPE_INODE_STORAGE:
14322                 case BPF_MAP_TYPE_SK_STORAGE:
14323                 case BPF_MAP_TYPE_TASK_STORAGE:
14324                 case BPF_MAP_TYPE_CGRP_STORAGE:
14325                         break;
14326                 default:
14327                         verbose(env,
14328                                 "Sleepable programs can only use array, hash, ringbuf and local storage maps\n");
14329                         return -EINVAL;
14330                 }
14331
14332         return 0;
14333 }
14334
14335 static bool bpf_map_is_cgroup_storage(struct bpf_map *map)
14336 {
14337         return (map->map_type == BPF_MAP_TYPE_CGROUP_STORAGE ||
14338                 map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE);
14339 }
14340
14341 /* find and rewrite pseudo imm in ld_imm64 instructions:
14342  *
14343  * 1. if it accesses map FD, replace it with actual map pointer.
14344  * 2. if it accesses btf_id of a VAR, replace it with pointer to the var.
14345  *
14346  * NOTE: btf_vmlinux is required for converting pseudo btf_id.
14347  */
14348 static int resolve_pseudo_ldimm64(struct bpf_verifier_env *env)
14349 {
14350         struct bpf_insn *insn = env->prog->insnsi;
14351         int insn_cnt = env->prog->len;
14352         int i, j, err;
14353
14354         err = bpf_prog_calc_tag(env->prog);
14355         if (err)
14356                 return err;
14357
14358         for (i = 0; i < insn_cnt; i++, insn++) {
14359                 if (BPF_CLASS(insn->code) == BPF_LDX &&
14360                     (BPF_MODE(insn->code) != BPF_MEM || insn->imm != 0)) {
14361                         verbose(env, "BPF_LDX uses reserved fields\n");
14362                         return -EINVAL;
14363                 }
14364
14365                 if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW)) {
14366                         struct bpf_insn_aux_data *aux;
14367                         struct bpf_map *map;
14368                         struct fd f;
14369                         u64 addr;
14370                         u32 fd;
14371
14372                         if (i == insn_cnt - 1 || insn[1].code != 0 ||
14373                             insn[1].dst_reg != 0 || insn[1].src_reg != 0 ||
14374                             insn[1].off != 0) {
14375                                 verbose(env, "invalid bpf_ld_imm64 insn\n");
14376                                 return -EINVAL;
14377                         }
14378
14379                         if (insn[0].src_reg == 0)
14380                                 /* valid generic load 64-bit imm */
14381                                 goto next_insn;
14382
14383                         if (insn[0].src_reg == BPF_PSEUDO_BTF_ID) {
14384                                 aux = &env->insn_aux_data[i];
14385                                 err = check_pseudo_btf_id(env, insn, aux);
14386                                 if (err)
14387                                         return err;
14388                                 goto next_insn;
14389                         }
14390
14391                         if (insn[0].src_reg == BPF_PSEUDO_FUNC) {
14392                                 aux = &env->insn_aux_data[i];
14393                                 aux->ptr_type = PTR_TO_FUNC;
14394                                 goto next_insn;
14395                         }
14396
14397                         /* In final convert_pseudo_ld_imm64() step, this is
14398                          * converted into regular 64-bit imm load insn.
14399                          */
14400                         switch (insn[0].src_reg) {
14401                         case BPF_PSEUDO_MAP_VALUE:
14402                         case BPF_PSEUDO_MAP_IDX_VALUE:
14403                                 break;
14404                         case BPF_PSEUDO_MAP_FD:
14405                         case BPF_PSEUDO_MAP_IDX:
14406                                 if (insn[1].imm == 0)
14407                                         break;
14408                                 fallthrough;
14409                         default:
14410                                 verbose(env, "unrecognized bpf_ld_imm64 insn\n");
14411                                 return -EINVAL;
14412                         }
14413
14414                         switch (insn[0].src_reg) {
14415                         case BPF_PSEUDO_MAP_IDX_VALUE:
14416                         case BPF_PSEUDO_MAP_IDX:
14417                                 if (bpfptr_is_null(env->fd_array)) {
14418                                         verbose(env, "fd_idx without fd_array is invalid\n");
14419                                         return -EPROTO;
14420                                 }
14421                                 if (copy_from_bpfptr_offset(&fd, env->fd_array,
14422                                                             insn[0].imm * sizeof(fd),
14423                                                             sizeof(fd)))
14424                                         return -EFAULT;
14425                                 break;
14426                         default:
14427                                 fd = insn[0].imm;
14428                                 break;
14429                         }
14430
14431                         f = fdget(fd);
14432                         map = __bpf_map_get(f);
14433                         if (IS_ERR(map)) {
14434                                 verbose(env, "fd %d is not pointing to valid bpf_map\n",
14435                                         insn[0].imm);
14436                                 return PTR_ERR(map);
14437                         }
14438
14439                         err = check_map_prog_compatibility(env, map, env->prog);
14440                         if (err) {
14441                                 fdput(f);
14442                                 return err;
14443                         }
14444
14445                         aux = &env->insn_aux_data[i];
14446                         if (insn[0].src_reg == BPF_PSEUDO_MAP_FD ||
14447                             insn[0].src_reg == BPF_PSEUDO_MAP_IDX) {
14448                                 addr = (unsigned long)map;
14449                         } else {
14450                                 u32 off = insn[1].imm;
14451
14452                                 if (off >= BPF_MAX_VAR_OFF) {
14453                                         verbose(env, "direct value offset of %u is not allowed\n", off);
14454                                         fdput(f);
14455                                         return -EINVAL;
14456                                 }
14457
14458                                 if (!map->ops->map_direct_value_addr) {
14459                                         verbose(env, "no direct value access support for this map type\n");
14460                                         fdput(f);
14461                                         return -EINVAL;
14462                                 }
14463
14464                                 err = map->ops->map_direct_value_addr(map, &addr, off);
14465                                 if (err) {
14466                                         verbose(env, "invalid access to map value pointer, value_size=%u off=%u\n",
14467                                                 map->value_size, off);
14468                                         fdput(f);
14469                                         return err;
14470                                 }
14471
14472                                 aux->map_off = off;
14473                                 addr += off;
14474                         }
14475
14476                         insn[0].imm = (u32)addr;
14477                         insn[1].imm = addr >> 32;
14478
14479                         /* check whether we recorded this map already */
14480                         for (j = 0; j < env->used_map_cnt; j++) {
14481                                 if (env->used_maps[j] == map) {
14482                                         aux->map_index = j;
14483                                         fdput(f);
14484                                         goto next_insn;
14485                                 }
14486                         }
14487
14488                         if (env->used_map_cnt >= MAX_USED_MAPS) {
14489                                 fdput(f);
14490                                 return -E2BIG;
14491                         }
14492
14493                         /* hold the map. If the program is rejected by verifier,
14494                          * the map will be released by release_maps() or it
14495                          * will be used by the valid program until it's unloaded
14496                          * and all maps are released in free_used_maps()
14497                          */
14498                         bpf_map_inc(map);
14499
14500                         aux->map_index = env->used_map_cnt;
14501                         env->used_maps[env->used_map_cnt++] = map;
14502
14503                         if (bpf_map_is_cgroup_storage(map) &&
14504                             bpf_cgroup_storage_assign(env->prog->aux, map)) {
14505                                 verbose(env, "only one cgroup storage of each type is allowed\n");
14506                                 fdput(f);
14507                                 return -EBUSY;
14508                         }
14509
14510                         fdput(f);
14511 next_insn:
14512                         insn++;
14513                         i++;
14514                         continue;
14515                 }
14516
14517                 /* Basic sanity check before we invest more work here. */
14518                 if (!bpf_opcode_in_insntable(insn->code)) {
14519                         verbose(env, "unknown opcode %02x\n", insn->code);
14520                         return -EINVAL;
14521                 }
14522         }
14523
14524         /* now all pseudo BPF_LD_IMM64 instructions load valid
14525          * 'struct bpf_map *' into a register instead of user map_fd.
14526          * These pointers will be used later by verifier to validate map access.
14527          */
14528         return 0;
14529 }
14530
14531 /* drop refcnt of maps used by the rejected program */
14532 static void release_maps(struct bpf_verifier_env *env)
14533 {
14534         __bpf_free_used_maps(env->prog->aux, env->used_maps,
14535                              env->used_map_cnt);
14536 }
14537
14538 /* drop refcnt of maps used by the rejected program */
14539 static void release_btfs(struct bpf_verifier_env *env)
14540 {
14541         __bpf_free_used_btfs(env->prog->aux, env->used_btfs,
14542                              env->used_btf_cnt);
14543 }
14544
14545 /* convert pseudo BPF_LD_IMM64 into generic BPF_LD_IMM64 */
14546 static void convert_pseudo_ld_imm64(struct bpf_verifier_env *env)
14547 {
14548         struct bpf_insn *insn = env->prog->insnsi;
14549         int insn_cnt = env->prog->len;
14550         int i;
14551
14552         for (i = 0; i < insn_cnt; i++, insn++) {
14553                 if (insn->code != (BPF_LD | BPF_IMM | BPF_DW))
14554                         continue;
14555                 if (insn->src_reg == BPF_PSEUDO_FUNC)
14556                         continue;
14557                 insn->src_reg = 0;
14558         }
14559 }
14560
14561 /* single env->prog->insni[off] instruction was replaced with the range
14562  * insni[off, off + cnt).  Adjust corresponding insn_aux_data by copying
14563  * [0, off) and [off, end) to new locations, so the patched range stays zero
14564  */
14565 static void adjust_insn_aux_data(struct bpf_verifier_env *env,
14566                                  struct bpf_insn_aux_data *new_data,
14567                                  struct bpf_prog *new_prog, u32 off, u32 cnt)
14568 {
14569         struct bpf_insn_aux_data *old_data = env->insn_aux_data;
14570         struct bpf_insn *insn = new_prog->insnsi;
14571         u32 old_seen = old_data[off].seen;
14572         u32 prog_len;
14573         int i;
14574
14575         /* aux info at OFF always needs adjustment, no matter fast path
14576          * (cnt == 1) is taken or not. There is no guarantee INSN at OFF is the
14577          * original insn at old prog.
14578          */
14579         old_data[off].zext_dst = insn_has_def32(env, insn + off + cnt - 1);
14580
14581         if (cnt == 1)
14582                 return;
14583         prog_len = new_prog->len;
14584
14585         memcpy(new_data, old_data, sizeof(struct bpf_insn_aux_data) * off);
14586         memcpy(new_data + off + cnt - 1, old_data + off,
14587                sizeof(struct bpf_insn_aux_data) * (prog_len - off - cnt + 1));
14588         for (i = off; i < off + cnt - 1; i++) {
14589                 /* Expand insni[off]'s seen count to the patched range. */
14590                 new_data[i].seen = old_seen;
14591                 new_data[i].zext_dst = insn_has_def32(env, insn + i);
14592         }
14593         env->insn_aux_data = new_data;
14594         vfree(old_data);
14595 }
14596
14597 static void adjust_subprog_starts(struct bpf_verifier_env *env, u32 off, u32 len)
14598 {
14599         int i;
14600
14601         if (len == 1)
14602                 return;
14603         /* NOTE: fake 'exit' subprog should be updated as well. */
14604         for (i = 0; i <= env->subprog_cnt; i++) {
14605                 if (env->subprog_info[i].start <= off)
14606                         continue;
14607                 env->subprog_info[i].start += len - 1;
14608         }
14609 }
14610
14611 static void adjust_poke_descs(struct bpf_prog *prog, u32 off, u32 len)
14612 {
14613         struct bpf_jit_poke_descriptor *tab = prog->aux->poke_tab;
14614         int i, sz = prog->aux->size_poke_tab;
14615         struct bpf_jit_poke_descriptor *desc;
14616
14617         for (i = 0; i < sz; i++) {
14618                 desc = &tab[i];
14619                 if (desc->insn_idx <= off)
14620                         continue;
14621                 desc->insn_idx += len - 1;
14622         }
14623 }
14624
14625 static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 off,
14626                                             const struct bpf_insn *patch, u32 len)
14627 {
14628         struct bpf_prog *new_prog;
14629         struct bpf_insn_aux_data *new_data = NULL;
14630
14631         if (len > 1) {
14632                 new_data = vzalloc(array_size(env->prog->len + len - 1,
14633                                               sizeof(struct bpf_insn_aux_data)));
14634                 if (!new_data)
14635                         return NULL;
14636         }
14637
14638         new_prog = bpf_patch_insn_single(env->prog, off, patch, len);
14639         if (IS_ERR(new_prog)) {
14640                 if (PTR_ERR(new_prog) == -ERANGE)
14641                         verbose(env,
14642                                 "insn %d cannot be patched due to 16-bit range\n",
14643                                 env->insn_aux_data[off].orig_idx);
14644                 vfree(new_data);
14645                 return NULL;
14646         }
14647         adjust_insn_aux_data(env, new_data, new_prog, off, len);
14648         adjust_subprog_starts(env, off, len);
14649         adjust_poke_descs(new_prog, off, len);
14650         return new_prog;
14651 }
14652
14653 static int adjust_subprog_starts_after_remove(struct bpf_verifier_env *env,
14654                                               u32 off, u32 cnt)
14655 {
14656         int i, j;
14657
14658         /* find first prog starting at or after off (first to remove) */
14659         for (i = 0; i < env->subprog_cnt; i++)
14660                 if (env->subprog_info[i].start >= off)
14661                         break;
14662         /* find first prog starting at or after off + cnt (first to stay) */
14663         for (j = i; j < env->subprog_cnt; j++)
14664                 if (env->subprog_info[j].start >= off + cnt)
14665                         break;
14666         /* if j doesn't start exactly at off + cnt, we are just removing
14667          * the front of previous prog
14668          */
14669         if (env->subprog_info[j].start != off + cnt)
14670                 j--;
14671
14672         if (j > i) {
14673                 struct bpf_prog_aux *aux = env->prog->aux;
14674                 int move;
14675
14676                 /* move fake 'exit' subprog as well */
14677                 move = env->subprog_cnt + 1 - j;
14678
14679                 memmove(env->subprog_info + i,
14680                         env->subprog_info + j,
14681                         sizeof(*env->subprog_info) * move);
14682                 env->subprog_cnt -= j - i;
14683
14684                 /* remove func_info */
14685                 if (aux->func_info) {
14686                         move = aux->func_info_cnt - j;
14687
14688                         memmove(aux->func_info + i,
14689                                 aux->func_info + j,
14690                                 sizeof(*aux->func_info) * move);
14691                         aux->func_info_cnt -= j - i;
14692                         /* func_info->insn_off is set after all code rewrites,
14693                          * in adjust_btf_func() - no need to adjust
14694                          */
14695                 }
14696         } else {
14697                 /* convert i from "first prog to remove" to "first to adjust" */
14698                 if (env->subprog_info[i].start == off)
14699                         i++;
14700         }
14701
14702         /* update fake 'exit' subprog as well */
14703         for (; i <= env->subprog_cnt; i++)
14704                 env->subprog_info[i].start -= cnt;
14705
14706         return 0;
14707 }
14708
14709 static int bpf_adj_linfo_after_remove(struct bpf_verifier_env *env, u32 off,
14710                                       u32 cnt)
14711 {
14712         struct bpf_prog *prog = env->prog;
14713         u32 i, l_off, l_cnt, nr_linfo;
14714         struct bpf_line_info *linfo;
14715
14716         nr_linfo = prog->aux->nr_linfo;
14717         if (!nr_linfo)
14718                 return 0;
14719
14720         linfo = prog->aux->linfo;
14721
14722         /* find first line info to remove, count lines to be removed */
14723         for (i = 0; i < nr_linfo; i++)
14724                 if (linfo[i].insn_off >= off)
14725                         break;
14726
14727         l_off = i;
14728         l_cnt = 0;
14729         for (; i < nr_linfo; i++)
14730                 if (linfo[i].insn_off < off + cnt)
14731                         l_cnt++;
14732                 else
14733                         break;
14734
14735         /* First live insn doesn't match first live linfo, it needs to "inherit"
14736          * last removed linfo.  prog is already modified, so prog->len == off
14737          * means no live instructions after (tail of the program was removed).
14738          */
14739         if (prog->len != off && l_cnt &&
14740             (i == nr_linfo || linfo[i].insn_off != off + cnt)) {
14741                 l_cnt--;
14742                 linfo[--i].insn_off = off + cnt;
14743         }
14744
14745         /* remove the line info which refer to the removed instructions */
14746         if (l_cnt) {
14747                 memmove(linfo + l_off, linfo + i,
14748                         sizeof(*linfo) * (nr_linfo - i));
14749
14750                 prog->aux->nr_linfo -= l_cnt;
14751                 nr_linfo = prog->aux->nr_linfo;
14752         }
14753
14754         /* pull all linfo[i].insn_off >= off + cnt in by cnt */
14755         for (i = l_off; i < nr_linfo; i++)
14756                 linfo[i].insn_off -= cnt;
14757
14758         /* fix up all subprogs (incl. 'exit') which start >= off */
14759         for (i = 0; i <= env->subprog_cnt; i++)
14760                 if (env->subprog_info[i].linfo_idx > l_off) {
14761                         /* program may have started in the removed region but
14762                          * may not be fully removed
14763                          */
14764                         if (env->subprog_info[i].linfo_idx >= l_off + l_cnt)
14765                                 env->subprog_info[i].linfo_idx -= l_cnt;
14766                         else
14767                                 env->subprog_info[i].linfo_idx = l_off;
14768                 }
14769
14770         return 0;
14771 }
14772
14773 static int verifier_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt)
14774 {
14775         struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
14776         unsigned int orig_prog_len = env->prog->len;
14777         int err;
14778
14779         if (bpf_prog_is_dev_bound(env->prog->aux))
14780                 bpf_prog_offload_remove_insns(env, off, cnt);
14781
14782         err = bpf_remove_insns(env->prog, off, cnt);
14783         if (err)
14784                 return err;
14785
14786         err = adjust_subprog_starts_after_remove(env, off, cnt);
14787         if (err)
14788                 return err;
14789
14790         err = bpf_adj_linfo_after_remove(env, off, cnt);
14791         if (err)
14792                 return err;
14793
14794         memmove(aux_data + off, aux_data + off + cnt,
14795                 sizeof(*aux_data) * (orig_prog_len - off - cnt));
14796
14797         return 0;
14798 }
14799
14800 /* The verifier does more data flow analysis than llvm and will not
14801  * explore branches that are dead at run time. Malicious programs can
14802  * have dead code too. Therefore replace all dead at-run-time code
14803  * with 'ja -1'.
14804  *
14805  * Just nops are not optimal, e.g. if they would sit at the end of the
14806  * program and through another bug we would manage to jump there, then
14807  * we'd execute beyond program memory otherwise. Returning exception
14808  * code also wouldn't work since we can have subprogs where the dead
14809  * code could be located.
14810  */
14811 static void sanitize_dead_code(struct bpf_verifier_env *env)
14812 {
14813         struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
14814         struct bpf_insn trap = BPF_JMP_IMM(BPF_JA, 0, 0, -1);
14815         struct bpf_insn *insn = env->prog->insnsi;
14816         const int insn_cnt = env->prog->len;
14817         int i;
14818
14819         for (i = 0; i < insn_cnt; i++) {
14820                 if (aux_data[i].seen)
14821                         continue;
14822                 memcpy(insn + i, &trap, sizeof(trap));
14823                 aux_data[i].zext_dst = false;
14824         }
14825 }
14826
14827 static bool insn_is_cond_jump(u8 code)
14828 {
14829         u8 op;
14830
14831         if (BPF_CLASS(code) == BPF_JMP32)
14832                 return true;
14833
14834         if (BPF_CLASS(code) != BPF_JMP)
14835                 return false;
14836
14837         op = BPF_OP(code);
14838         return op != BPF_JA && op != BPF_EXIT && op != BPF_CALL;
14839 }
14840
14841 static void opt_hard_wire_dead_code_branches(struct bpf_verifier_env *env)
14842 {
14843         struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
14844         struct bpf_insn ja = BPF_JMP_IMM(BPF_JA, 0, 0, 0);
14845         struct bpf_insn *insn = env->prog->insnsi;
14846         const int insn_cnt = env->prog->len;
14847         int i;
14848
14849         for (i = 0; i < insn_cnt; i++, insn++) {
14850                 if (!insn_is_cond_jump(insn->code))
14851                         continue;
14852
14853                 if (!aux_data[i + 1].seen)
14854                         ja.off = insn->off;
14855                 else if (!aux_data[i + 1 + insn->off].seen)
14856                         ja.off = 0;
14857                 else
14858                         continue;
14859
14860                 if (bpf_prog_is_dev_bound(env->prog->aux))
14861                         bpf_prog_offload_replace_insn(env, i, &ja);
14862
14863                 memcpy(insn, &ja, sizeof(ja));
14864         }
14865 }
14866
14867 static int opt_remove_dead_code(struct bpf_verifier_env *env)
14868 {
14869         struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
14870         int insn_cnt = env->prog->len;
14871         int i, err;
14872
14873         for (i = 0; i < insn_cnt; i++) {
14874                 int j;
14875
14876                 j = 0;
14877                 while (i + j < insn_cnt && !aux_data[i + j].seen)
14878                         j++;
14879                 if (!j)
14880                         continue;
14881
14882                 err = verifier_remove_insns(env, i, j);
14883                 if (err)
14884                         return err;
14885                 insn_cnt = env->prog->len;
14886         }
14887
14888         return 0;
14889 }
14890
14891 static int opt_remove_nops(struct bpf_verifier_env *env)
14892 {
14893         const struct bpf_insn ja = BPF_JMP_IMM(BPF_JA, 0, 0, 0);
14894         struct bpf_insn *insn = env->prog->insnsi;
14895         int insn_cnt = env->prog->len;
14896         int i, err;
14897
14898         for (i = 0; i < insn_cnt; i++) {
14899                 if (memcmp(&insn[i], &ja, sizeof(ja)))
14900                         continue;
14901
14902                 err = verifier_remove_insns(env, i, 1);
14903                 if (err)
14904                         return err;
14905                 insn_cnt--;
14906                 i--;
14907         }
14908
14909         return 0;
14910 }
14911
14912 static int opt_subreg_zext_lo32_rnd_hi32(struct bpf_verifier_env *env,
14913                                          const union bpf_attr *attr)
14914 {
14915         struct bpf_insn *patch, zext_patch[2], rnd_hi32_patch[4];
14916         struct bpf_insn_aux_data *aux = env->insn_aux_data;
14917         int i, patch_len, delta = 0, len = env->prog->len;
14918         struct bpf_insn *insns = env->prog->insnsi;
14919         struct bpf_prog *new_prog;
14920         bool rnd_hi32;
14921
14922         rnd_hi32 = attr->prog_flags & BPF_F_TEST_RND_HI32;
14923         zext_patch[1] = BPF_ZEXT_REG(0);
14924         rnd_hi32_patch[1] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, 0);
14925         rnd_hi32_patch[2] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_AX, 32);
14926         rnd_hi32_patch[3] = BPF_ALU64_REG(BPF_OR, 0, BPF_REG_AX);
14927         for (i = 0; i < len; i++) {
14928                 int adj_idx = i + delta;
14929                 struct bpf_insn insn;
14930                 int load_reg;
14931
14932                 insn = insns[adj_idx];
14933                 load_reg = insn_def_regno(&insn);
14934                 if (!aux[adj_idx].zext_dst) {
14935                         u8 code, class;
14936                         u32 imm_rnd;
14937
14938                         if (!rnd_hi32)
14939                                 continue;
14940
14941                         code = insn.code;
14942                         class = BPF_CLASS(code);
14943                         if (load_reg == -1)
14944                                 continue;
14945
14946                         /* NOTE: arg "reg" (the fourth one) is only used for
14947                          *       BPF_STX + SRC_OP, so it is safe to pass NULL
14948                          *       here.
14949                          */
14950                         if (is_reg64(env, &insn, load_reg, NULL, DST_OP)) {
14951                                 if (class == BPF_LD &&
14952                                     BPF_MODE(code) == BPF_IMM)
14953                                         i++;
14954                                 continue;
14955                         }
14956
14957                         /* ctx load could be transformed into wider load. */
14958                         if (class == BPF_LDX &&
14959                             aux[adj_idx].ptr_type == PTR_TO_CTX)
14960                                 continue;
14961
14962                         imm_rnd = get_random_u32();
14963                         rnd_hi32_patch[0] = insn;
14964                         rnd_hi32_patch[1].imm = imm_rnd;
14965                         rnd_hi32_patch[3].dst_reg = load_reg;
14966                         patch = rnd_hi32_patch;
14967                         patch_len = 4;
14968                         goto apply_patch_buffer;
14969                 }
14970
14971                 /* Add in an zero-extend instruction if a) the JIT has requested
14972                  * it or b) it's a CMPXCHG.
14973                  *
14974                  * The latter is because: BPF_CMPXCHG always loads a value into
14975                  * R0, therefore always zero-extends. However some archs'
14976                  * equivalent instruction only does this load when the
14977                  * comparison is successful. This detail of CMPXCHG is
14978                  * orthogonal to the general zero-extension behaviour of the
14979                  * CPU, so it's treated independently of bpf_jit_needs_zext.
14980                  */
14981                 if (!bpf_jit_needs_zext() && !is_cmpxchg_insn(&insn))
14982                         continue;
14983
14984                 /* Zero-extension is done by the caller. */
14985                 if (bpf_pseudo_kfunc_call(&insn))
14986                         continue;
14987
14988                 if (WARN_ON(load_reg == -1)) {
14989                         verbose(env, "verifier bug. zext_dst is set, but no reg is defined\n");
14990                         return -EFAULT;
14991                 }
14992
14993                 zext_patch[0] = insn;
14994                 zext_patch[1].dst_reg = load_reg;
14995                 zext_patch[1].src_reg = load_reg;
14996                 patch = zext_patch;
14997                 patch_len = 2;
14998 apply_patch_buffer:
14999                 new_prog = bpf_patch_insn_data(env, adj_idx, patch, patch_len);
15000                 if (!new_prog)
15001                         return -ENOMEM;
15002                 env->prog = new_prog;
15003                 insns = new_prog->insnsi;
15004                 aux = env->insn_aux_data;
15005                 delta += patch_len - 1;
15006         }
15007
15008         return 0;
15009 }
15010
15011 /* convert load instructions that access fields of a context type into a
15012  * sequence of instructions that access fields of the underlying structure:
15013  *     struct __sk_buff    -> struct sk_buff
15014  *     struct bpf_sock_ops -> struct sock
15015  */
15016 static int convert_ctx_accesses(struct bpf_verifier_env *env)
15017 {
15018         const struct bpf_verifier_ops *ops = env->ops;
15019         int i, cnt, size, ctx_field_size, delta = 0;
15020         const int insn_cnt = env->prog->len;
15021         struct bpf_insn insn_buf[16], *insn;
15022         u32 target_size, size_default, off;
15023         struct bpf_prog *new_prog;
15024         enum bpf_access_type type;
15025         bool is_narrower_load;
15026
15027         if (ops->gen_prologue || env->seen_direct_write) {
15028                 if (!ops->gen_prologue) {
15029                         verbose(env, "bpf verifier is misconfigured\n");
15030                         return -EINVAL;
15031                 }
15032                 cnt = ops->gen_prologue(insn_buf, env->seen_direct_write,
15033                                         env->prog);
15034                 if (cnt >= ARRAY_SIZE(insn_buf)) {
15035                         verbose(env, "bpf verifier is misconfigured\n");
15036                         return -EINVAL;
15037                 } else if (cnt) {
15038                         new_prog = bpf_patch_insn_data(env, 0, insn_buf, cnt);
15039                         if (!new_prog)
15040                                 return -ENOMEM;
15041
15042                         env->prog = new_prog;
15043                         delta += cnt - 1;
15044                 }
15045         }
15046
15047         if (bpf_prog_is_dev_bound(env->prog->aux))
15048                 return 0;
15049
15050         insn = env->prog->insnsi + delta;
15051
15052         for (i = 0; i < insn_cnt; i++, insn++) {
15053                 bpf_convert_ctx_access_t convert_ctx_access;
15054                 bool ctx_access;
15055
15056                 if (insn->code == (BPF_LDX | BPF_MEM | BPF_B) ||
15057                     insn->code == (BPF_LDX | BPF_MEM | BPF_H) ||
15058                     insn->code == (BPF_LDX | BPF_MEM | BPF_W) ||
15059                     insn->code == (BPF_LDX | BPF_MEM | BPF_DW)) {
15060                         type = BPF_READ;
15061                         ctx_access = true;
15062                 } else if (insn->code == (BPF_STX | BPF_MEM | BPF_B) ||
15063                            insn->code == (BPF_STX | BPF_MEM | BPF_H) ||
15064                            insn->code == (BPF_STX | BPF_MEM | BPF_W) ||
15065                            insn->code == (BPF_STX | BPF_MEM | BPF_DW) ||
15066                            insn->code == (BPF_ST | BPF_MEM | BPF_B) ||
15067                            insn->code == (BPF_ST | BPF_MEM | BPF_H) ||
15068                            insn->code == (BPF_ST | BPF_MEM | BPF_W) ||
15069                            insn->code == (BPF_ST | BPF_MEM | BPF_DW)) {
15070                         type = BPF_WRITE;
15071                         ctx_access = BPF_CLASS(insn->code) == BPF_STX;
15072                 } else {
15073                         continue;
15074                 }
15075
15076                 if (type == BPF_WRITE &&
15077                     env->insn_aux_data[i + delta].sanitize_stack_spill) {
15078                         struct bpf_insn patch[] = {
15079                                 *insn,
15080                                 BPF_ST_NOSPEC(),
15081                         };
15082
15083                         cnt = ARRAY_SIZE(patch);
15084                         new_prog = bpf_patch_insn_data(env, i + delta, patch, cnt);
15085                         if (!new_prog)
15086                                 return -ENOMEM;
15087
15088                         delta    += cnt - 1;
15089                         env->prog = new_prog;
15090                         insn      = new_prog->insnsi + i + delta;
15091                         continue;
15092                 }
15093
15094                 if (!ctx_access)
15095                         continue;
15096
15097                 switch ((int)env->insn_aux_data[i + delta].ptr_type) {
15098                 case PTR_TO_CTX:
15099                         if (!ops->convert_ctx_access)
15100                                 continue;
15101                         convert_ctx_access = ops->convert_ctx_access;
15102                         break;
15103                 case PTR_TO_SOCKET:
15104                 case PTR_TO_SOCK_COMMON:
15105                         convert_ctx_access = bpf_sock_convert_ctx_access;
15106                         break;
15107                 case PTR_TO_TCP_SOCK:
15108                         convert_ctx_access = bpf_tcp_sock_convert_ctx_access;
15109                         break;
15110                 case PTR_TO_XDP_SOCK:
15111                         convert_ctx_access = bpf_xdp_sock_convert_ctx_access;
15112                         break;
15113                 case PTR_TO_BTF_ID:
15114                 case PTR_TO_BTF_ID | PTR_UNTRUSTED:
15115                 /* PTR_TO_BTF_ID | MEM_ALLOC always has a valid lifetime, unlike
15116                  * PTR_TO_BTF_ID, and an active ref_obj_id, but the same cannot
15117                  * be said once it is marked PTR_UNTRUSTED, hence we must handle
15118                  * any faults for loads into such types. BPF_WRITE is disallowed
15119                  * for this case.
15120                  */
15121                 case PTR_TO_BTF_ID | MEM_ALLOC | PTR_UNTRUSTED:
15122                         if (type == BPF_READ) {
15123                                 insn->code = BPF_LDX | BPF_PROBE_MEM |
15124                                         BPF_SIZE((insn)->code);
15125                                 env->prog->aux->num_exentries++;
15126                         }
15127                         continue;
15128                 default:
15129                         continue;
15130                 }
15131
15132                 ctx_field_size = env->insn_aux_data[i + delta].ctx_field_size;
15133                 size = BPF_LDST_BYTES(insn);
15134
15135                 /* If the read access is a narrower load of the field,
15136                  * convert to a 4/8-byte load, to minimum program type specific
15137                  * convert_ctx_access changes. If conversion is successful,
15138                  * we will apply proper mask to the result.
15139                  */
15140                 is_narrower_load = size < ctx_field_size;
15141                 size_default = bpf_ctx_off_adjust_machine(ctx_field_size);
15142                 off = insn->off;
15143                 if (is_narrower_load) {
15144                         u8 size_code;
15145
15146                         if (type == BPF_WRITE) {
15147                                 verbose(env, "bpf verifier narrow ctx access misconfigured\n");
15148                                 return -EINVAL;
15149                         }
15150
15151                         size_code = BPF_H;
15152                         if (ctx_field_size == 4)
15153                                 size_code = BPF_W;
15154                         else if (ctx_field_size == 8)
15155                                 size_code = BPF_DW;
15156
15157                         insn->off = off & ~(size_default - 1);
15158                         insn->code = BPF_LDX | BPF_MEM | size_code;
15159                 }
15160
15161                 target_size = 0;
15162                 cnt = convert_ctx_access(type, insn, insn_buf, env->prog,
15163                                          &target_size);
15164                 if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf) ||
15165                     (ctx_field_size && !target_size)) {
15166                         verbose(env, "bpf verifier is misconfigured\n");
15167                         return -EINVAL;
15168                 }
15169
15170                 if (is_narrower_load && size < target_size) {
15171                         u8 shift = bpf_ctx_narrow_access_offset(
15172                                 off, size, size_default) * 8;
15173                         if (shift && cnt + 1 >= ARRAY_SIZE(insn_buf)) {
15174                                 verbose(env, "bpf verifier narrow ctx load misconfigured\n");
15175                                 return -EINVAL;
15176                         }
15177                         if (ctx_field_size <= 4) {
15178                                 if (shift)
15179                                         insn_buf[cnt++] = BPF_ALU32_IMM(BPF_RSH,
15180                                                                         insn->dst_reg,
15181                                                                         shift);
15182                                 insn_buf[cnt++] = BPF_ALU32_IMM(BPF_AND, insn->dst_reg,
15183                                                                 (1 << size * 8) - 1);
15184                         } else {
15185                                 if (shift)
15186                                         insn_buf[cnt++] = BPF_ALU64_IMM(BPF_RSH,
15187                                                                         insn->dst_reg,
15188                                                                         shift);
15189                                 insn_buf[cnt++] = BPF_ALU64_IMM(BPF_AND, insn->dst_reg,
15190                                                                 (1ULL << size * 8) - 1);
15191                         }
15192                 }
15193
15194                 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
15195                 if (!new_prog)
15196                         return -ENOMEM;
15197
15198                 delta += cnt - 1;
15199
15200                 /* keep walking new program and skip insns we just inserted */
15201                 env->prog = new_prog;
15202                 insn      = new_prog->insnsi + i + delta;
15203         }
15204
15205         return 0;
15206 }
15207
15208 static int jit_subprogs(struct bpf_verifier_env *env)
15209 {
15210         struct bpf_prog *prog = env->prog, **func, *tmp;
15211         int i, j, subprog_start, subprog_end = 0, len, subprog;
15212         struct bpf_map *map_ptr;
15213         struct bpf_insn *insn;
15214         void *old_bpf_func;
15215         int err, num_exentries;
15216
15217         if (env->subprog_cnt <= 1)
15218                 return 0;
15219
15220         for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
15221                 if (!bpf_pseudo_func(insn) && !bpf_pseudo_call(insn))
15222                         continue;
15223
15224                 /* Upon error here we cannot fall back to interpreter but
15225                  * need a hard reject of the program. Thus -EFAULT is
15226                  * propagated in any case.
15227                  */
15228                 subprog = find_subprog(env, i + insn->imm + 1);
15229                 if (subprog < 0) {
15230                         WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
15231                                   i + insn->imm + 1);
15232                         return -EFAULT;
15233                 }
15234                 /* temporarily remember subprog id inside insn instead of
15235                  * aux_data, since next loop will split up all insns into funcs
15236                  */
15237                 insn->off = subprog;
15238                 /* remember original imm in case JIT fails and fallback
15239                  * to interpreter will be needed
15240                  */
15241                 env->insn_aux_data[i].call_imm = insn->imm;
15242                 /* point imm to __bpf_call_base+1 from JITs point of view */
15243                 insn->imm = 1;
15244                 if (bpf_pseudo_func(insn))
15245                         /* jit (e.g. x86_64) may emit fewer instructions
15246                          * if it learns a u32 imm is the same as a u64 imm.
15247                          * Force a non zero here.
15248                          */
15249                         insn[1].imm = 1;
15250         }
15251
15252         err = bpf_prog_alloc_jited_linfo(prog);
15253         if (err)
15254                 goto out_undo_insn;
15255
15256         err = -ENOMEM;
15257         func = kcalloc(env->subprog_cnt, sizeof(prog), GFP_KERNEL);
15258         if (!func)
15259                 goto out_undo_insn;
15260
15261         for (i = 0; i < env->subprog_cnt; i++) {
15262                 subprog_start = subprog_end;
15263                 subprog_end = env->subprog_info[i + 1].start;
15264
15265                 len = subprog_end - subprog_start;
15266                 /* bpf_prog_run() doesn't call subprogs directly,
15267                  * hence main prog stats include the runtime of subprogs.
15268                  * subprogs don't have IDs and not reachable via prog_get_next_id
15269                  * func[i]->stats will never be accessed and stays NULL
15270                  */
15271                 func[i] = bpf_prog_alloc_no_stats(bpf_prog_size(len), GFP_USER);
15272                 if (!func[i])
15273                         goto out_free;
15274                 memcpy(func[i]->insnsi, &prog->insnsi[subprog_start],
15275                        len * sizeof(struct bpf_insn));
15276                 func[i]->type = prog->type;
15277                 func[i]->len = len;
15278                 if (bpf_prog_calc_tag(func[i]))
15279                         goto out_free;
15280                 func[i]->is_func = 1;
15281                 func[i]->aux->func_idx = i;
15282                 /* Below members will be freed only at prog->aux */
15283                 func[i]->aux->btf = prog->aux->btf;
15284                 func[i]->aux->func_info = prog->aux->func_info;
15285                 func[i]->aux->func_info_cnt = prog->aux->func_info_cnt;
15286                 func[i]->aux->poke_tab = prog->aux->poke_tab;
15287                 func[i]->aux->size_poke_tab = prog->aux->size_poke_tab;
15288
15289                 for (j = 0; j < prog->aux->size_poke_tab; j++) {
15290                         struct bpf_jit_poke_descriptor *poke;
15291
15292                         poke = &prog->aux->poke_tab[j];
15293                         if (poke->insn_idx < subprog_end &&
15294                             poke->insn_idx >= subprog_start)
15295                                 poke->aux = func[i]->aux;
15296                 }
15297
15298                 func[i]->aux->name[0] = 'F';
15299                 func[i]->aux->stack_depth = env->subprog_info[i].stack_depth;
15300                 func[i]->jit_requested = 1;
15301                 func[i]->blinding_requested = prog->blinding_requested;
15302                 func[i]->aux->kfunc_tab = prog->aux->kfunc_tab;
15303                 func[i]->aux->kfunc_btf_tab = prog->aux->kfunc_btf_tab;
15304                 func[i]->aux->linfo = prog->aux->linfo;
15305                 func[i]->aux->nr_linfo = prog->aux->nr_linfo;
15306                 func[i]->aux->jited_linfo = prog->aux->jited_linfo;
15307                 func[i]->aux->linfo_idx = env->subprog_info[i].linfo_idx;
15308                 num_exentries = 0;
15309                 insn = func[i]->insnsi;
15310                 for (j = 0; j < func[i]->len; j++, insn++) {
15311                         if (BPF_CLASS(insn->code) == BPF_LDX &&
15312                             BPF_MODE(insn->code) == BPF_PROBE_MEM)
15313                                 num_exentries++;
15314                 }
15315                 func[i]->aux->num_exentries = num_exentries;
15316                 func[i]->aux->tail_call_reachable = env->subprog_info[i].tail_call_reachable;
15317                 func[i] = bpf_int_jit_compile(func[i]);
15318                 if (!func[i]->jited) {
15319                         err = -ENOTSUPP;
15320                         goto out_free;
15321                 }
15322                 cond_resched();
15323         }
15324
15325         /* at this point all bpf functions were successfully JITed
15326          * now populate all bpf_calls with correct addresses and
15327          * run last pass of JIT
15328          */
15329         for (i = 0; i < env->subprog_cnt; i++) {
15330                 insn = func[i]->insnsi;
15331                 for (j = 0; j < func[i]->len; j++, insn++) {
15332                         if (bpf_pseudo_func(insn)) {
15333                                 subprog = insn->off;
15334                                 insn[0].imm = (u32)(long)func[subprog]->bpf_func;
15335                                 insn[1].imm = ((u64)(long)func[subprog]->bpf_func) >> 32;
15336                                 continue;
15337                         }
15338                         if (!bpf_pseudo_call(insn))
15339                                 continue;
15340                         subprog = insn->off;
15341                         insn->imm = BPF_CALL_IMM(func[subprog]->bpf_func);
15342                 }
15343
15344                 /* we use the aux data to keep a list of the start addresses
15345                  * of the JITed images for each function in the program
15346                  *
15347                  * for some architectures, such as powerpc64, the imm field
15348                  * might not be large enough to hold the offset of the start
15349                  * address of the callee's JITed image from __bpf_call_base
15350                  *
15351                  * in such cases, we can lookup the start address of a callee
15352                  * by using its subprog id, available from the off field of
15353                  * the call instruction, as an index for this list
15354                  */
15355                 func[i]->aux->func = func;
15356                 func[i]->aux->func_cnt = env->subprog_cnt;
15357         }
15358         for (i = 0; i < env->subprog_cnt; i++) {
15359                 old_bpf_func = func[i]->bpf_func;
15360                 tmp = bpf_int_jit_compile(func[i]);
15361                 if (tmp != func[i] || func[i]->bpf_func != old_bpf_func) {
15362                         verbose(env, "JIT doesn't support bpf-to-bpf calls\n");
15363                         err = -ENOTSUPP;
15364                         goto out_free;
15365                 }
15366                 cond_resched();
15367         }
15368
15369         /* finally lock prog and jit images for all functions and
15370          * populate kallsysm
15371          */
15372         for (i = 0; i < env->subprog_cnt; i++) {
15373                 bpf_prog_lock_ro(func[i]);
15374                 bpf_prog_kallsyms_add(func[i]);
15375         }
15376
15377         /* Last step: make now unused interpreter insns from main
15378          * prog consistent for later dump requests, so they can
15379          * later look the same as if they were interpreted only.
15380          */
15381         for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
15382                 if (bpf_pseudo_func(insn)) {
15383                         insn[0].imm = env->insn_aux_data[i].call_imm;
15384                         insn[1].imm = insn->off;
15385                         insn->off = 0;
15386                         continue;
15387                 }
15388                 if (!bpf_pseudo_call(insn))
15389                         continue;
15390                 insn->off = env->insn_aux_data[i].call_imm;
15391                 subprog = find_subprog(env, i + insn->off + 1);
15392                 insn->imm = subprog;
15393         }
15394
15395         prog->jited = 1;
15396         prog->bpf_func = func[0]->bpf_func;
15397         prog->jited_len = func[0]->jited_len;
15398         prog->aux->func = func;
15399         prog->aux->func_cnt = env->subprog_cnt;
15400         bpf_prog_jit_attempt_done(prog);
15401         return 0;
15402 out_free:
15403         /* We failed JIT'ing, so at this point we need to unregister poke
15404          * descriptors from subprogs, so that kernel is not attempting to
15405          * patch it anymore as we're freeing the subprog JIT memory.
15406          */
15407         for (i = 0; i < prog->aux->size_poke_tab; i++) {
15408                 map_ptr = prog->aux->poke_tab[i].tail_call.map;
15409                 map_ptr->ops->map_poke_untrack(map_ptr, prog->aux);
15410         }
15411         /* At this point we're guaranteed that poke descriptors are not
15412          * live anymore. We can just unlink its descriptor table as it's
15413          * released with the main prog.
15414          */
15415         for (i = 0; i < env->subprog_cnt; i++) {
15416                 if (!func[i])
15417                         continue;
15418                 func[i]->aux->poke_tab = NULL;
15419                 bpf_jit_free(func[i]);
15420         }
15421         kfree(func);
15422 out_undo_insn:
15423         /* cleanup main prog to be interpreted */
15424         prog->jit_requested = 0;
15425         prog->blinding_requested = 0;
15426         for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
15427                 if (!bpf_pseudo_call(insn))
15428                         continue;
15429                 insn->off = 0;
15430                 insn->imm = env->insn_aux_data[i].call_imm;
15431         }
15432         bpf_prog_jit_attempt_done(prog);
15433         return err;
15434 }
15435
15436 static int fixup_call_args(struct bpf_verifier_env *env)
15437 {
15438 #ifndef CONFIG_BPF_JIT_ALWAYS_ON
15439         struct bpf_prog *prog = env->prog;
15440         struct bpf_insn *insn = prog->insnsi;
15441         bool has_kfunc_call = bpf_prog_has_kfunc_call(prog);
15442         int i, depth;
15443 #endif
15444         int err = 0;
15445
15446         if (env->prog->jit_requested &&
15447             !bpf_prog_is_dev_bound(env->prog->aux)) {
15448                 err = jit_subprogs(env);
15449                 if (err == 0)
15450                         return 0;
15451                 if (err == -EFAULT)
15452                         return err;
15453         }
15454 #ifndef CONFIG_BPF_JIT_ALWAYS_ON
15455         if (has_kfunc_call) {
15456                 verbose(env, "calling kernel functions are not allowed in non-JITed programs\n");
15457                 return -EINVAL;
15458         }
15459         if (env->subprog_cnt > 1 && env->prog->aux->tail_call_reachable) {
15460                 /* When JIT fails the progs with bpf2bpf calls and tail_calls
15461                  * have to be rejected, since interpreter doesn't support them yet.
15462                  */
15463                 verbose(env, "tail_calls are not allowed in non-JITed programs with bpf-to-bpf calls\n");
15464                 return -EINVAL;
15465         }
15466         for (i = 0; i < prog->len; i++, insn++) {
15467                 if (bpf_pseudo_func(insn)) {
15468                         /* When JIT fails the progs with callback calls
15469                          * have to be rejected, since interpreter doesn't support them yet.
15470                          */
15471                         verbose(env, "callbacks are not allowed in non-JITed programs\n");
15472                         return -EINVAL;
15473                 }
15474
15475                 if (!bpf_pseudo_call(insn))
15476                         continue;
15477                 depth = get_callee_stack_depth(env, insn, i);
15478                 if (depth < 0)
15479                         return depth;
15480                 bpf_patch_call_args(insn, depth);
15481         }
15482         err = 0;
15483 #endif
15484         return err;
15485 }
15486
15487 static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
15488                             struct bpf_insn *insn_buf, int insn_idx, int *cnt)
15489 {
15490         const struct bpf_kfunc_desc *desc;
15491
15492         if (!insn->imm) {
15493                 verbose(env, "invalid kernel function call not eliminated in verifier pass\n");
15494                 return -EINVAL;
15495         }
15496
15497         /* insn->imm has the btf func_id. Replace it with
15498          * an address (relative to __bpf_call_base).
15499          */
15500         desc = find_kfunc_desc(env->prog, insn->imm, insn->off);
15501         if (!desc) {
15502                 verbose(env, "verifier internal error: kernel function descriptor not found for func_id %u\n",
15503                         insn->imm);
15504                 return -EFAULT;
15505         }
15506
15507         *cnt = 0;
15508         insn->imm = desc->imm;
15509         if (insn->off)
15510                 return 0;
15511         if (desc->func_id == special_kfunc_list[KF_bpf_obj_new_impl]) {
15512                 struct btf_struct_meta *kptr_struct_meta = env->insn_aux_data[insn_idx].kptr_struct_meta;
15513                 struct bpf_insn addr[2] = { BPF_LD_IMM64(BPF_REG_2, (long)kptr_struct_meta) };
15514                 u64 obj_new_size = env->insn_aux_data[insn_idx].obj_new_size;
15515
15516                 insn_buf[0] = BPF_MOV64_IMM(BPF_REG_1, obj_new_size);
15517                 insn_buf[1] = addr[0];
15518                 insn_buf[2] = addr[1];
15519                 insn_buf[3] = *insn;
15520                 *cnt = 4;
15521         } else if (desc->func_id == special_kfunc_list[KF_bpf_obj_drop_impl]) {
15522                 struct btf_struct_meta *kptr_struct_meta = env->insn_aux_data[insn_idx].kptr_struct_meta;
15523                 struct bpf_insn addr[2] = { BPF_LD_IMM64(BPF_REG_2, (long)kptr_struct_meta) };
15524
15525                 insn_buf[0] = addr[0];
15526                 insn_buf[1] = addr[1];
15527                 insn_buf[2] = *insn;
15528                 *cnt = 3;
15529         } else if (desc->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx] ||
15530                    desc->func_id == special_kfunc_list[KF_bpf_rdonly_cast]) {
15531                 insn_buf[0] = BPF_MOV64_REG(BPF_REG_0, BPF_REG_1);
15532                 *cnt = 1;
15533         }
15534         return 0;
15535 }
15536
15537 /* Do various post-verification rewrites in a single program pass.
15538  * These rewrites simplify JIT and interpreter implementations.
15539  */
15540 static int do_misc_fixups(struct bpf_verifier_env *env)
15541 {
15542         struct bpf_prog *prog = env->prog;
15543         enum bpf_attach_type eatype = prog->expected_attach_type;
15544         enum bpf_prog_type prog_type = resolve_prog_type(prog);
15545         struct bpf_insn *insn = prog->insnsi;
15546         const struct bpf_func_proto *fn;
15547         const int insn_cnt = prog->len;
15548         const struct bpf_map_ops *ops;
15549         struct bpf_insn_aux_data *aux;
15550         struct bpf_insn insn_buf[16];
15551         struct bpf_prog *new_prog;
15552         struct bpf_map *map_ptr;
15553         int i, ret, cnt, delta = 0;
15554
15555         for (i = 0; i < insn_cnt; i++, insn++) {
15556                 /* Make divide-by-zero exceptions impossible. */
15557                 if (insn->code == (BPF_ALU64 | BPF_MOD | BPF_X) ||
15558                     insn->code == (BPF_ALU64 | BPF_DIV | BPF_X) ||
15559                     insn->code == (BPF_ALU | BPF_MOD | BPF_X) ||
15560                     insn->code == (BPF_ALU | BPF_DIV | BPF_X)) {
15561                         bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
15562                         bool isdiv = BPF_OP(insn->code) == BPF_DIV;
15563                         struct bpf_insn *patchlet;
15564                         struct bpf_insn chk_and_div[] = {
15565                                 /* [R,W]x div 0 -> 0 */
15566                                 BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
15567                                              BPF_JNE | BPF_K, insn->src_reg,
15568                                              0, 2, 0),
15569                                 BPF_ALU32_REG(BPF_XOR, insn->dst_reg, insn->dst_reg),
15570                                 BPF_JMP_IMM(BPF_JA, 0, 0, 1),
15571                                 *insn,
15572                         };
15573                         struct bpf_insn chk_and_mod[] = {
15574                                 /* [R,W]x mod 0 -> [R,W]x */
15575                                 BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
15576                                              BPF_JEQ | BPF_K, insn->src_reg,
15577                                              0, 1 + (is64 ? 0 : 1), 0),
15578                                 *insn,
15579                                 BPF_JMP_IMM(BPF_JA, 0, 0, 1),
15580                                 BPF_MOV32_REG(insn->dst_reg, insn->dst_reg),
15581                         };
15582
15583                         patchlet = isdiv ? chk_and_div : chk_and_mod;
15584                         cnt = isdiv ? ARRAY_SIZE(chk_and_div) :
15585                                       ARRAY_SIZE(chk_and_mod) - (is64 ? 2 : 0);
15586
15587                         new_prog = bpf_patch_insn_data(env, i + delta, patchlet, cnt);
15588                         if (!new_prog)
15589                                 return -ENOMEM;
15590
15591                         delta    += cnt - 1;
15592                         env->prog = prog = new_prog;
15593                         insn      = new_prog->insnsi + i + delta;
15594                         continue;
15595                 }
15596
15597                 /* Implement LD_ABS and LD_IND with a rewrite, if supported by the program type. */
15598                 if (BPF_CLASS(insn->code) == BPF_LD &&
15599                     (BPF_MODE(insn->code) == BPF_ABS ||
15600                      BPF_MODE(insn->code) == BPF_IND)) {
15601                         cnt = env->ops->gen_ld_abs(insn, insn_buf);
15602                         if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) {
15603                                 verbose(env, "bpf verifier is misconfigured\n");
15604                                 return -EINVAL;
15605                         }
15606
15607                         new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
15608                         if (!new_prog)
15609                                 return -ENOMEM;
15610
15611                         delta    += cnt - 1;
15612                         env->prog = prog = new_prog;
15613                         insn      = new_prog->insnsi + i + delta;
15614                         continue;
15615                 }
15616
15617                 /* Rewrite pointer arithmetic to mitigate speculation attacks. */
15618                 if (insn->code == (BPF_ALU64 | BPF_ADD | BPF_X) ||
15619                     insn->code == (BPF_ALU64 | BPF_SUB | BPF_X)) {
15620                         const u8 code_add = BPF_ALU64 | BPF_ADD | BPF_X;
15621                         const u8 code_sub = BPF_ALU64 | BPF_SUB | BPF_X;
15622                         struct bpf_insn *patch = &insn_buf[0];
15623                         bool issrc, isneg, isimm;
15624                         u32 off_reg;
15625
15626                         aux = &env->insn_aux_data[i + delta];
15627                         if (!aux->alu_state ||
15628                             aux->alu_state == BPF_ALU_NON_POINTER)
15629                                 continue;
15630
15631                         isneg = aux->alu_state & BPF_ALU_NEG_VALUE;
15632                         issrc = (aux->alu_state & BPF_ALU_SANITIZE) ==
15633                                 BPF_ALU_SANITIZE_SRC;
15634                         isimm = aux->alu_state & BPF_ALU_IMMEDIATE;
15635
15636                         off_reg = issrc ? insn->src_reg : insn->dst_reg;
15637                         if (isimm) {
15638                                 *patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit);
15639                         } else {
15640                                 if (isneg)
15641                                         *patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1);
15642                                 *patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit);
15643                                 *patch++ = BPF_ALU64_REG(BPF_SUB, BPF_REG_AX, off_reg);
15644                                 *patch++ = BPF_ALU64_REG(BPF_OR, BPF_REG_AX, off_reg);
15645                                 *patch++ = BPF_ALU64_IMM(BPF_NEG, BPF_REG_AX, 0);
15646                                 *patch++ = BPF_ALU64_IMM(BPF_ARSH, BPF_REG_AX, 63);
15647                                 *patch++ = BPF_ALU64_REG(BPF_AND, BPF_REG_AX, off_reg);
15648                         }
15649                         if (!issrc)
15650                                 *patch++ = BPF_MOV64_REG(insn->dst_reg, insn->src_reg);
15651                         insn->src_reg = BPF_REG_AX;
15652                         if (isneg)
15653                                 insn->code = insn->code == code_add ?
15654                                              code_sub : code_add;
15655                         *patch++ = *insn;
15656                         if (issrc && isneg && !isimm)
15657                                 *patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1);
15658                         cnt = patch - insn_buf;
15659
15660                         new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
15661                         if (!new_prog)
15662                                 return -ENOMEM;
15663
15664                         delta    += cnt - 1;
15665                         env->prog = prog = new_prog;
15666                         insn      = new_prog->insnsi + i + delta;
15667                         continue;
15668                 }
15669
15670                 if (insn->code != (BPF_JMP | BPF_CALL))
15671                         continue;
15672                 if (insn->src_reg == BPF_PSEUDO_CALL)
15673                         continue;
15674                 if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {
15675                         ret = fixup_kfunc_call(env, insn, insn_buf, i + delta, &cnt);
15676                         if (ret)
15677                                 return ret;
15678                         if (cnt == 0)
15679                                 continue;
15680
15681                         new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
15682                         if (!new_prog)
15683                                 return -ENOMEM;
15684
15685                         delta    += cnt - 1;
15686                         env->prog = prog = new_prog;
15687                         insn      = new_prog->insnsi + i + delta;
15688                         continue;
15689                 }
15690
15691                 if (insn->imm == BPF_FUNC_get_route_realm)
15692                         prog->dst_needed = 1;
15693                 if (insn->imm == BPF_FUNC_get_prandom_u32)
15694                         bpf_user_rnd_init_once();
15695                 if (insn->imm == BPF_FUNC_override_return)
15696                         prog->kprobe_override = 1;
15697                 if (insn->imm == BPF_FUNC_tail_call) {
15698                         /* If we tail call into other programs, we
15699                          * cannot make any assumptions since they can
15700                          * be replaced dynamically during runtime in
15701                          * the program array.
15702                          */
15703                         prog->cb_access = 1;
15704                         if (!allow_tail_call_in_subprogs(env))
15705                                 prog->aux->stack_depth = MAX_BPF_STACK;
15706                         prog->aux->max_pkt_offset = MAX_PACKET_OFF;
15707
15708                         /* mark bpf_tail_call as different opcode to avoid
15709                          * conditional branch in the interpreter for every normal
15710                          * call and to prevent accidental JITing by JIT compiler
15711                          * that doesn't support bpf_tail_call yet
15712                          */
15713                         insn->imm = 0;
15714                         insn->code = BPF_JMP | BPF_TAIL_CALL;
15715
15716                         aux = &env->insn_aux_data[i + delta];
15717                         if (env->bpf_capable && !prog->blinding_requested &&
15718                             prog->jit_requested &&
15719                             !bpf_map_key_poisoned(aux) &&
15720                             !bpf_map_ptr_poisoned(aux) &&
15721                             !bpf_map_ptr_unpriv(aux)) {
15722                                 struct bpf_jit_poke_descriptor desc = {
15723                                         .reason = BPF_POKE_REASON_TAIL_CALL,
15724                                         .tail_call.map = BPF_MAP_PTR(aux->map_ptr_state),
15725                                         .tail_call.key = bpf_map_key_immediate(aux),
15726                                         .insn_idx = i + delta,
15727                                 };
15728
15729                                 ret = bpf_jit_add_poke_descriptor(prog, &desc);
15730                                 if (ret < 0) {
15731                                         verbose(env, "adding tail call poke descriptor failed\n");
15732                                         return ret;
15733                                 }
15734
15735                                 insn->imm = ret + 1;
15736                                 continue;
15737                         }
15738
15739                         if (!bpf_map_ptr_unpriv(aux))
15740                                 continue;
15741
15742                         /* instead of changing every JIT dealing with tail_call
15743                          * emit two extra insns:
15744                          * if (index >= max_entries) goto out;
15745                          * index &= array->index_mask;
15746                          * to avoid out-of-bounds cpu speculation
15747                          */
15748                         if (bpf_map_ptr_poisoned(aux)) {
15749                                 verbose(env, "tail_call abusing map_ptr\n");
15750                                 return -EINVAL;
15751                         }
15752
15753                         map_ptr = BPF_MAP_PTR(aux->map_ptr_state);
15754                         insn_buf[0] = BPF_JMP_IMM(BPF_JGE, BPF_REG_3,
15755                                                   map_ptr->max_entries, 2);
15756                         insn_buf[1] = BPF_ALU32_IMM(BPF_AND, BPF_REG_3,
15757                                                     container_of(map_ptr,
15758                                                                  struct bpf_array,
15759                                                                  map)->index_mask);
15760                         insn_buf[2] = *insn;
15761                         cnt = 3;
15762                         new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
15763                         if (!new_prog)
15764                                 return -ENOMEM;
15765
15766                         delta    += cnt - 1;
15767                         env->prog = prog = new_prog;
15768                         insn      = new_prog->insnsi + i + delta;
15769                         continue;
15770                 }
15771
15772                 if (insn->imm == BPF_FUNC_timer_set_callback) {
15773                         /* The verifier will process callback_fn as many times as necessary
15774                          * with different maps and the register states prepared by
15775                          * set_timer_callback_state will be accurate.
15776                          *
15777                          * The following use case is valid:
15778                          *   map1 is shared by prog1, prog2, prog3.
15779                          *   prog1 calls bpf_timer_init for some map1 elements
15780                          *   prog2 calls bpf_timer_set_callback for some map1 elements.
15781                          *     Those that were not bpf_timer_init-ed will return -EINVAL.
15782                          *   prog3 calls bpf_timer_start for some map1 elements.
15783                          *     Those that were not both bpf_timer_init-ed and
15784                          *     bpf_timer_set_callback-ed will return -EINVAL.
15785                          */
15786                         struct bpf_insn ld_addrs[2] = {
15787                                 BPF_LD_IMM64(BPF_REG_3, (long)prog->aux),
15788                         };
15789
15790                         insn_buf[0] = ld_addrs[0];
15791                         insn_buf[1] = ld_addrs[1];
15792                         insn_buf[2] = *insn;
15793                         cnt = 3;
15794
15795                         new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
15796                         if (!new_prog)
15797                                 return -ENOMEM;
15798
15799                         delta    += cnt - 1;
15800                         env->prog = prog = new_prog;
15801                         insn      = new_prog->insnsi + i + delta;
15802                         goto patch_call_imm;
15803                 }
15804
15805                 if (is_storage_get_function(insn->imm)) {
15806                         if (!env->prog->aux->sleepable ||
15807                             env->insn_aux_data[i + delta].storage_get_func_atomic)
15808                                 insn_buf[0] = BPF_MOV64_IMM(BPF_REG_5, (__force __s32)GFP_ATOMIC);
15809                         else
15810                                 insn_buf[0] = BPF_MOV64_IMM(BPF_REG_5, (__force __s32)GFP_KERNEL);
15811                         insn_buf[1] = *insn;
15812                         cnt = 2;
15813
15814                         new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
15815                         if (!new_prog)
15816                                 return -ENOMEM;
15817
15818                         delta += cnt - 1;
15819                         env->prog = prog = new_prog;
15820                         insn = new_prog->insnsi + i + delta;
15821                         goto patch_call_imm;
15822                 }
15823
15824                 /* BPF_EMIT_CALL() assumptions in some of the map_gen_lookup
15825                  * and other inlining handlers are currently limited to 64 bit
15826                  * only.
15827                  */
15828                 if (prog->jit_requested && BITS_PER_LONG == 64 &&
15829                     (insn->imm == BPF_FUNC_map_lookup_elem ||
15830                      insn->imm == BPF_FUNC_map_update_elem ||
15831                      insn->imm == BPF_FUNC_map_delete_elem ||
15832                      insn->imm == BPF_FUNC_map_push_elem   ||
15833                      insn->imm == BPF_FUNC_map_pop_elem    ||
15834                      insn->imm == BPF_FUNC_map_peek_elem   ||
15835                      insn->imm == BPF_FUNC_redirect_map    ||
15836                      insn->imm == BPF_FUNC_for_each_map_elem ||
15837                      insn->imm == BPF_FUNC_map_lookup_percpu_elem)) {
15838                         aux = &env->insn_aux_data[i + delta];
15839                         if (bpf_map_ptr_poisoned(aux))
15840                                 goto patch_call_imm;
15841
15842                         map_ptr = BPF_MAP_PTR(aux->map_ptr_state);
15843                         ops = map_ptr->ops;
15844                         if (insn->imm == BPF_FUNC_map_lookup_elem &&
15845                             ops->map_gen_lookup) {
15846                                 cnt = ops->map_gen_lookup(map_ptr, insn_buf);
15847                                 if (cnt == -EOPNOTSUPP)
15848                                         goto patch_map_ops_generic;
15849                                 if (cnt <= 0 || cnt >= ARRAY_SIZE(insn_buf)) {
15850                                         verbose(env, "bpf verifier is misconfigured\n");
15851                                         return -EINVAL;
15852                                 }
15853
15854                                 new_prog = bpf_patch_insn_data(env, i + delta,
15855                                                                insn_buf, cnt);
15856                                 if (!new_prog)
15857                                         return -ENOMEM;
15858
15859                                 delta    += cnt - 1;
15860                                 env->prog = prog = new_prog;
15861                                 insn      = new_prog->insnsi + i + delta;
15862                                 continue;
15863                         }
15864
15865                         BUILD_BUG_ON(!__same_type(ops->map_lookup_elem,
15866                                      (void *(*)(struct bpf_map *map, void *key))NULL));
15867                         BUILD_BUG_ON(!__same_type(ops->map_delete_elem,
15868                                      (int (*)(struct bpf_map *map, void *key))NULL));
15869                         BUILD_BUG_ON(!__same_type(ops->map_update_elem,
15870                                      (int (*)(struct bpf_map *map, void *key, void *value,
15871                                               u64 flags))NULL));
15872                         BUILD_BUG_ON(!__same_type(ops->map_push_elem,
15873                                      (int (*)(struct bpf_map *map, void *value,
15874                                               u64 flags))NULL));
15875                         BUILD_BUG_ON(!__same_type(ops->map_pop_elem,
15876                                      (int (*)(struct bpf_map *map, void *value))NULL));
15877                         BUILD_BUG_ON(!__same_type(ops->map_peek_elem,
15878                                      (int (*)(struct bpf_map *map, void *value))NULL));
15879                         BUILD_BUG_ON(!__same_type(ops->map_redirect,
15880                                      (int (*)(struct bpf_map *map, u64 index, u64 flags))NULL));
15881                         BUILD_BUG_ON(!__same_type(ops->map_for_each_callback,
15882                                      (int (*)(struct bpf_map *map,
15883                                               bpf_callback_t callback_fn,
15884                                               void *callback_ctx,
15885                                               u64 flags))NULL));
15886                         BUILD_BUG_ON(!__same_type(ops->map_lookup_percpu_elem,
15887                                      (void *(*)(struct bpf_map *map, void *key, u32 cpu))NULL));
15888
15889 patch_map_ops_generic:
15890                         switch (insn->imm) {
15891                         case BPF_FUNC_map_lookup_elem:
15892                                 insn->imm = BPF_CALL_IMM(ops->map_lookup_elem);
15893                                 continue;
15894                         case BPF_FUNC_map_update_elem:
15895                                 insn->imm = BPF_CALL_IMM(ops->map_update_elem);
15896                                 continue;
15897                         case BPF_FUNC_map_delete_elem:
15898                                 insn->imm = BPF_CALL_IMM(ops->map_delete_elem);
15899                                 continue;
15900                         case BPF_FUNC_map_push_elem:
15901                                 insn->imm = BPF_CALL_IMM(ops->map_push_elem);
15902                                 continue;
15903                         case BPF_FUNC_map_pop_elem:
15904                                 insn->imm = BPF_CALL_IMM(ops->map_pop_elem);
15905                                 continue;
15906                         case BPF_FUNC_map_peek_elem:
15907                                 insn->imm = BPF_CALL_IMM(ops->map_peek_elem);
15908                                 continue;
15909                         case BPF_FUNC_redirect_map:
15910                                 insn->imm = BPF_CALL_IMM(ops->map_redirect);
15911                                 continue;
15912                         case BPF_FUNC_for_each_map_elem:
15913                                 insn->imm = BPF_CALL_IMM(ops->map_for_each_callback);
15914                                 continue;
15915                         case BPF_FUNC_map_lookup_percpu_elem:
15916                                 insn->imm = BPF_CALL_IMM(ops->map_lookup_percpu_elem);
15917                                 continue;
15918                         }
15919
15920                         goto patch_call_imm;
15921                 }
15922
15923                 /* Implement bpf_jiffies64 inline. */
15924                 if (prog->jit_requested && BITS_PER_LONG == 64 &&
15925                     insn->imm == BPF_FUNC_jiffies64) {
15926                         struct bpf_insn ld_jiffies_addr[2] = {
15927                                 BPF_LD_IMM64(BPF_REG_0,
15928                                              (unsigned long)&jiffies),
15929                         };
15930
15931                         insn_buf[0] = ld_jiffies_addr[0];
15932                         insn_buf[1] = ld_jiffies_addr[1];
15933                         insn_buf[2] = BPF_LDX_MEM(BPF_DW, BPF_REG_0,
15934                                                   BPF_REG_0, 0);
15935                         cnt = 3;
15936
15937                         new_prog = bpf_patch_insn_data(env, i + delta, insn_buf,
15938                                                        cnt);
15939                         if (!new_prog)
15940                                 return -ENOMEM;
15941
15942                         delta    += cnt - 1;
15943                         env->prog = prog = new_prog;
15944                         insn      = new_prog->insnsi + i + delta;
15945                         continue;
15946                 }
15947
15948                 /* Implement bpf_get_func_arg inline. */
15949                 if (prog_type == BPF_PROG_TYPE_TRACING &&
15950                     insn->imm == BPF_FUNC_get_func_arg) {
15951                         /* Load nr_args from ctx - 8 */
15952                         insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
15953                         insn_buf[1] = BPF_JMP32_REG(BPF_JGE, BPF_REG_2, BPF_REG_0, 6);
15954                         insn_buf[2] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_2, 3);
15955                         insn_buf[3] = BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_1);
15956                         insn_buf[4] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_2, 0);
15957                         insn_buf[5] = BPF_STX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0);
15958                         insn_buf[6] = BPF_MOV64_IMM(BPF_REG_0, 0);
15959                         insn_buf[7] = BPF_JMP_A(1);
15960                         insn_buf[8] = BPF_MOV64_IMM(BPF_REG_0, -EINVAL);
15961                         cnt = 9;
15962
15963                         new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
15964                         if (!new_prog)
15965                                 return -ENOMEM;
15966
15967                         delta    += cnt - 1;
15968                         env->prog = prog = new_prog;
15969                         insn      = new_prog->insnsi + i + delta;
15970                         continue;
15971                 }
15972
15973                 /* Implement bpf_get_func_ret inline. */
15974                 if (prog_type == BPF_PROG_TYPE_TRACING &&
15975                     insn->imm == BPF_FUNC_get_func_ret) {
15976                         if (eatype == BPF_TRACE_FEXIT ||
15977                             eatype == BPF_MODIFY_RETURN) {
15978                                 /* Load nr_args from ctx - 8 */
15979                                 insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
15980                                 insn_buf[1] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_0, 3);
15981                                 insn_buf[2] = BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1);
15982                                 insn_buf[3] = BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0);
15983                                 insn_buf[4] = BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_3, 0);
15984                                 insn_buf[5] = BPF_MOV64_IMM(BPF_REG_0, 0);
15985                                 cnt = 6;
15986                         } else {
15987                                 insn_buf[0] = BPF_MOV64_IMM(BPF_REG_0, -EOPNOTSUPP);
15988                                 cnt = 1;
15989                         }
15990
15991                         new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
15992                         if (!new_prog)
15993                                 return -ENOMEM;
15994
15995                         delta    += cnt - 1;
15996                         env->prog = prog = new_prog;
15997                         insn      = new_prog->insnsi + i + delta;
15998                         continue;
15999                 }
16000
16001                 /* Implement get_func_arg_cnt inline. */
16002                 if (prog_type == BPF_PROG_TYPE_TRACING &&
16003                     insn->imm == BPF_FUNC_get_func_arg_cnt) {
16004                         /* Load nr_args from ctx - 8 */
16005                         insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
16006
16007                         new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, 1);
16008                         if (!new_prog)
16009                                 return -ENOMEM;
16010
16011                         env->prog = prog = new_prog;
16012                         insn      = new_prog->insnsi + i + delta;
16013                         continue;
16014                 }
16015
16016                 /* Implement bpf_get_func_ip inline. */
16017                 if (prog_type == BPF_PROG_TYPE_TRACING &&
16018                     insn->imm == BPF_FUNC_get_func_ip) {
16019                         /* Load IP address from ctx - 16 */
16020                         insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -16);
16021
16022                         new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, 1);
16023                         if (!new_prog)
16024                                 return -ENOMEM;
16025
16026                         env->prog = prog = new_prog;
16027                         insn      = new_prog->insnsi + i + delta;
16028                         continue;
16029                 }
16030
16031 patch_call_imm:
16032                 fn = env->ops->get_func_proto(insn->imm, env->prog);
16033                 /* all functions that have prototype and verifier allowed
16034                  * programs to call them, must be real in-kernel functions
16035                  */
16036                 if (!fn->func) {
16037                         verbose(env,
16038                                 "kernel subsystem misconfigured func %s#%d\n",
16039                                 func_id_name(insn->imm), insn->imm);
16040                         return -EFAULT;
16041                 }
16042                 insn->imm = fn->func - __bpf_call_base;
16043         }
16044
16045         /* Since poke tab is now finalized, publish aux to tracker. */
16046         for (i = 0; i < prog->aux->size_poke_tab; i++) {
16047                 map_ptr = prog->aux->poke_tab[i].tail_call.map;
16048                 if (!map_ptr->ops->map_poke_track ||
16049                     !map_ptr->ops->map_poke_untrack ||
16050                     !map_ptr->ops->map_poke_run) {
16051                         verbose(env, "bpf verifier is misconfigured\n");
16052                         return -EINVAL;
16053                 }
16054
16055                 ret = map_ptr->ops->map_poke_track(map_ptr, prog->aux);
16056                 if (ret < 0) {
16057                         verbose(env, "tracking tail call prog failed\n");
16058                         return ret;
16059                 }
16060         }
16061
16062         sort_kfunc_descs_by_imm(env->prog);
16063
16064         return 0;
16065 }
16066
16067 static struct bpf_prog *inline_bpf_loop(struct bpf_verifier_env *env,
16068                                         int position,
16069                                         s32 stack_base,
16070                                         u32 callback_subprogno,
16071                                         u32 *cnt)
16072 {
16073         s32 r6_offset = stack_base + 0 * BPF_REG_SIZE;
16074         s32 r7_offset = stack_base + 1 * BPF_REG_SIZE;
16075         s32 r8_offset = stack_base + 2 * BPF_REG_SIZE;
16076         int reg_loop_max = BPF_REG_6;
16077         int reg_loop_cnt = BPF_REG_7;
16078         int reg_loop_ctx = BPF_REG_8;
16079
16080         struct bpf_prog *new_prog;
16081         u32 callback_start;
16082         u32 call_insn_offset;
16083         s32 callback_offset;
16084
16085         /* This represents an inlined version of bpf_iter.c:bpf_loop,
16086          * be careful to modify this code in sync.
16087          */
16088         struct bpf_insn insn_buf[] = {
16089                 /* Return error and jump to the end of the patch if
16090                  * expected number of iterations is too big.
16091                  */
16092                 BPF_JMP_IMM(BPF_JLE, BPF_REG_1, BPF_MAX_LOOPS, 2),
16093                 BPF_MOV32_IMM(BPF_REG_0, -E2BIG),
16094                 BPF_JMP_IMM(BPF_JA, 0, 0, 16),
16095                 /* spill R6, R7, R8 to use these as loop vars */
16096                 BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, r6_offset),
16097                 BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_7, r7_offset),
16098                 BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_8, r8_offset),
16099                 /* initialize loop vars */
16100                 BPF_MOV64_REG(reg_loop_max, BPF_REG_1),
16101                 BPF_MOV32_IMM(reg_loop_cnt, 0),
16102                 BPF_MOV64_REG(reg_loop_ctx, BPF_REG_3),
16103                 /* loop header,
16104                  * if reg_loop_cnt >= reg_loop_max skip the loop body
16105                  */
16106                 BPF_JMP_REG(BPF_JGE, reg_loop_cnt, reg_loop_max, 5),
16107                 /* callback call,
16108                  * correct callback offset would be set after patching
16109                  */
16110                 BPF_MOV64_REG(BPF_REG_1, reg_loop_cnt),
16111                 BPF_MOV64_REG(BPF_REG_2, reg_loop_ctx),
16112                 BPF_CALL_REL(0),
16113                 /* increment loop counter */
16114                 BPF_ALU64_IMM(BPF_ADD, reg_loop_cnt, 1),
16115                 /* jump to loop header if callback returned 0 */
16116                 BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, -6),
16117                 /* return value of bpf_loop,
16118                  * set R0 to the number of iterations
16119                  */
16120                 BPF_MOV64_REG(BPF_REG_0, reg_loop_cnt),
16121                 /* restore original values of R6, R7, R8 */
16122                 BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_10, r6_offset),
16123                 BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_10, r7_offset),
16124                 BPF_LDX_MEM(BPF_DW, BPF_REG_8, BPF_REG_10, r8_offset),
16125         };
16126
16127         *cnt = ARRAY_SIZE(insn_buf);
16128         new_prog = bpf_patch_insn_data(env, position, insn_buf, *cnt);
16129         if (!new_prog)
16130                 return new_prog;
16131
16132         /* callback start is known only after patching */
16133         callback_start = env->subprog_info[callback_subprogno].start;
16134         /* Note: insn_buf[12] is an offset of BPF_CALL_REL instruction */
16135         call_insn_offset = position + 12;
16136         callback_offset = callback_start - call_insn_offset - 1;
16137         new_prog->insnsi[call_insn_offset].imm = callback_offset;
16138
16139         return new_prog;
16140 }
16141
16142 static bool is_bpf_loop_call(struct bpf_insn *insn)
16143 {
16144         return insn->code == (BPF_JMP | BPF_CALL) &&
16145                 insn->src_reg == 0 &&
16146                 insn->imm == BPF_FUNC_loop;
16147 }
16148
16149 /* For all sub-programs in the program (including main) check
16150  * insn_aux_data to see if there are bpf_loop calls that require
16151  * inlining. If such calls are found the calls are replaced with a
16152  * sequence of instructions produced by `inline_bpf_loop` function and
16153  * subprog stack_depth is increased by the size of 3 registers.
16154  * This stack space is used to spill values of the R6, R7, R8.  These
16155  * registers are used to store the loop bound, counter and context
16156  * variables.
16157  */
16158 static int optimize_bpf_loop(struct bpf_verifier_env *env)
16159 {
16160         struct bpf_subprog_info *subprogs = env->subprog_info;
16161         int i, cur_subprog = 0, cnt, delta = 0;
16162         struct bpf_insn *insn = env->prog->insnsi;
16163         int insn_cnt = env->prog->len;
16164         u16 stack_depth = subprogs[cur_subprog].stack_depth;
16165         u16 stack_depth_roundup = round_up(stack_depth, 8) - stack_depth;
16166         u16 stack_depth_extra = 0;
16167
16168         for (i = 0; i < insn_cnt; i++, insn++) {
16169                 struct bpf_loop_inline_state *inline_state =
16170                         &env->insn_aux_data[i + delta].loop_inline_state;
16171
16172                 if (is_bpf_loop_call(insn) && inline_state->fit_for_inline) {
16173                         struct bpf_prog *new_prog;
16174
16175                         stack_depth_extra = BPF_REG_SIZE * 3 + stack_depth_roundup;
16176                         new_prog = inline_bpf_loop(env,
16177                                                    i + delta,
16178                                                    -(stack_depth + stack_depth_extra),
16179                                                    inline_state->callback_subprogno,
16180                                                    &cnt);
16181                         if (!new_prog)
16182                                 return -ENOMEM;
16183
16184                         delta     += cnt - 1;
16185                         env->prog  = new_prog;
16186                         insn       = new_prog->insnsi + i + delta;
16187                 }
16188
16189                 if (subprogs[cur_subprog + 1].start == i + delta + 1) {
16190                         subprogs[cur_subprog].stack_depth += stack_depth_extra;
16191                         cur_subprog++;
16192                         stack_depth = subprogs[cur_subprog].stack_depth;
16193                         stack_depth_roundup = round_up(stack_depth, 8) - stack_depth;
16194                         stack_depth_extra = 0;
16195                 }
16196         }
16197
16198         env->prog->aux->stack_depth = env->subprog_info[0].stack_depth;
16199
16200         return 0;
16201 }
16202
16203 static void free_states(struct bpf_verifier_env *env)
16204 {
16205         struct bpf_verifier_state_list *sl, *sln;
16206         int i;
16207
16208         sl = env->free_list;
16209         while (sl) {
16210                 sln = sl->next;
16211                 free_verifier_state(&sl->state, false);
16212                 kfree(sl);
16213                 sl = sln;
16214         }
16215         env->free_list = NULL;
16216
16217         if (!env->explored_states)
16218                 return;
16219
16220         for (i = 0; i < state_htab_size(env); i++) {
16221                 sl = env->explored_states[i];
16222
16223                 while (sl) {
16224                         sln = sl->next;
16225                         free_verifier_state(&sl->state, false);
16226                         kfree(sl);
16227                         sl = sln;
16228                 }
16229                 env->explored_states[i] = NULL;
16230         }
16231 }
16232
16233 static int do_check_common(struct bpf_verifier_env *env, int subprog)
16234 {
16235         bool pop_log = !(env->log.level & BPF_LOG_LEVEL2);
16236         struct bpf_verifier_state *state;
16237         struct bpf_reg_state *regs;
16238         int ret, i;
16239
16240         env->prev_linfo = NULL;
16241         env->pass_cnt++;
16242
16243         state = kzalloc(sizeof(struct bpf_verifier_state), GFP_KERNEL);
16244         if (!state)
16245                 return -ENOMEM;
16246         state->curframe = 0;
16247         state->speculative = false;
16248         state->branches = 1;
16249         state->frame[0] = kzalloc(sizeof(struct bpf_func_state), GFP_KERNEL);
16250         if (!state->frame[0]) {
16251                 kfree(state);
16252                 return -ENOMEM;
16253         }
16254         env->cur_state = state;
16255         init_func_state(env, state->frame[0],
16256                         BPF_MAIN_FUNC /* callsite */,
16257                         0 /* frameno */,
16258                         subprog);
16259         state->first_insn_idx = env->subprog_info[subprog].start;
16260         state->last_insn_idx = -1;
16261
16262         regs = state->frame[state->curframe]->regs;
16263         if (subprog || env->prog->type == BPF_PROG_TYPE_EXT) {
16264                 ret = btf_prepare_func_args(env, subprog, regs);
16265                 if (ret)
16266                         goto out;
16267                 for (i = BPF_REG_1; i <= BPF_REG_5; i++) {
16268                         if (regs[i].type == PTR_TO_CTX)
16269                                 mark_reg_known_zero(env, regs, i);
16270                         else if (regs[i].type == SCALAR_VALUE)
16271                                 mark_reg_unknown(env, regs, i);
16272                         else if (base_type(regs[i].type) == PTR_TO_MEM) {
16273                                 const u32 mem_size = regs[i].mem_size;
16274
16275                                 mark_reg_known_zero(env, regs, i);
16276                                 regs[i].mem_size = mem_size;
16277                                 regs[i].id = ++env->id_gen;
16278                         }
16279                 }
16280         } else {
16281                 /* 1st arg to a function */
16282                 regs[BPF_REG_1].type = PTR_TO_CTX;
16283                 mark_reg_known_zero(env, regs, BPF_REG_1);
16284                 ret = btf_check_subprog_arg_match(env, subprog, regs);
16285                 if (ret == -EFAULT)
16286                         /* unlikely verifier bug. abort.
16287                          * ret == 0 and ret < 0 are sadly acceptable for
16288                          * main() function due to backward compatibility.
16289                          * Like socket filter program may be written as:
16290                          * int bpf_prog(struct pt_regs *ctx)
16291                          * and never dereference that ctx in the program.
16292                          * 'struct pt_regs' is a type mismatch for socket
16293                          * filter that should be using 'struct __sk_buff'.
16294                          */
16295                         goto out;
16296         }
16297
16298         ret = do_check(env);
16299 out:
16300         /* check for NULL is necessary, since cur_state can be freed inside
16301          * do_check() under memory pressure.
16302          */
16303         if (env->cur_state) {
16304                 free_verifier_state(env->cur_state, true);
16305                 env->cur_state = NULL;
16306         }
16307         while (!pop_stack(env, NULL, NULL, false));
16308         if (!ret && pop_log)
16309                 bpf_vlog_reset(&env->log, 0);
16310         free_states(env);
16311         return ret;
16312 }
16313
16314 /* Verify all global functions in a BPF program one by one based on their BTF.
16315  * All global functions must pass verification. Otherwise the whole program is rejected.
16316  * Consider:
16317  * int bar(int);
16318  * int foo(int f)
16319  * {
16320  *    return bar(f);
16321  * }
16322  * int bar(int b)
16323  * {
16324  *    ...
16325  * }
16326  * foo() will be verified first for R1=any_scalar_value. During verification it
16327  * will be assumed that bar() already verified successfully and call to bar()
16328  * from foo() will be checked for type match only. Later bar() will be verified
16329  * independently to check that it's safe for R1=any_scalar_value.
16330  */
16331 static int do_check_subprogs(struct bpf_verifier_env *env)
16332 {
16333         struct bpf_prog_aux *aux = env->prog->aux;
16334         int i, ret;
16335
16336         if (!aux->func_info)
16337                 return 0;
16338
16339         for (i = 1; i < env->subprog_cnt; i++) {
16340                 if (aux->func_info_aux[i].linkage != BTF_FUNC_GLOBAL)
16341                         continue;
16342                 env->insn_idx = env->subprog_info[i].start;
16343                 WARN_ON_ONCE(env->insn_idx == 0);
16344                 ret = do_check_common(env, i);
16345                 if (ret) {
16346                         return ret;
16347                 } else if (env->log.level & BPF_LOG_LEVEL) {
16348                         verbose(env,
16349                                 "Func#%d is safe for any args that match its prototype\n",
16350                                 i);
16351                 }
16352         }
16353         return 0;
16354 }
16355
16356 static int do_check_main(struct bpf_verifier_env *env)
16357 {
16358         int ret;
16359
16360         env->insn_idx = 0;
16361         ret = do_check_common(env, 0);
16362         if (!ret)
16363                 env->prog->aux->stack_depth = env->subprog_info[0].stack_depth;
16364         return ret;
16365 }
16366
16367
16368 static void print_verification_stats(struct bpf_verifier_env *env)
16369 {
16370         int i;
16371
16372         if (env->log.level & BPF_LOG_STATS) {
16373                 verbose(env, "verification time %lld usec\n",
16374                         div_u64(env->verification_time, 1000));
16375                 verbose(env, "stack depth ");
16376                 for (i = 0; i < env->subprog_cnt; i++) {
16377                         u32 depth = env->subprog_info[i].stack_depth;
16378
16379                         verbose(env, "%d", depth);
16380                         if (i + 1 < env->subprog_cnt)
16381                                 verbose(env, "+");
16382                 }
16383                 verbose(env, "\n");
16384         }
16385         verbose(env, "processed %d insns (limit %d) max_states_per_insn %d "
16386                 "total_states %d peak_states %d mark_read %d\n",
16387                 env->insn_processed, BPF_COMPLEXITY_LIMIT_INSNS,
16388                 env->max_states_per_insn, env->total_states,
16389                 env->peak_states, env->longest_mark_read_walk);
16390 }
16391
16392 static int check_struct_ops_btf_id(struct bpf_verifier_env *env)
16393 {
16394         const struct btf_type *t, *func_proto;
16395         const struct bpf_struct_ops *st_ops;
16396         const struct btf_member *member;
16397         struct bpf_prog *prog = env->prog;
16398         u32 btf_id, member_idx;
16399         const char *mname;
16400
16401         if (!prog->gpl_compatible) {
16402                 verbose(env, "struct ops programs must have a GPL compatible license\n");
16403                 return -EINVAL;
16404         }
16405
16406         btf_id = prog->aux->attach_btf_id;
16407         st_ops = bpf_struct_ops_find(btf_id);
16408         if (!st_ops) {
16409                 verbose(env, "attach_btf_id %u is not a supported struct\n",
16410                         btf_id);
16411                 return -ENOTSUPP;
16412         }
16413
16414         t = st_ops->type;
16415         member_idx = prog->expected_attach_type;
16416         if (member_idx >= btf_type_vlen(t)) {
16417                 verbose(env, "attach to invalid member idx %u of struct %s\n",
16418                         member_idx, st_ops->name);
16419                 return -EINVAL;
16420         }
16421
16422         member = &btf_type_member(t)[member_idx];
16423         mname = btf_name_by_offset(btf_vmlinux, member->name_off);
16424         func_proto = btf_type_resolve_func_ptr(btf_vmlinux, member->type,
16425                                                NULL);
16426         if (!func_proto) {
16427                 verbose(env, "attach to invalid member %s(@idx %u) of struct %s\n",
16428                         mname, member_idx, st_ops->name);
16429                 return -EINVAL;
16430         }
16431
16432         if (st_ops->check_member) {
16433                 int err = st_ops->check_member(t, member);
16434
16435                 if (err) {
16436                         verbose(env, "attach to unsupported member %s of struct %s\n",
16437                                 mname, st_ops->name);
16438                         return err;
16439                 }
16440         }
16441
16442         prog->aux->attach_func_proto = func_proto;
16443         prog->aux->attach_func_name = mname;
16444         env->ops = st_ops->verifier_ops;
16445
16446         return 0;
16447 }
16448 #define SECURITY_PREFIX "security_"
16449
16450 static int check_attach_modify_return(unsigned long addr, const char *func_name)
16451 {
16452         if (within_error_injection_list(addr) ||
16453             !strncmp(SECURITY_PREFIX, func_name, sizeof(SECURITY_PREFIX) - 1))
16454                 return 0;
16455
16456         return -EINVAL;
16457 }
16458
16459 /* list of non-sleepable functions that are otherwise on
16460  * ALLOW_ERROR_INJECTION list
16461  */
16462 BTF_SET_START(btf_non_sleepable_error_inject)
16463 /* Three functions below can be called from sleepable and non-sleepable context.
16464  * Assume non-sleepable from bpf safety point of view.
16465  */
16466 BTF_ID(func, __filemap_add_folio)
16467 BTF_ID(func, should_fail_alloc_page)
16468 BTF_ID(func, should_failslab)
16469 BTF_SET_END(btf_non_sleepable_error_inject)
16470
16471 static int check_non_sleepable_error_inject(u32 btf_id)
16472 {
16473         return btf_id_set_contains(&btf_non_sleepable_error_inject, btf_id);
16474 }
16475
16476 int bpf_check_attach_target(struct bpf_verifier_log *log,
16477                             const struct bpf_prog *prog,
16478                             const struct bpf_prog *tgt_prog,
16479                             u32 btf_id,
16480                             struct bpf_attach_target_info *tgt_info)
16481 {
16482         bool prog_extension = prog->type == BPF_PROG_TYPE_EXT;
16483         const char prefix[] = "btf_trace_";
16484         int ret = 0, subprog = -1, i;
16485         const struct btf_type *t;
16486         bool conservative = true;
16487         const char *tname;
16488         struct btf *btf;
16489         long addr = 0;
16490
16491         if (!btf_id) {
16492                 bpf_log(log, "Tracing programs must provide btf_id\n");
16493                 return -EINVAL;
16494         }
16495         btf = tgt_prog ? tgt_prog->aux->btf : prog->aux->attach_btf;
16496         if (!btf) {
16497                 bpf_log(log,
16498                         "FENTRY/FEXIT program can only be attached to another program annotated with BTF\n");
16499                 return -EINVAL;
16500         }
16501         t = btf_type_by_id(btf, btf_id);
16502         if (!t) {
16503                 bpf_log(log, "attach_btf_id %u is invalid\n", btf_id);
16504                 return -EINVAL;
16505         }
16506         tname = btf_name_by_offset(btf, t->name_off);
16507         if (!tname) {
16508                 bpf_log(log, "attach_btf_id %u doesn't have a name\n", btf_id);
16509                 return -EINVAL;
16510         }
16511         if (tgt_prog) {
16512                 struct bpf_prog_aux *aux = tgt_prog->aux;
16513
16514                 for (i = 0; i < aux->func_info_cnt; i++)
16515                         if (aux->func_info[i].type_id == btf_id) {
16516                                 subprog = i;
16517                                 break;
16518                         }
16519                 if (subprog == -1) {
16520                         bpf_log(log, "Subprog %s doesn't exist\n", tname);
16521                         return -EINVAL;
16522                 }
16523                 conservative = aux->func_info_aux[subprog].unreliable;
16524                 if (prog_extension) {
16525                         if (conservative) {
16526                                 bpf_log(log,
16527                                         "Cannot replace static functions\n");
16528                                 return -EINVAL;
16529                         }
16530                         if (!prog->jit_requested) {
16531                                 bpf_log(log,
16532                                         "Extension programs should be JITed\n");
16533                                 return -EINVAL;
16534                         }
16535                 }
16536                 if (!tgt_prog->jited) {
16537                         bpf_log(log, "Can attach to only JITed progs\n");
16538                         return -EINVAL;
16539                 }
16540                 if (tgt_prog->type == prog->type) {
16541                         /* Cannot fentry/fexit another fentry/fexit program.
16542                          * Cannot attach program extension to another extension.
16543                          * It's ok to attach fentry/fexit to extension program.
16544                          */
16545                         bpf_log(log, "Cannot recursively attach\n");
16546                         return -EINVAL;
16547                 }
16548                 if (tgt_prog->type == BPF_PROG_TYPE_TRACING &&
16549                     prog_extension &&
16550                     (tgt_prog->expected_attach_type == BPF_TRACE_FENTRY ||
16551                      tgt_prog->expected_attach_type == BPF_TRACE_FEXIT)) {
16552                         /* Program extensions can extend all program types
16553                          * except fentry/fexit. The reason is the following.
16554                          * The fentry/fexit programs are used for performance
16555                          * analysis, stats and can be attached to any program
16556                          * type except themselves. When extension program is
16557                          * replacing XDP function it is necessary to allow
16558                          * performance analysis of all functions. Both original
16559                          * XDP program and its program extension. Hence
16560                          * attaching fentry/fexit to BPF_PROG_TYPE_EXT is
16561                          * allowed. If extending of fentry/fexit was allowed it
16562                          * would be possible to create long call chain
16563                          * fentry->extension->fentry->extension beyond
16564                          * reasonable stack size. Hence extending fentry is not
16565                          * allowed.
16566                          */
16567                         bpf_log(log, "Cannot extend fentry/fexit\n");
16568                         return -EINVAL;
16569                 }
16570         } else {
16571                 if (prog_extension) {
16572                         bpf_log(log, "Cannot replace kernel functions\n");
16573                         return -EINVAL;
16574                 }
16575         }
16576
16577         switch (prog->expected_attach_type) {
16578         case BPF_TRACE_RAW_TP:
16579                 if (tgt_prog) {
16580                         bpf_log(log,
16581                                 "Only FENTRY/FEXIT progs are attachable to another BPF prog\n");
16582                         return -EINVAL;
16583                 }
16584                 if (!btf_type_is_typedef(t)) {
16585                         bpf_log(log, "attach_btf_id %u is not a typedef\n",
16586                                 btf_id);
16587                         return -EINVAL;
16588                 }
16589                 if (strncmp(prefix, tname, sizeof(prefix) - 1)) {
16590                         bpf_log(log, "attach_btf_id %u points to wrong type name %s\n",
16591                                 btf_id, tname);
16592                         return -EINVAL;
16593                 }
16594                 tname += sizeof(prefix) - 1;
16595                 t = btf_type_by_id(btf, t->type);
16596                 if (!btf_type_is_ptr(t))
16597                         /* should never happen in valid vmlinux build */
16598                         return -EINVAL;
16599                 t = btf_type_by_id(btf, t->type);
16600                 if (!btf_type_is_func_proto(t))
16601                         /* should never happen in valid vmlinux build */
16602                         return -EINVAL;
16603
16604                 break;
16605         case BPF_TRACE_ITER:
16606                 if (!btf_type_is_func(t)) {
16607                         bpf_log(log, "attach_btf_id %u is not a function\n",
16608                                 btf_id);
16609                         return -EINVAL;
16610                 }
16611                 t = btf_type_by_id(btf, t->type);
16612                 if (!btf_type_is_func_proto(t))
16613                         return -EINVAL;
16614                 ret = btf_distill_func_proto(log, btf, t, tname, &tgt_info->fmodel);
16615                 if (ret)
16616                         return ret;
16617                 break;
16618         default:
16619                 if (!prog_extension)
16620                         return -EINVAL;
16621                 fallthrough;
16622         case BPF_MODIFY_RETURN:
16623         case BPF_LSM_MAC:
16624         case BPF_LSM_CGROUP:
16625         case BPF_TRACE_FENTRY:
16626         case BPF_TRACE_FEXIT:
16627                 if (!btf_type_is_func(t)) {
16628                         bpf_log(log, "attach_btf_id %u is not a function\n",
16629                                 btf_id);
16630                         return -EINVAL;
16631                 }
16632                 if (prog_extension &&
16633                     btf_check_type_match(log, prog, btf, t))
16634                         return -EINVAL;
16635                 t = btf_type_by_id(btf, t->type);
16636                 if (!btf_type_is_func_proto(t))
16637                         return -EINVAL;
16638
16639                 if ((prog->aux->saved_dst_prog_type || prog->aux->saved_dst_attach_type) &&
16640                     (!tgt_prog || prog->aux->saved_dst_prog_type != tgt_prog->type ||
16641                      prog->aux->saved_dst_attach_type != tgt_prog->expected_attach_type))
16642                         return -EINVAL;
16643
16644                 if (tgt_prog && conservative)
16645                         t = NULL;
16646
16647                 ret = btf_distill_func_proto(log, btf, t, tname, &tgt_info->fmodel);
16648                 if (ret < 0)
16649                         return ret;
16650
16651                 if (tgt_prog) {
16652                         if (subprog == 0)
16653                                 addr = (long) tgt_prog->bpf_func;
16654                         else
16655                                 addr = (long) tgt_prog->aux->func[subprog]->bpf_func;
16656                 } else {
16657                         addr = kallsyms_lookup_name(tname);
16658                         if (!addr) {
16659                                 bpf_log(log,
16660                                         "The address of function %s cannot be found\n",
16661                                         tname);
16662                                 return -ENOENT;
16663                         }
16664                 }
16665
16666                 if (prog->aux->sleepable) {
16667                         ret = -EINVAL;
16668                         switch (prog->type) {
16669                         case BPF_PROG_TYPE_TRACING:
16670
16671                                 /* fentry/fexit/fmod_ret progs can be sleepable if they are
16672                                  * attached to ALLOW_ERROR_INJECTION and are not in denylist.
16673                                  */
16674                                 if (!check_non_sleepable_error_inject(btf_id) &&
16675                                     within_error_injection_list(addr))
16676                                         ret = 0;
16677                                 /* fentry/fexit/fmod_ret progs can also be sleepable if they are
16678                                  * in the fmodret id set with the KF_SLEEPABLE flag.
16679                                  */
16680                                 else {
16681                                         u32 *flags = btf_kfunc_is_modify_return(btf, btf_id);
16682
16683                                         if (flags && (*flags & KF_SLEEPABLE))
16684                                                 ret = 0;
16685                                 }
16686                                 break;
16687                         case BPF_PROG_TYPE_LSM:
16688                                 /* LSM progs check that they are attached to bpf_lsm_*() funcs.
16689                                  * Only some of them are sleepable.
16690                                  */
16691                                 if (bpf_lsm_is_sleepable_hook(btf_id))
16692                                         ret = 0;
16693                                 break;
16694                         default:
16695                                 break;
16696                         }
16697                         if (ret) {
16698                                 bpf_log(log, "%s is not sleepable\n", tname);
16699                                 return ret;
16700                         }
16701                 } else if (prog->expected_attach_type == BPF_MODIFY_RETURN) {
16702                         if (tgt_prog) {
16703                                 bpf_log(log, "can't modify return codes of BPF programs\n");
16704                                 return -EINVAL;
16705                         }
16706                         ret = -EINVAL;
16707                         if (btf_kfunc_is_modify_return(btf, btf_id) ||
16708                             !check_attach_modify_return(addr, tname))
16709                                 ret = 0;
16710                         if (ret) {
16711                                 bpf_log(log, "%s() is not modifiable\n", tname);
16712                                 return ret;
16713                         }
16714                 }
16715
16716                 break;
16717         }
16718         tgt_info->tgt_addr = addr;
16719         tgt_info->tgt_name = tname;
16720         tgt_info->tgt_type = t;
16721         return 0;
16722 }
16723
16724 BTF_SET_START(btf_id_deny)
16725 BTF_ID_UNUSED
16726 #ifdef CONFIG_SMP
16727 BTF_ID(func, migrate_disable)
16728 BTF_ID(func, migrate_enable)
16729 #endif
16730 #if !defined CONFIG_PREEMPT_RCU && !defined CONFIG_TINY_RCU
16731 BTF_ID(func, rcu_read_unlock_strict)
16732 #endif
16733 BTF_SET_END(btf_id_deny)
16734
16735 static int check_attach_btf_id(struct bpf_verifier_env *env)
16736 {
16737         struct bpf_prog *prog = env->prog;
16738         struct bpf_prog *tgt_prog = prog->aux->dst_prog;
16739         struct bpf_attach_target_info tgt_info = {};
16740         u32 btf_id = prog->aux->attach_btf_id;
16741         struct bpf_trampoline *tr;
16742         int ret;
16743         u64 key;
16744
16745         if (prog->type == BPF_PROG_TYPE_SYSCALL) {
16746                 if (prog->aux->sleepable)
16747                         /* attach_btf_id checked to be zero already */
16748                         return 0;
16749                 verbose(env, "Syscall programs can only be sleepable\n");
16750                 return -EINVAL;
16751         }
16752
16753         if (prog->aux->sleepable && prog->type != BPF_PROG_TYPE_TRACING &&
16754             prog->type != BPF_PROG_TYPE_LSM && prog->type != BPF_PROG_TYPE_KPROBE) {
16755                 verbose(env, "Only fentry/fexit/fmod_ret, lsm, and kprobe/uprobe programs can be sleepable\n");
16756                 return -EINVAL;
16757         }
16758
16759         if (prog->type == BPF_PROG_TYPE_STRUCT_OPS)
16760                 return check_struct_ops_btf_id(env);
16761
16762         if (prog->type != BPF_PROG_TYPE_TRACING &&
16763             prog->type != BPF_PROG_TYPE_LSM &&
16764             prog->type != BPF_PROG_TYPE_EXT)
16765                 return 0;
16766
16767         ret = bpf_check_attach_target(&env->log, prog, tgt_prog, btf_id, &tgt_info);
16768         if (ret)
16769                 return ret;
16770
16771         if (tgt_prog && prog->type == BPF_PROG_TYPE_EXT) {
16772                 /* to make freplace equivalent to their targets, they need to
16773                  * inherit env->ops and expected_attach_type for the rest of the
16774                  * verification
16775                  */
16776                 env->ops = bpf_verifier_ops[tgt_prog->type];
16777                 prog->expected_attach_type = tgt_prog->expected_attach_type;
16778         }
16779
16780         /* store info about the attachment target that will be used later */
16781         prog->aux->attach_func_proto = tgt_info.tgt_type;
16782         prog->aux->attach_func_name = tgt_info.tgt_name;
16783
16784         if (tgt_prog) {
16785                 prog->aux->saved_dst_prog_type = tgt_prog->type;
16786                 prog->aux->saved_dst_attach_type = tgt_prog->expected_attach_type;
16787         }
16788
16789         if (prog->expected_attach_type == BPF_TRACE_RAW_TP) {
16790                 prog->aux->attach_btf_trace = true;
16791                 return 0;
16792         } else if (prog->expected_attach_type == BPF_TRACE_ITER) {
16793                 if (!bpf_iter_prog_supported(prog))
16794                         return -EINVAL;
16795                 return 0;
16796         }
16797
16798         if (prog->type == BPF_PROG_TYPE_LSM) {
16799                 ret = bpf_lsm_verify_prog(&env->log, prog);
16800                 if (ret < 0)
16801                         return ret;
16802         } else if (prog->type == BPF_PROG_TYPE_TRACING &&
16803                    btf_id_set_contains(&btf_id_deny, btf_id)) {
16804                 return -EINVAL;
16805         }
16806
16807         key = bpf_trampoline_compute_key(tgt_prog, prog->aux->attach_btf, btf_id);
16808         tr = bpf_trampoline_get(key, &tgt_info);
16809         if (!tr)
16810                 return -ENOMEM;
16811
16812         prog->aux->dst_trampoline = tr;
16813         return 0;
16814 }
16815
16816 struct btf *bpf_get_btf_vmlinux(void)
16817 {
16818         if (!btf_vmlinux && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) {
16819                 mutex_lock(&bpf_verifier_lock);
16820                 if (!btf_vmlinux)
16821                         btf_vmlinux = btf_parse_vmlinux();
16822                 mutex_unlock(&bpf_verifier_lock);
16823         }
16824         return btf_vmlinux;
16825 }
16826
16827 int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr)
16828 {
16829         u64 start_time = ktime_get_ns();
16830         struct bpf_verifier_env *env;
16831         struct bpf_verifier_log *log;
16832         int i, len, ret = -EINVAL;
16833         bool is_priv;
16834
16835         /* no program is valid */
16836         if (ARRAY_SIZE(bpf_verifier_ops) == 0)
16837                 return -EINVAL;
16838
16839         /* 'struct bpf_verifier_env' can be global, but since it's not small,
16840          * allocate/free it every time bpf_check() is called
16841          */
16842         env = kzalloc(sizeof(struct bpf_verifier_env), GFP_KERNEL);
16843         if (!env)
16844                 return -ENOMEM;
16845         log = &env->log;
16846
16847         len = (*prog)->len;
16848         env->insn_aux_data =
16849                 vzalloc(array_size(sizeof(struct bpf_insn_aux_data), len));
16850         ret = -ENOMEM;
16851         if (!env->insn_aux_data)
16852                 goto err_free_env;
16853         for (i = 0; i < len; i++)
16854                 env->insn_aux_data[i].orig_idx = i;
16855         env->prog = *prog;
16856         env->ops = bpf_verifier_ops[env->prog->type];
16857         env->fd_array = make_bpfptr(attr->fd_array, uattr.is_kernel);
16858         is_priv = bpf_capable();
16859
16860         bpf_get_btf_vmlinux();
16861
16862         /* grab the mutex to protect few globals used by verifier */
16863         if (!is_priv)
16864                 mutex_lock(&bpf_verifier_lock);
16865
16866         if (attr->log_level || attr->log_buf || attr->log_size) {
16867                 /* user requested verbose verifier output
16868                  * and supplied buffer to store the verification trace
16869                  */
16870                 log->level = attr->log_level;
16871                 log->ubuf = (char __user *) (unsigned long) attr->log_buf;
16872                 log->len_total = attr->log_size;
16873
16874                 /* log attributes have to be sane */
16875                 if (!bpf_verifier_log_attr_valid(log)) {
16876                         ret = -EINVAL;
16877                         goto err_unlock;
16878                 }
16879         }
16880
16881         mark_verifier_state_clean(env);
16882
16883         if (IS_ERR(btf_vmlinux)) {
16884                 /* Either gcc or pahole or kernel are broken. */
16885                 verbose(env, "in-kernel BTF is malformed\n");
16886                 ret = PTR_ERR(btf_vmlinux);
16887                 goto skip_full_check;
16888         }
16889
16890         env->strict_alignment = !!(attr->prog_flags & BPF_F_STRICT_ALIGNMENT);
16891         if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS))
16892                 env->strict_alignment = true;
16893         if (attr->prog_flags & BPF_F_ANY_ALIGNMENT)
16894                 env->strict_alignment = false;
16895
16896         env->allow_ptr_leaks = bpf_allow_ptr_leaks();
16897         env->allow_uninit_stack = bpf_allow_uninit_stack();
16898         env->bypass_spec_v1 = bpf_bypass_spec_v1();
16899         env->bypass_spec_v4 = bpf_bypass_spec_v4();
16900         env->bpf_capable = bpf_capable();
16901         env->rcu_tag_supported = btf_vmlinux &&
16902                 btf_find_by_name_kind(btf_vmlinux, "rcu", BTF_KIND_TYPE_TAG) > 0;
16903
16904         if (is_priv)
16905                 env->test_state_freq = attr->prog_flags & BPF_F_TEST_STATE_FREQ;
16906
16907         env->explored_states = kvcalloc(state_htab_size(env),
16908                                        sizeof(struct bpf_verifier_state_list *),
16909                                        GFP_USER);
16910         ret = -ENOMEM;
16911         if (!env->explored_states)
16912                 goto skip_full_check;
16913
16914         ret = add_subprog_and_kfunc(env);
16915         if (ret < 0)
16916                 goto skip_full_check;
16917
16918         ret = check_subprogs(env);
16919         if (ret < 0)
16920                 goto skip_full_check;
16921
16922         ret = check_btf_info(env, attr, uattr);
16923         if (ret < 0)
16924                 goto skip_full_check;
16925
16926         ret = check_attach_btf_id(env);
16927         if (ret)
16928                 goto skip_full_check;
16929
16930         ret = resolve_pseudo_ldimm64(env);
16931         if (ret < 0)
16932                 goto skip_full_check;
16933
16934         if (bpf_prog_is_dev_bound(env->prog->aux)) {
16935                 ret = bpf_prog_offload_verifier_prep(env->prog);
16936                 if (ret)
16937                         goto skip_full_check;
16938         }
16939
16940         ret = check_cfg(env);
16941         if (ret < 0)
16942                 goto skip_full_check;
16943
16944         ret = do_check_subprogs(env);
16945         ret = ret ?: do_check_main(env);
16946
16947         if (ret == 0 && bpf_prog_is_dev_bound(env->prog->aux))
16948                 ret = bpf_prog_offload_finalize(env);
16949
16950 skip_full_check:
16951         kvfree(env->explored_states);
16952
16953         if (ret == 0)
16954                 ret = check_max_stack_depth(env);
16955
16956         /* instruction rewrites happen after this point */
16957         if (ret == 0)
16958                 ret = optimize_bpf_loop(env);
16959
16960         if (is_priv) {
16961                 if (ret == 0)
16962                         opt_hard_wire_dead_code_branches(env);
16963                 if (ret == 0)
16964                         ret = opt_remove_dead_code(env);
16965                 if (ret == 0)
16966                         ret = opt_remove_nops(env);
16967         } else {
16968                 if (ret == 0)
16969                         sanitize_dead_code(env);
16970         }
16971
16972         if (ret == 0)
16973                 /* program is valid, convert *(u32*)(ctx + off) accesses */
16974                 ret = convert_ctx_accesses(env);
16975
16976         if (ret == 0)
16977                 ret = do_misc_fixups(env);
16978
16979         /* do 32-bit optimization after insn patching has done so those patched
16980          * insns could be handled correctly.
16981          */
16982         if (ret == 0 && !bpf_prog_is_dev_bound(env->prog->aux)) {
16983                 ret = opt_subreg_zext_lo32_rnd_hi32(env, attr);
16984                 env->prog->aux->verifier_zext = bpf_jit_needs_zext() ? !ret
16985                                                                      : false;
16986         }
16987
16988         if (ret == 0)
16989                 ret = fixup_call_args(env);
16990
16991         env->verification_time = ktime_get_ns() - start_time;
16992         print_verification_stats(env);
16993         env->prog->aux->verified_insns = env->insn_processed;
16994
16995         if (log->level && bpf_verifier_log_full(log))
16996                 ret = -ENOSPC;
16997         if (log->level && !log->ubuf) {
16998                 ret = -EFAULT;
16999                 goto err_release_maps;
17000         }
17001
17002         if (ret)
17003                 goto err_release_maps;
17004
17005         if (env->used_map_cnt) {
17006                 /* if program passed verifier, update used_maps in bpf_prog_info */
17007                 env->prog->aux->used_maps = kmalloc_array(env->used_map_cnt,
17008                                                           sizeof(env->used_maps[0]),
17009                                                           GFP_KERNEL);
17010
17011                 if (!env->prog->aux->used_maps) {
17012                         ret = -ENOMEM;
17013                         goto err_release_maps;
17014                 }
17015
17016                 memcpy(env->prog->aux->used_maps, env->used_maps,
17017                        sizeof(env->used_maps[0]) * env->used_map_cnt);
17018                 env->prog->aux->used_map_cnt = env->used_map_cnt;
17019         }
17020         if (env->used_btf_cnt) {
17021                 /* if program passed verifier, update used_btfs in bpf_prog_aux */
17022                 env->prog->aux->used_btfs = kmalloc_array(env->used_btf_cnt,
17023                                                           sizeof(env->used_btfs[0]),
17024                                                           GFP_KERNEL);
17025                 if (!env->prog->aux->used_btfs) {
17026                         ret = -ENOMEM;
17027                         goto err_release_maps;
17028                 }
17029
17030                 memcpy(env->prog->aux->used_btfs, env->used_btfs,
17031                        sizeof(env->used_btfs[0]) * env->used_btf_cnt);
17032                 env->prog->aux->used_btf_cnt = env->used_btf_cnt;
17033         }
17034         if (env->used_map_cnt || env->used_btf_cnt) {
17035                 /* program is valid. Convert pseudo bpf_ld_imm64 into generic
17036                  * bpf_ld_imm64 instructions
17037                  */
17038                 convert_pseudo_ld_imm64(env);
17039         }
17040
17041         adjust_btf_func(env);
17042
17043 err_release_maps:
17044         if (!env->prog->aux->used_maps)
17045                 /* if we didn't copy map pointers into bpf_prog_info, release
17046                  * them now. Otherwise free_used_maps() will release them.
17047                  */
17048                 release_maps(env);
17049         if (!env->prog->aux->used_btfs)
17050                 release_btfs(env);
17051
17052         /* extension progs temporarily inherit the attach_type of their targets
17053            for verification purposes, so set it back to zero before returning
17054          */
17055         if (env->prog->type == BPF_PROG_TYPE_EXT)
17056                 env->prog->expected_attach_type = 0;
17057
17058         *prog = env->prog;
17059 err_unlock:
17060         if (!is_priv)
17061                 mutex_unlock(&bpf_verifier_lock);
17062         vfree(env->insn_aux_data);
17063 err_free_env:
17064         kfree(env);
17065         return ret;
17066 }