Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
author: David S. Miller <davem@davemloft.net>
Thu, 1 Oct 2020 21:29:01 +0000 (14:29 -0700)
committer: David S. Miller <davem@davemloft.net>
Thu, 1 Oct 2020 21:29:01 +0000 (14:29 -0700)
Daniel Borkmann says:

====================
pull-request: bpf-next 2020-10-01

The following pull-request contains BPF updates for your *net-next* tree.

We've added 90 non-merge commits during the last 8 day(s) which contain
a total of 103 files changed, 7662 insertions(+), 1894 deletions(-).

Note that once bpf(/net) tree gets merged into net-next, there will be a small
merge conflict in tools/lib/bpf/btf.c between commit 1245008122d7 ("libbpf: Fix
native endian assumption when parsing BTF") from the bpf tree and the commit
3289959b97ca ("libbpf: Support BTF loading and raw data output in both endianness")
from the bpf-next tree. Correct resolution would be to stick with bpf-next, it
should look like:

  [...]
        /* check BTF magic */
        if (fread(&magic, 1, sizeof(magic), f) < sizeof(magic)) {
                err = -EIO;
                goto err_out;
        }
        if (magic != BTF_MAGIC && magic != bswap_16(BTF_MAGIC)) {
                /* definitely not a raw BTF */
                err = -EPROTO;
                goto err_out;
        }

        /* get file size */
  [...]

The main changes are:

1) Add bpf_snprintf_btf() and bpf_seq_printf_btf() helpers to support displaying
   BTF-based kernel data structures out of BPF programs, from Alan Maguire.

2) Speed up RCU tasks trace grace periods by a factor of 50 & fix a few race
   conditions exposed by it. It was discussed to take these via BPF and
   networking tree to get better testing exposure, from Paul E. McKenney.

3) Support multi-attach for freplace programs, needed for incremental attachment
   of multiple XDP progs using libxdp dispatcher model, from Toke Høiland-Jørgensen.

4) libbpf support for appending new BTF types at the end of BTF object, allowing
   intrusive changes of prog's BTF (useful for future linking), from Andrii Nakryiko.

5) Several BPF helper improvements e.g. avoid atomic op in cookie generator and add
   a redirect helper into neighboring subsys, from Daniel Borkmann.

6) Allow map updates on sockmaps from bpf_iter context in order to migrate sockmaps
   from one to another, from Lorenz Bauer.

7) Fix 32 bit to 64 bit assignment from latest alu32 bounds tracking which caused
   a verifier issue due to type downgrade to scalar, from John Fastabend.

8) Follow-up on tail-call support in BPF subprogs which optimizes x64 JIT prologue
   and epilogue sections, from Maciej Fijalkowski.

9) Add an option to perf RB map to improve sharing of event entries by avoiding remove-
   on-close behavior. Also, add BPF_PROG_TEST_RUN for raw_tracepoint, from Song Liu.

10) Fix a crash in AF_XDP's socket_release when memory allocation for UMEMs fails,
    from Magnus Karlsson.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
104 files changed:
arch/x86/net/bpf_jit_comp.c
include/linux/bpf.h
include/linux/bpf_verifier.h
include/linux/btf.h
include/linux/cookie.h [new file with mode: 0644]
include/linux/rcupdate_trace.h
include/linux/skbuff.h
include/linux/sock_diag.h
include/net/bpf_sk_storage.h
include/net/net_namespace.h
include/uapi/linux/bpf.h
kernel/bpf/arraymap.c
kernel/bpf/bpf_iter.c
kernel/bpf/bpf_lsm.c
kernel/bpf/btf.c
kernel/bpf/core.c
kernel/bpf/cpumap.c
kernel/bpf/helpers.c
kernel/bpf/preload/.gitignore [new file with mode: 0644]
kernel/bpf/preload/Makefile
kernel/bpf/preload/iterators/iterators.bpf.c
kernel/bpf/preload/iterators/iterators.skel.h
kernel/bpf/reuseport_array.c
kernel/bpf/syscall.c
kernel/bpf/trampoline.c
kernel/bpf/verifier.c
kernel/rcu/tasks.h
kernel/trace/bpf_trace.c
net/bpf/test_run.c
net/core/bpf_sk_storage.c
net/core/filter.c
net/core/net_namespace.c
net/core/sock_diag.c
net/core/sock_map.c
net/ipv4/bpf_tcp_ca.c
net/xdp/xsk.c
net/xdp/xsk_queue.h
samples/bpf/sockex3_kern.c
scripts/bpf_helpers_doc.py
tools/include/uapi/linux/bpf.h
tools/lib/bpf/Makefile
tools/lib/bpf/bpf.c
tools/lib/bpf/bpf.h
tools/lib/bpf/bpf_helpers.h
tools/lib/bpf/btf.c
tools/lib/bpf/btf.h
tools/lib/bpf/btf_dump.c
tools/lib/bpf/hashmap.h
tools/lib/bpf/libbpf.c
tools/lib/bpf/libbpf.h
tools/lib/bpf/libbpf.map
tools/lib/bpf/libbpf_internal.h
tools/testing/selftests/bpf/.gitignore
tools/testing/selftests/bpf/Makefile
tools/testing/selftests/bpf/bench.c
tools/testing/selftests/bpf/benchs/bench_rename.c
tools/testing/selftests/bpf/bpf_tcp_helpers.h
tools/testing/selftests/bpf/prog_tests/bpf_iter.c
tools/testing/selftests/bpf/prog_tests/btf_dump.c
tools/testing/selftests/bpf/prog_tests/btf_endian.c [new file with mode: 0644]
tools/testing/selftests/bpf/prog_tests/btf_skc_cls_ingress.c [new file with mode: 0644]
tools/testing/selftests/bpf/prog_tests/btf_write.c [new file with mode: 0644]
tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c
tools/testing/selftests/bpf/prog_tests/pe_preserve_elems.c [new file with mode: 0644]
tools/testing/selftests/bpf/prog_tests/raw_tp_test_run.c [new file with mode: 0644]
tools/testing/selftests/bpf/prog_tests/resolve_btfids.c
tools/testing/selftests/bpf/prog_tests/snprintf_btf.c [new file with mode: 0644]
tools/testing/selftests/bpf/prog_tests/sock_fields.c [new file with mode: 0644]
tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
tools/testing/selftests/bpf/prog_tests/test_overhead.c
tools/testing/selftests/bpf/prog_tests/trace_ext.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/bpf_cubic.c
tools/testing/selftests/bpf/progs/bpf_dctcp.c
tools/testing/selftests/bpf/progs/bpf_flow.c
tools/testing/selftests/bpf/progs/bpf_iter.h
tools/testing/selftests/bpf/progs/bpf_iter_sockmap.c
tools/testing/selftests/bpf/progs/bpf_iter_sockmap.h [deleted file]
tools/testing/selftests/bpf/progs/bpf_iter_task_btf.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/btf_ptr.h [new file with mode: 0644]
tools/testing/selftests/bpf/progs/fmod_ret_freplace.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/freplace_get_constant.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/netif_receive_skb.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/tailcall1.c
tools/testing/selftests/bpf/progs/tailcall2.c
tools/testing/selftests/bpf/progs/tailcall3.c
tools/testing/selftests/bpf/progs/tailcall_bpf2bpf1.c
tools/testing/selftests/bpf/progs/tailcall_bpf2bpf2.c
tools/testing/selftests/bpf/progs/tailcall_bpf2bpf3.c
tools/testing/selftests/bpf/progs/tailcall_bpf2bpf4.c
tools/testing/selftests/bpf/progs/test_btf_skc_cls_ingress.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/test_overhead.c
tools/testing/selftests/bpf/progs/test_pe_preserve_elems.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/test_raw_tp_test_run.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/test_sk_lookup.c
tools/testing/selftests/bpf/progs/test_sock_fields.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/test_sock_fields_kern.c [deleted file]
tools/testing/selftests/bpf/progs/test_tc_neigh.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/test_trace_ext.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/test_trace_ext_tracing.c [new file with mode: 0644]
tools/testing/selftests/bpf/test_progs.h
tools/testing/selftests/bpf/test_sock_fields.c [deleted file]
tools/testing/selftests/bpf/test_tc_neigh.sh [new file with mode: 0755]
tools/testing/selftests/bpf/verifier/and.c
tools/testing/selftests/bpf/verifier/ref_tracking.c

index 26f4327..796506d 100644 (file)
@@ -281,7 +281,8 @@ static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf,
        EMIT1(0x55);             /* push rbp */
        EMIT3(0x48, 0x89, 0xE5); /* mov rbp, rsp */
        /* sub rsp, rounded_stack_depth */
-       EMIT3_off32(0x48, 0x81, 0xEC, round_up(stack_depth, 8));
+       if (stack_depth)
+               EMIT3_off32(0x48, 0x81, 0xEC, round_up(stack_depth, 8));
        if (tail_call_reachable)
                EMIT1(0x50);         /* push rax */
        *pprog = prog;
@@ -407,9 +408,9 @@ static void emit_bpf_tail_call_indirect(u8 **pprog, bool *callee_regs_used,
        int tcc_off = -4 - round_up(stack_depth, 8);
        u8 *prog = *pprog;
        int pop_bytes = 0;
-       int off1 = 49;
-       int off2 = 38;
-       int off3 = 16;
+       int off1 = 42;
+       int off2 = 31;
+       int off3 = 9;
        int cnt = 0;
 
        /* count the additional bytes used for popping callee regs from stack
@@ -421,6 +422,12 @@ static void emit_bpf_tail_call_indirect(u8 **pprog, bool *callee_regs_used,
        off2 += pop_bytes;
        off3 += pop_bytes;
 
+       if (stack_depth) {
+               off1 += 7;
+               off2 += 7;
+               off3 += 7;
+       }
+
        /*
         * rdi - pointer to ctx
         * rsi - pointer to bpf_array
@@ -465,8 +472,9 @@ static void emit_bpf_tail_call_indirect(u8 **pprog, bool *callee_regs_used,
        prog = *pprog;
 
        EMIT1(0x58);                              /* pop rax */
-       EMIT3_off32(0x48, 0x81, 0xC4,             /* add rsp, sd */
-                   round_up(stack_depth, 8));
+       if (stack_depth)
+               EMIT3_off32(0x48, 0x81, 0xC4,     /* add rsp, sd */
+                           round_up(stack_depth, 8));
 
        /* goto *(prog->bpf_func + X86_TAIL_CALL_OFFSET); */
        EMIT4(0x48, 0x8B, 0x49,                   /* mov rcx, qword ptr [rcx + 32] */
@@ -491,7 +499,7 @@ static void emit_bpf_tail_call_direct(struct bpf_jit_poke_descriptor *poke,
        int tcc_off = -4 - round_up(stack_depth, 8);
        u8 *prog = *pprog;
        int pop_bytes = 0;
-       int off1 = 27;
+       int off1 = 20;
        int poke_off;
        int cnt = 0;
 
@@ -506,10 +514,14 @@ static void emit_bpf_tail_call_direct(struct bpf_jit_poke_descriptor *poke,
         * total bytes for:
         * - nop5/ jmpq $off
         * - pop callee regs
-        * - sub rsp, $val
+        * - sub rsp, $val if depth > 0
         * - pop rax
         */
-       poke_off = X86_PATCH_SIZE + pop_bytes + 7 + 1;
+       poke_off = X86_PATCH_SIZE + pop_bytes + 1;
+       if (stack_depth) {
+               poke_off += 7;
+               off1 += 7;
+       }
 
        /*
         * if (tail_call_cnt > MAX_TAIL_CALL_CNT)
@@ -533,7 +545,8 @@ static void emit_bpf_tail_call_direct(struct bpf_jit_poke_descriptor *poke,
        pop_callee_regs(pprog, callee_regs_used);
        prog = *pprog;
        EMIT1(0x58);                                  /* pop rax */
-       EMIT3_off32(0x48, 0x81, 0xC4, round_up(stack_depth, 8));
+       if (stack_depth)
+               EMIT3_off32(0x48, 0x81, 0xC4, round_up(stack_depth, 8));
 
        memcpy(prog, ideal_nops[NOP_ATOMIC5], X86_PATCH_SIZE);
        prog += X86_PATCH_SIZE;
@@ -1441,8 +1454,6 @@ emit_jmp:
                        /* Update cleanup_addr */
                        ctx->cleanup_addr = proglen;
                        pop_callee_regs(&prog, callee_regs_used);
-                       if (tail_call_reachable)
-                               EMIT1(0x59); /* pop rcx, get rid of tail_call_cnt */
                        EMIT1(0xC9);         /* leave */
                        EMIT1(0xC3);         /* ret */
                        break;
index fc5c901..50e5c4b 100644 (file)
@@ -292,6 +292,7 @@ enum bpf_arg_type {
        ARG_PTR_TO_ALLOC_MEM,   /* pointer to dynamically allocated memory */
        ARG_PTR_TO_ALLOC_MEM_OR_NULL,   /* pointer to dynamically allocated memory or NULL */
        ARG_CONST_ALLOC_SIZE_OR_ZERO,   /* number of allocated bytes requested */
+       ARG_PTR_TO_BTF_ID_SOCK_COMMON,  /* pointer to in-kernel sock_common or bpf-mirrored bpf_sock */
        __BPF_ARG_TYPE_MAX,
 };
 
@@ -382,8 +383,22 @@ enum bpf_reg_type {
        PTR_TO_TCP_SOCK_OR_NULL, /* reg points to struct tcp_sock or NULL */
        PTR_TO_TP_BUFFER,        /* reg points to a writable raw tp's buffer */
        PTR_TO_XDP_SOCK,         /* reg points to struct xdp_sock */
-       PTR_TO_BTF_ID,           /* reg points to kernel struct */
-       PTR_TO_BTF_ID_OR_NULL,   /* reg points to kernel struct or NULL */
+       /* PTR_TO_BTF_ID points to a kernel struct that does not need
+        * to be null checked by the BPF program. This does not imply the
+        * pointer is _not_ null and in practice this can easily be a null
+        * pointer when reading pointer chains. The assumption is program
+        * context will handle null pointer dereference typically via fault
+        * handling. The verifier must keep this in mind and can make no
+        * assumptions about null or non-null when doing branch analysis.
+        * Further, when passed into helpers the helpers can not, without
+        * additional context, assume the value is non-null.
+        */
+       PTR_TO_BTF_ID,
+       /* PTR_TO_BTF_ID_OR_NULL points to a kernel struct that has not
+        * been checked for null. Used primarily to inform the verifier
+        * an explicit null check is required for this struct.
+        */
+       PTR_TO_BTF_ID_OR_NULL,
        PTR_TO_MEM,              /* reg points to valid memory region */
        PTR_TO_MEM_OR_NULL,      /* reg points to valid memory region or NULL */
        PTR_TO_RDONLY_BUF,       /* reg points to a readonly buffer */
@@ -591,6 +606,13 @@ struct bpf_trampoline {
        struct bpf_ksym ksym;
 };
 
+struct bpf_attach_target_info {
+       struct btf_func_model fmodel;
+       long tgt_addr;
+       const char *tgt_name;
+       const struct btf_type *tgt_type;
+};
+
 #define BPF_DISPATCHER_MAX 48 /* Fits in 2048B */
 
 struct bpf_dispatcher_prog {
@@ -618,9 +640,10 @@ static __always_inline unsigned int bpf_dispatcher_nop_func(
        return bpf_func(ctx, insnsi);
 }
 #ifdef CONFIG_BPF_JIT
-struct bpf_trampoline *bpf_trampoline_lookup(u64 key);
-int bpf_trampoline_link_prog(struct bpf_prog *prog);
-int bpf_trampoline_unlink_prog(struct bpf_prog *prog);
+int bpf_trampoline_link_prog(struct bpf_prog *prog, struct bpf_trampoline *tr);
+int bpf_trampoline_unlink_prog(struct bpf_prog *prog, struct bpf_trampoline *tr);
+struct bpf_trampoline *bpf_trampoline_get(u64 key,
+                                         struct bpf_attach_target_info *tgt_info);
 void bpf_trampoline_put(struct bpf_trampoline *tr);
 #define BPF_DISPATCHER_INIT(_name) {                           \
        .mutex = __MUTEX_INITIALIZER(_name.mutex),              \
@@ -665,17 +688,20 @@ void bpf_image_ksym_del(struct bpf_ksym *ksym);
 void bpf_ksym_add(struct bpf_ksym *ksym);
 void bpf_ksym_del(struct bpf_ksym *ksym);
 #else
-static inline struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
+static inline int bpf_trampoline_link_prog(struct bpf_prog *prog,
+                                          struct bpf_trampoline *tr)
 {
-       return NULL;
+       return -ENOTSUPP;
 }
-static inline int bpf_trampoline_link_prog(struct bpf_prog *prog)
+static inline int bpf_trampoline_unlink_prog(struct bpf_prog *prog,
+                                            struct bpf_trampoline *tr)
 {
        return -ENOTSUPP;
 }
-static inline int bpf_trampoline_unlink_prog(struct bpf_prog *prog)
+static inline struct bpf_trampoline *bpf_trampoline_get(u64 key,
+                                                       struct bpf_attach_target_info *tgt_info)
 {
-       return -ENOTSUPP;
+       return ERR_PTR(-EOPNOTSUPP);
 }
 static inline void bpf_trampoline_put(struct bpf_trampoline *tr) {}
 #define DEFINE_BPF_DISPATCHER(name)
@@ -739,7 +765,11 @@ struct bpf_prog_aux {
        u32 max_rdonly_access;
        u32 max_rdwr_access;
        const struct bpf_ctx_arg_aux *ctx_arg_info;
-       struct bpf_prog *linked_prog;
+       struct mutex dst_mutex; /* protects dst_* pointers below, *after* prog becomes visible */
+       struct bpf_prog *dst_prog;
+       struct bpf_trampoline *dst_trampoline;
+       enum bpf_prog_type saved_dst_prog_type;
+       enum bpf_attach_type saved_dst_attach_type;
        bool verifier_zext; /* Zero extensions has been inserted by verifier. */
        bool offload_requested;
        bool attach_btf_trace; /* true if attaching to BTF-enabled raw tp */
@@ -747,7 +777,6 @@ struct bpf_prog_aux {
        bool sleepable;
        bool tail_call_reachable;
        enum bpf_tramp_prog_type trampoline_prog_type;
-       struct bpf_trampoline *trampoline;
        struct hlist_node tramp_hlist;
        /* BTF_KIND_FUNC_PROTO for valid attach_btf_id */
        const struct btf_type *attach_func_proto;
@@ -1340,6 +1369,8 @@ int bpf_check(struct bpf_prog **fp, union bpf_attr *attr,
              union bpf_attr __user *uattr);
 void bpf_patch_call_args(struct bpf_insn *insn, u32 stack_depth);
 
+struct btf *bpf_get_btf_vmlinux(void);
+
 /* Map specifics */
 struct xdp_buff;
 struct sk_buff;
@@ -1381,6 +1412,9 @@ int bpf_prog_test_run_tracing(struct bpf_prog *prog,
 int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
                                     const union bpf_attr *kattr,
                                     union bpf_attr __user *uattr);
+int bpf_prog_test_run_raw_tp(struct bpf_prog *prog,
+                            const union bpf_attr *kattr,
+                            union bpf_attr __user *uattr);
 bool btf_ctx_access(int off, int size, enum bpf_access_type type,
                    const struct bpf_prog *prog,
                    struct bpf_insn_access_aux *info);
@@ -1402,7 +1436,7 @@ int btf_check_func_arg_match(struct bpf_verifier_env *env, int subprog,
                             struct bpf_reg_state *regs);
 int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog,
                          struct bpf_reg_state *reg);
-int btf_check_type_match(struct bpf_verifier_env *env, struct bpf_prog *prog,
+int btf_check_type_match(struct bpf_verifier_log *log, const struct bpf_prog *prog,
                         struct btf *btf, const struct btf_type *t);
 
 struct bpf_prog *bpf_prog_by_id(u32 id);
@@ -1793,6 +1827,7 @@ extern const struct bpf_func_proto bpf_skc_to_tcp_timewait_sock_proto;
 extern const struct bpf_func_proto bpf_skc_to_tcp_request_sock_proto;
 extern const struct bpf_func_proto bpf_skc_to_udp6_sock_proto;
 extern const struct bpf_func_proto bpf_copy_from_user_proto;
+extern const struct bpf_func_proto bpf_snprintf_btf_proto;
 
 const struct bpf_func_proto *bpf_tracing_func_proto(
        enum bpf_func_id func_id, const struct bpf_prog *prog);
index 2bb48a2..363b4f1 100644 (file)
@@ -347,8 +347,9 @@ static inline bool bpf_verifier_log_full(const struct bpf_verifier_log *log)
 
 static inline bool bpf_verifier_log_needed(const struct bpf_verifier_log *log)
 {
-       return (log->level && log->ubuf && !bpf_verifier_log_full(log)) ||
-               log->level == BPF_LOG_KERNEL;
+       return log &&
+               ((log->level && log->ubuf && !bpf_verifier_log_full(log)) ||
+                log->level == BPF_LOG_KERNEL);
 }
 
 #define BPF_MAX_SUBPROGS 256
@@ -449,4 +450,17 @@ bpf_prog_offload_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt);
 int check_ctx_reg(struct bpf_verifier_env *env,
                  const struct bpf_reg_state *reg, int regno);
 
+/* this lives here instead of in bpf.h because it needs to dereference tgt_prog */
+static inline u64 bpf_trampoline_compute_key(const struct bpf_prog *tgt_prog,
+                                            u32 btf_id)
+{
+        return tgt_prog ? (((u64)tgt_prog->aux->id) << 32 | btf_id) : btf_id;
+}
+
+int bpf_check_attach_target(struct bpf_verifier_log *log,
+                           const struct bpf_prog *prog,
+                           const struct bpf_prog *tgt_prog,
+                           u32 btf_id,
+                           struct bpf_attach_target_info *tgt_info);
+
 #endif /* _LINUX_BPF_VERIFIER_H */
index a9af5e7..024e16f 100644 (file)
@@ -6,6 +6,7 @@
 
 #include <linux/types.h>
 #include <uapi/linux/btf.h>
+#include <uapi/linux/bpf.h>
 
 #define BTF_TYPE_EMIT(type) ((void)(type *)0)
 
@@ -13,6 +14,7 @@ struct btf;
 struct btf_member;
 struct btf_type;
 union bpf_attr;
+struct btf_show;
 
 extern const struct file_operations btf_fops;
 
@@ -46,8 +48,45 @@ int btf_get_info_by_fd(const struct btf *btf,
 const struct btf_type *btf_type_id_size(const struct btf *btf,
                                        u32 *type_id,
                                        u32 *ret_size);
+
+/*
+ * Options to control show behaviour.
+ *     - BTF_SHOW_COMPACT: no formatting around type information
+ *     - BTF_SHOW_NONAME: no struct/union member names/types
+ *     - BTF_SHOW_PTR_RAW: show raw (unobfuscated) pointer values;
+ *       equivalent to %px.
+ *     - BTF_SHOW_ZERO: show zero-valued struct/union members; they
+ *       are not displayed by default
+ *     - BTF_SHOW_UNSAFE: skip use of bpf_probe_read() to safely read
+ *       data before displaying it.
+ */
+#define BTF_SHOW_COMPACT       BTF_F_COMPACT
+#define BTF_SHOW_NONAME                BTF_F_NONAME
+#define BTF_SHOW_PTR_RAW       BTF_F_PTR_RAW
+#define BTF_SHOW_ZERO          BTF_F_ZERO
+#define BTF_SHOW_UNSAFE                (1ULL << 4)
+
 void btf_type_seq_show(const struct btf *btf, u32 type_id, void *obj,
                       struct seq_file *m);
+int btf_type_seq_show_flags(const struct btf *btf, u32 type_id, void *obj,
+                           struct seq_file *m, u64 flags);
+
+/*
+ * Copy len bytes of string representation of obj of BTF type_id into buf.
+ *
+ * @btf: struct btf object
+ * @type_id: type id of type obj points to
+ * @obj: pointer to typed data
+ * @buf: buffer to write to
+ * @len: maximum length to write to buf
+ * @flags: show options (see above)
+ *
+ * Return: length that would have been/was copied as per snprintf, or
+ *        negative error.
+ */
+int btf_type_snprintf_show(const struct btf *btf, u32 type_id, void *obj,
+                          char *buf, int len, u64 flags);
+
 int btf_get_fd_by_id(u32 id);
 u32 btf_id(const struct btf *btf);
 bool btf_member_is_reg_int(const struct btf *btf, const struct btf_type *s,
diff --git a/include/linux/cookie.h b/include/linux/cookie.h
new file mode 100644 (file)
index 0000000..0c159f5
--- /dev/null
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __LINUX_COOKIE_H
+#define __LINUX_COOKIE_H
+
+#include <linux/atomic.h>
+#include <linux/percpu.h>
+#include <asm/local.h>
+
+struct pcpu_gen_cookie {
+       local_t nesting;
+       u64 last;
+} __aligned(16);
+
+struct gen_cookie {
+       struct pcpu_gen_cookie __percpu *local;
+       atomic64_t forward_last ____cacheline_aligned_in_smp;
+       atomic64_t reverse_last;
+};
+
+#define COOKIE_LOCAL_BATCH     4096
+
+#define DEFINE_COOKIE(name)                                            \
+       static DEFINE_PER_CPU(struct pcpu_gen_cookie, __##name);        \
+       static struct gen_cookie name = {                               \
+               .local          = &__##name,                            \
+               .forward_last   = ATOMIC64_INIT(0),                     \
+               .reverse_last   = ATOMIC64_INIT(0),                     \
+       }
+
+static __always_inline u64 gen_cookie_next(struct gen_cookie *gc)
+{
+       struct pcpu_gen_cookie *local = this_cpu_ptr(gc->local);
+       u64 val;
+
+       if (likely(local_inc_return(&local->nesting) == 1)) {
+               val = local->last;
+               if (__is_defined(CONFIG_SMP) &&
+                   unlikely((val & (COOKIE_LOCAL_BATCH - 1)) == 0)) {
+                       s64 next = atomic64_add_return(COOKIE_LOCAL_BATCH,
+                                                      &gc->forward_last);
+                       val = next - COOKIE_LOCAL_BATCH;
+               }
+               local->last = ++val;
+       } else {
+               val = atomic64_dec_return(&gc->reverse_last);
+       }
+       local_dec(&local->nesting);
+       return val;
+}
+
+#endif /* __LINUX_COOKIE_H */
index aaaac8a..3e7919f 100644 (file)
@@ -50,6 +50,7 @@ static inline void rcu_read_lock_trace(void)
        struct task_struct *t = current;
 
        WRITE_ONCE(t->trc_reader_nesting, READ_ONCE(t->trc_reader_nesting) + 1);
+       barrier();
        if (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB) &&
            t->trc_reader_special.b.need_mb)
                smp_mb(); // Pairs with update-side barriers
@@ -72,6 +73,9 @@ static inline void rcu_read_unlock_trace(void)
 
        rcu_lock_release(&rcu_trace_lock_map);
        nesting = READ_ONCE(t->trc_reader_nesting) - 1;
+       barrier(); // Critical section before disabling.
+       // Disable IPI-based setting of .need_qs.
+       WRITE_ONCE(t->trc_reader_nesting, INT_MIN);
        if (likely(!READ_ONCE(t->trc_reader_special.s)) || nesting) {
                WRITE_ONCE(t->trc_reader_nesting, nesting);
                return;  // We assume shallow reader nesting.
index 04a18e0..3d0cf37 100644 (file)
@@ -2548,6 +2548,11 @@ static inline int skb_mac_header_was_set(const struct sk_buff *skb)
        return skb->mac_header != (typeof(skb->mac_header))~0U;
 }
 
+static inline void skb_unset_mac_header(struct sk_buff *skb)
+{
+       skb->mac_header = (typeof(skb->mac_header))~0U;
+}
+
 static inline void skb_reset_mac_header(struct sk_buff *skb)
 {
        skb->mac_header = skb->data - skb->head;
index 15fe980..0b9ecd8 100644 (file)
@@ -25,7 +25,19 @@ void sock_diag_unregister(const struct sock_diag_handler *h);
 void sock_diag_register_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh));
 void sock_diag_unregister_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh));
 
-u64 sock_gen_cookie(struct sock *sk);
+u64 __sock_gen_cookie(struct sock *sk);
+
+static inline u64 sock_gen_cookie(struct sock *sk)
+{
+       u64 cookie;
+
+       preempt_disable();
+       cookie = __sock_gen_cookie(sk);
+       preempt_enable();
+
+       return cookie;
+}
+
 int sock_diag_check_cookie(struct sock *sk, const __u32 *cookie);
 void sock_diag_save_cookie(struct sock *sk, __u32 *cookie);
 
index 119f4c9..3c516dd 100644 (file)
@@ -20,8 +20,6 @@ void bpf_sk_storage_free(struct sock *sk);
 
 extern const struct bpf_func_proto bpf_sk_storage_get_proto;
 extern const struct bpf_func_proto bpf_sk_storage_delete_proto;
-extern const struct bpf_func_proto sk_storage_get_btf_proto;
-extern const struct bpf_func_proto sk_storage_delete_btf_proto;
 
 struct bpf_local_storage_elem;
 struct bpf_sk_storage_diag;
index 2ee5901..22bc07f 100644 (file)
@@ -230,7 +230,7 @@ extern struct list_head net_namespace_list;
 struct net *get_net_ns_by_pid(pid_t pid);
 struct net *get_net_ns_by_fd(int fd);
 
-u64 net_gen_cookie(struct net *net);
+u64 __net_gen_cookie(struct net *net);
 
 #ifdef CONFIG_SYSCTL
 void ipx_register_sysctl(void);
index a228125..4f556cf 100644 (file)
@@ -414,6 +414,9 @@ enum {
 
 /* Enable memory-mapping BPF map */
        BPF_F_MMAPABLE          = (1U << 10),
+
+/* Share perf_event among processes */
+       BPF_F_PRESERVE_ELEMS    = (1U << 11),
 };
 
 /* Flags for BPF_PROG_QUERY. */
@@ -424,6 +427,11 @@ enum {
  */
 #define BPF_F_QUERY_EFFECTIVE  (1U << 0)
 
+/* Flags for BPF_PROG_TEST_RUN */
+
+/* If set, run the test on the cpu specified by bpf_attr.test.cpu */
+#define BPF_F_TEST_RUN_ON_CPU  (1U << 0)
+
 /* type for BPF_ENABLE_STATS */
 enum bpf_stats_type {
        /* enabled run_time_ns and run_cnt */
@@ -566,6 +574,8 @@ union bpf_attr {
                                                 */
                __aligned_u64   ctx_in;
                __aligned_u64   ctx_out;
+               __u32           flags;
+               __u32           cpu;
        } test;
 
        struct { /* anonymous struct used by BPF_*_GET_*_ID */
@@ -632,8 +642,13 @@ union bpf_attr {
                };
                __u32           attach_type;    /* attach type */
                __u32           flags;          /* extra flags */
-               __aligned_u64   iter_info;      /* extra bpf_iter_link_info */
-               __u32           iter_info_len;  /* iter_info length */
+               union {
+                       __u32           target_btf_id;  /* btf_id of target to attach to */
+                       struct {
+                               __aligned_u64   iter_info;      /* extra bpf_iter_link_info */
+                               __u32           iter_info_len;  /* iter_info length */
+                       };
+               };
        } link_create;
 
        struct { /* struct used by BPF_LINK_UPDATE command */
@@ -2512,7 +2527,7 @@ union bpf_attr {
  *             result is from *reuse*\ **->socks**\ [] using the hash of the
  *             tuple.
  *
- * long bpf_sk_release(struct bpf_sock *sock)
+ * long bpf_sk_release(void *sock)
  *     Description
  *             Release the reference held by *sock*. *sock* must be a
  *             non-**NULL** pointer that was returned from
@@ -2692,7 +2707,7 @@ union bpf_attr {
  *             result is from *reuse*\ **->socks**\ [] using the hash of the
  *             tuple.
  *
- * long bpf_tcp_check_syncookie(struct bpf_sock *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len)
+ * long bpf_tcp_check_syncookie(void *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len)
  *     Description
  *             Check whether *iph* and *th* contain a valid SYN cookie ACK for
  *             the listening socket in *sk*.
@@ -2861,6 +2876,7 @@ union bpf_attr {
  *             0 on success.
  *
  *             **-ENOENT** if the bpf-local-storage cannot be found.
+ *             **-EINVAL** if sk is not a fullsock (e.g. a request_sock).
  *
  * long bpf_send_signal(u32 sig)
  *     Description
@@ -2877,7 +2893,7 @@ union bpf_attr {
  *
  *             **-EAGAIN** if bpf program can try again.
  *
- * s64 bpf_tcp_gen_syncookie(struct bpf_sock *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len)
+ * s64 bpf_tcp_gen_syncookie(void *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len)
  *     Description
  *             Try to issue a SYN cookie for the packet with corresponding
  *             IP/TCP headers, *iph* and *th*, on the listening socket in *sk*.
@@ -3106,7 +3122,7 @@ union bpf_attr {
  *     Return
  *             The id is returned or 0 in case the id could not be retrieved.
  *
- * long bpf_sk_assign(struct sk_buff *skb, struct bpf_sock *sk, u64 flags)
+ * long bpf_sk_assign(struct sk_buff *skb, void *sk, u64 flags)
  *     Description
  *             Helper is overloaded depending on BPF program type. This
  *             description applies to **BPF_PROG_TYPE_SCHED_CLS** and
@@ -3234,11 +3250,11 @@ union bpf_attr {
  *
  *             **-EOVERFLOW** if an overflow happened: The same object will be tried again.
  *
- * u64 bpf_sk_cgroup_id(struct bpf_sock *sk)
+ * u64 bpf_sk_cgroup_id(void *sk)
  *     Description
  *             Return the cgroup v2 id of the socket *sk*.
  *
- *             *sk* must be a non-**NULL** pointer to a full socket, e.g. one
+ *             *sk* must be a non-**NULL** pointer to a socket, e.g. one
  *             returned from **bpf_sk_lookup_xxx**\ (),
  *             **bpf_sk_fullsock**\ (), etc. The format of returned id is
  *             same as in **bpf_skb_cgroup_id**\ ().
@@ -3248,7 +3264,7 @@ union bpf_attr {
  *     Return
  *             The id is returned or 0 in case the id could not be retrieved.
  *
- * u64 bpf_sk_ancestor_cgroup_id(struct bpf_sock *sk, int ancestor_level)
+ * u64 bpf_sk_ancestor_cgroup_id(void *sk, int ancestor_level)
  *     Description
  *             Return id of cgroup v2 that is ancestor of cgroup associated
  *             with the *sk* at the *ancestor_level*.  The root cgroup is at
@@ -3586,6 +3602,72 @@ union bpf_attr {
  *             the data in *dst*. This is a wrapper of **copy_from_user**\ ().
  *     Return
  *             0 on success, or a negative error in case of failure.
+ *
+ * long bpf_snprintf_btf(char *str, u32 str_size, struct btf_ptr *ptr, u32 btf_ptr_size, u64 flags)
+ *     Description
+ *             Use BTF to store a string representation of *ptr*->ptr in *str*,
+ *             using *ptr*->type_id.  This value should specify the type
+ *             that *ptr*->ptr points to. LLVM __builtin_btf_type_id(type, 1)
+ *             can be used to look up vmlinux BTF type ids. Traversing the
+ *             data structure using BTF, the type information and values are
+ *             stored in the first *str_size* - 1 bytes of *str*.  Safe copy of
+ *             the pointer data is carried out to avoid kernel crashes during
+ *             operation.  Smaller types can use string space on the stack;
+ *             larger programs can use map data to store the string
+ *             representation.
+ *
+ *             The string can be subsequently shared with userspace via
+ *             **bpf_perf_event_output**\ () or ring buffer interfaces.
+ *             **bpf_trace_printk**\ () is to be avoided as it places too small
+ *             a limit on string size to be useful.
+ *
+ *             *flags* is a combination of
+ *
+ *             **BTF_F_COMPACT**
+ *                     no formatting around type information
+ *             **BTF_F_NONAME**
+ *                     no struct/union member names/types
+ *             **BTF_F_PTR_RAW**
+ *                     show raw (unobfuscated) pointer values;
+ *                     equivalent to printk specifier %px.
+ *             **BTF_F_ZERO**
+ *                     show zero-valued struct/union members; they
+ *                     are not displayed by default
+ *
+ *     Return
+ *             The number of bytes that were written (or would have been
+ *             written if output had to be truncated due to string size),
+ *             or a negative error in cases of failure.
+ *
+ * long bpf_seq_printf_btf(struct seq_file *m, struct btf_ptr *ptr, u32 ptr_size, u64 flags)
+ *     Description
+ *             Use BTF to write to seq_write a string representation of
+ *             *ptr*->ptr, using *ptr*->type_id as per **bpf_snprintf_btf**\ ().
+ *             *flags* are identical to those used for **bpf_snprintf_btf**\ ().
+ *     Return
+ *             0 on success or a negative error in case of failure.
+ *
+ * u64 bpf_skb_cgroup_classid(struct sk_buff *skb)
+ *     Description
+ *             See **bpf_get_cgroup_classid**\ () for the main description.
+ *             This helper differs from **bpf_get_cgroup_classid**\ () in that
+ *             the cgroup v1 net_cls class is retrieved only from the *skb*'s
+ *             associated socket instead of the current process.
+ *     Return
+ *             The id is returned or 0 in case the id could not be retrieved.
+ *
+ * long bpf_redirect_neigh(u32 ifindex, u64 flags)
+ *     Description
+ *             Redirect the packet to another net device of index *ifindex*
+ *             and fill in L2 addresses from neighboring subsystem. This helper
+ *             is somewhat similar to **bpf_redirect**\ (), except that it
+ *             fills in e.g. MAC addresses based on the L3 information from
+ *             the packet. This helper is supported for IPv4 and IPv6 protocols.
+ *             The *flags* argument is reserved and must be 0. The helper is
+ *             currently only supported for tc BPF program types.
+ *     Return
+ *             The helper returns **TC_ACT_REDIRECT** on success or
+ *             **TC_ACT_SHOT** on error.
  */
 #define __BPF_FUNC_MAPPER(FN)          \
        FN(unspec),                     \
@@ -3737,6 +3819,10 @@ union bpf_attr {
        FN(inode_storage_delete),       \
        FN(d_path),                     \
        FN(copy_from_user),             \
+       FN(snprintf_btf),               \
+       FN(seq_printf_btf),             \
+       FN(skb_cgroup_classid),         \
+       FN(redirect_neigh),             \
        /* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
@@ -4845,4 +4931,34 @@ struct bpf_sk_lookup {
        __u32 local_port;       /* Host byte order */
 };
 
+/*
+ * struct btf_ptr is used for typed pointer representation; the
+ * type id is used to render the pointer data as the appropriate type
+ * via the bpf_snprintf_btf() helper described above.  A flags field -
+ * potentially to specify additional details about the BTF pointer
+ * (rather than its mode of display) - is included for future use.
+ * Display flags - BTF_F_* - are passed to bpf_snprintf_btf separately.
+ */
+struct btf_ptr {
+       void *ptr;
+       __u32 type_id;
+       __u32 flags;            /* BTF ptr flags; unused at present. */
+};
+
+/*
+ * Flags to control bpf_snprintf_btf() behaviour.
+ *     - BTF_F_COMPACT: no formatting around type information
+ *     - BTF_F_NONAME: no struct/union member names/types
+ *     - BTF_F_PTR_RAW: show raw (unobfuscated) pointer values;
+ *       equivalent to %px.
+ *     - BTF_F_ZERO: show zero-valued struct/union members; they
+ *       are not displayed by default
+ */
+enum {
+       BTF_F_COMPACT   =       (1ULL << 0),
+       BTF_F_NONAME    =       (1ULL << 1),
+       BTF_F_PTR_RAW   =       (1ULL << 2),
+       BTF_F_ZERO      =       (1ULL << 3),
+};
+
 #endif /* _UAPI__LINUX_BPF_H__ */
index e5fd312..bd777dd 100644 (file)
@@ -15,7 +15,8 @@
 #include "map_in_map.h"
 
 #define ARRAY_CREATE_FLAG_MASK \
-       (BPF_F_NUMA_NODE | BPF_F_MMAPABLE | BPF_F_ACCESS_MASK)
+       (BPF_F_NUMA_NODE | BPF_F_MMAPABLE | BPF_F_ACCESS_MASK | \
+        BPF_F_PRESERVE_ELEMS)
 
 static void bpf_array_free_percpu(struct bpf_array *array)
 {
@@ -64,6 +65,10 @@ int array_map_alloc_check(union bpf_attr *attr)
            attr->map_flags & BPF_F_MMAPABLE)
                return -EINVAL;
 
+       if (attr->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY &&
+           attr->map_flags & BPF_F_PRESERVE_ELEMS)
+               return -EINVAL;
+
        if (attr->value_size > KMALLOC_MAX_SIZE)
                /* if value_size is bigger, the user space won't be able to
                 * access the elements.
@@ -1134,6 +1139,9 @@ static void perf_event_fd_array_release(struct bpf_map *map,
        struct bpf_event_entry *ee;
        int i;
 
+       if (map->map_flags & BPF_F_PRESERVE_ELEMS)
+               return;
+
        rcu_read_lock();
        for (i = 0; i < array->map.max_entries; i++) {
                ee = READ_ONCE(array->ptrs[i]);
@@ -1143,12 +1151,19 @@ static void perf_event_fd_array_release(struct bpf_map *map,
        rcu_read_unlock();
 }
 
+static void perf_event_fd_array_map_free(struct bpf_map *map)
+{
+       if (map->map_flags & BPF_F_PRESERVE_ELEMS)
+               bpf_fd_array_map_clear(map);
+       fd_array_map_free(map);
+}
+
 static int perf_event_array_map_btf_id;
 const struct bpf_map_ops perf_event_array_map_ops = {
        .map_meta_equal = bpf_map_meta_equal,
        .map_alloc_check = fd_array_map_alloc_check,
        .map_alloc = array_map_alloc,
-       .map_free = fd_array_map_free,
+       .map_free = perf_event_fd_array_map_free,
        .map_get_next_key = array_map_get_next_key,
        .map_lookup_elem = fd_array_map_lookup_elem,
        .map_delete_elem = fd_array_map_delete_elem,
index 30833bb..8f10e30 100644 (file)
@@ -88,8 +88,8 @@ static ssize_t bpf_seq_read(struct file *file, char __user *buf, size_t size,
        mutex_lock(&seq->lock);
 
        if (!seq->buf) {
-               seq->size = PAGE_SIZE;
-               seq->buf = kmalloc(seq->size, GFP_KERNEL);
+               seq->size = PAGE_SIZE << 3;
+               seq->buf = kvmalloc(seq->size, GFP_KERNEL);
                if (!seq->buf) {
                        err = -ENOMEM;
                        goto done;
index 9cd1428..78ea8a7 100644 (file)
@@ -56,9 +56,9 @@ bpf_lsm_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
        case BPF_FUNC_inode_storage_delete:
                return &bpf_inode_storage_delete_proto;
        case BPF_FUNC_sk_storage_get:
-               return &sk_storage_get_btf_proto;
+               return &bpf_sk_storage_get_proto;
        case BPF_FUNC_sk_storage_delete:
-               return &sk_storage_delete_btf_proto;
+               return &bpf_sk_storage_delete_proto;
        default:
                return tracing_prog_func_proto(func_id, prog);
        }
index 5d3c36e..4d0ee78 100644 (file)
@@ -284,6 +284,91 @@ static const char *btf_type_str(const struct btf_type *t)
        return btf_kind_str[BTF_INFO_KIND(t->info)];
 }
 
+/* Chunk size we use in safe copy of data to be shown. */
+#define BTF_SHOW_OBJ_SAFE_SIZE         32
+
+/*
+ * This is the maximum size of a base type value (equivalent to a
+ * 128-bit int); if we are at the end of our safe buffer and have
+ * less than 16 bytes space we can't be assured of being able
+ * to copy the next type safely, so in such cases we will initiate
+ * a new copy.
+ */
+#define BTF_SHOW_OBJ_BASE_TYPE_SIZE    16
+
+/* Type name size */
+#define BTF_SHOW_NAME_SIZE             80
+
+/*
+ * Common data to all BTF show operations. Private show functions can add
+ * their own data to a structure containing a struct btf_show and consult it
+ * in the show callback.  See btf_type_show() below.
+ *
+ * One challenge with showing nested data is we want to skip 0-valued
+ * data, but in order to figure out whether a nested object is all zeros
+ * we need to walk through it.  As a result, we need to make two passes
+ * when handling structs, unions and arrays; the first path simply looks
+ * for nonzero data, while the second actually does the display.  The first
+ * pass is signalled by show->state.depth_check being set, and if we
+ * encounter a non-zero value we set show->state.depth_to_show to
+ * the depth at which we encountered it.  When we have completed the
+ * first pass, we will know if anything needs to be displayed if
+ * depth_to_show > depth.  See btf_[struct,array]_show() for the
+ * implementation of this.
+ *
+ * Another problem is we want to ensure the data for display is safe to
+ * access.  To support this, the anonymous "struct {} obj" tracks the data
+ * object and our safe copy of it.  We copy portions of the data needed
+ * to the object "copy" buffer, but because its size is limited to
+ * BTF_SHOW_OBJ_SAFE_SIZE bytes, multiple copies may be required as we
+ * traverse larger objects for display.
+ *
+ * The various data type show functions all start with a call to
+ * btf_show_start_type() which returns a pointer to the safe copy
+ * of the data needed (or if BTF_SHOW_UNSAFE is specified, to the
+ * raw data itself).  btf_show_obj_safe() is responsible for
+ * using copy_from_kernel_nofault() to update the safe data if necessary
+ * as we traverse the object's data.  skbuff-like semantics are
+ * used:
+ *
+ * - obj.head points to the start of the toplevel object for display
+ * - obj.size is the size of the toplevel object
+ * - obj.data points to the current point in the original data at
+ *   which our safe data starts.  obj.data will advance as we copy
+ *   portions of the data.
+ *
+ * In most cases a single copy will suffice, but larger data structures
+ * such as "struct task_struct" will require many copies.  The logic in
+ * btf_show_obj_safe() handles the logic that determines if a new
+ * copy_from_kernel_nofault() is needed.
+ */
+struct btf_show {
+       u64 flags;
+       void *target;   /* target of show operation (seq file, buffer) */
+       void (*showfn)(struct btf_show *show, const char *fmt, va_list args);
+       const struct btf *btf;
+       /* below are used during iteration */
+       struct {
+               u8 depth;
+               u8 depth_to_show;
+               u8 depth_check;
+               u8 array_member:1,
+                  array_terminated:1;
+               u16 array_encoding;
+               u32 type_id;
+               int status;                     /* non-zero for error */
+               const struct btf_type *type;
+               const struct btf_member *member;
+               char name[BTF_SHOW_NAME_SIZE];  /* space for member name/type */
+       } state;
+       struct {
+               u32 size;
+               void *head;
+               void *data;
+               u8 safe[BTF_SHOW_OBJ_SAFE_SIZE];
+       } obj;
+};
+
 struct btf_kind_operations {
        s32 (*check_meta)(struct btf_verifier_env *env,
                          const struct btf_type *t,
@@ -300,9 +385,9 @@ struct btf_kind_operations {
                                  const struct btf_type *member_type);
        void (*log_details)(struct btf_verifier_env *env,
                            const struct btf_type *t);
-       void (*seq_show)(const struct btf *btf, const struct btf_type *t,
+       void (*show)(const struct btf *btf, const struct btf_type *t,
                         u32 type_id, void *data, u8 bits_offsets,
-                        struct seq_file *m);
+                        struct btf_show *show);
 };
 
 static const struct btf_kind_operations * const kind_ops[NR_BTF_KINDS];
@@ -679,6 +764,488 @@ bool btf_member_is_reg_int(const struct btf *btf, const struct btf_type *s,
        return true;
 }
 
+/* Similar to btf_type_skip_modifiers() but does not skip typedefs. */
+static const struct btf_type *btf_type_skip_qualifiers(const struct btf *btf,
+                                                      u32 id)
+{
+       const struct btf_type *t = btf_type_by_id(btf, id);
+
+       while (btf_type_is_modifier(t) &&
+              BTF_INFO_KIND(t->info) != BTF_KIND_TYPEDEF) {
+               id = t->type;
+               t = btf_type_by_id(btf, t->type);
+       }
+
+       return t;
+}
+
+#define BTF_SHOW_MAX_ITER      10
+
+#define BTF_KIND_BIT(kind)     (1ULL << kind)
+
+/*
+ * Populate show->state.name with type name information.
+ * Format of type name is
+ *
+ * [.member_name = ] (type_name)
+ */
+static const char *btf_show_name(struct btf_show *show)
+{
+       /* BTF_SHOW_MAX_ITER array suffixes "[]" */
+       const char *array_suffixes = "[][][][][][][][][][]";
+       const char *array_suffix = &array_suffixes[strlen(array_suffixes)];
+       /* BTF_SHOW_MAX_ITER pointer suffixes "*" */
+       const char *ptr_suffixes = "**********";
+       const char *ptr_suffix = &ptr_suffixes[strlen(ptr_suffixes)];
+       const char *name = NULL, *prefix = "", *parens = "";
+       const struct btf_member *m = show->state.member;
+       const struct btf_type *t = show->state.type;
+       const struct btf_array *array;
+       u32 id = show->state.type_id;
+       const char *member = NULL;
+       bool show_member = false;
+       u64 kinds = 0;
+       int i;
+
+       show->state.name[0] = '\0';
+
+       /*
+        * Don't show type name if we're showing an array member;
+        * in that case we show the array type so don't need to repeat
+        * ourselves for each member.
+        */
+       if (show->state.array_member)
+               return "";
+
+       /* Retrieve member name, if any. */
+       if (m) {
+               member = btf_name_by_offset(show->btf, m->name_off);
+               show_member = strlen(member) > 0;
+               id = m->type;
+       }
+
+       /*
+        * Start with type_id, as we have resolved the struct btf_type *
+        * via btf_modifier_show() past the parent typedef to the child
+        * struct, int etc it is defined as.  In such cases, the type_id
+        * still represents the starting type while the struct btf_type *
+        * in our show->state points at the resolved type of the typedef.
+        */
+       t = btf_type_by_id(show->btf, id);
+       if (!t)
+               return "";
+
+       /*
+        * The goal here is to build up the right number of pointer and
+        * array suffixes while ensuring the type name for a typedef
+        * is represented.  Along the way we accumulate a list of
+        * BTF kinds we have encountered, since these will inform later
+        * display; for example, pointer types will not require an
+        * opening "{" for struct, we will just display the pointer value.
+        *
+        * We also want to accumulate the right number of pointer or array
+        * indices in the format string while iterating until we get to
+        * the typedef/pointee/array member target type.
+        *
+        * We start by pointing at the end of pointer and array suffix
+        * strings; as we accumulate pointers and arrays we move the pointer
+        * or array string backwards so it will show the expected number of
+        * '*' or '[]' for the type.  BTF_SHOW_MAX_ITER of nesting of pointers
+        * and/or arrays and typedefs are supported as a precaution.
+        *
+        * We also want to get typedef name while proceeding to resolve
+        * type it points to so that we can add parentheses if it is a
+        * "typedef struct" etc.
+        */
+       for (i = 0; i < BTF_SHOW_MAX_ITER; i++) {
+
+               switch (BTF_INFO_KIND(t->info)) {
+               case BTF_KIND_TYPEDEF:
+                       if (!name)
+                               name = btf_name_by_offset(show->btf,
+                                                              t->name_off);
+                       kinds |= BTF_KIND_BIT(BTF_KIND_TYPEDEF);
+                       id = t->type;
+                       break;
+               case BTF_KIND_ARRAY:
+                       kinds |= BTF_KIND_BIT(BTF_KIND_ARRAY);
+                       parens = "[";
+                       if (!t)
+                               return "";
+                       array = btf_type_array(t);
+                       if (array_suffix > array_suffixes)
+                               array_suffix -= 2;
+                       id = array->type;
+                       break;
+               case BTF_KIND_PTR:
+                       kinds |= BTF_KIND_BIT(BTF_KIND_PTR);
+                       if (ptr_suffix > ptr_suffixes)
+                               ptr_suffix -= 1;
+                       id = t->type;
+                       break;
+               default:
+                       id = 0;
+                       break;
+               }
+               if (!id)
+                       break;
+               t = btf_type_skip_qualifiers(show->btf, id);
+       }
+       /* We may not be able to represent this type; bail to be safe */
+       if (i == BTF_SHOW_MAX_ITER)
+               return "";
+
+       if (!name)
+               name = btf_name_by_offset(show->btf, t->name_off);
+
+       switch (BTF_INFO_KIND(t->info)) {
+       case BTF_KIND_STRUCT:
+       case BTF_KIND_UNION:
+               prefix = BTF_INFO_KIND(t->info) == BTF_KIND_STRUCT ?
+                        "struct" : "union";
+               /* if it's an array of struct/union, parens is already set */
+               if (!(kinds & (BTF_KIND_BIT(BTF_KIND_ARRAY))))
+                       parens = "{";
+               break;
+       case BTF_KIND_ENUM:
+               prefix = "enum";
+               break;
+       default:
+               break;
+       }
+
+       /* pointer does not require parens */
+       if (kinds & BTF_KIND_BIT(BTF_KIND_PTR))
+               parens = "";
+       /* typedef does not require struct/union/enum prefix */
+       if (kinds & BTF_KIND_BIT(BTF_KIND_TYPEDEF))
+               prefix = "";
+
+       if (!name)
+               name = "";
+
+       /* Even if we don't want type name info, we want parentheses etc */
+       if (show->flags & BTF_SHOW_NONAME)
+               snprintf(show->state.name, sizeof(show->state.name), "%s",
+                        parens);
+       else
+               snprintf(show->state.name, sizeof(show->state.name),
+                        "%s%s%s(%s%s%s%s%s%s)%s",
+                        /* first 3 strings comprise ".member = " */
+                        show_member ? "." : "",
+                        show_member ? member : "",
+                        show_member ? " = " : "",
+                        /* ...next is our prefix (struct, enum, etc) */
+                        prefix,
+                        strlen(prefix) > 0 && strlen(name) > 0 ? " " : "",
+                        /* ...this is the type name itself */
+                        name,
+                        /* ...suffixed by the appropriate '*', '[]' suffixes */
+                        strlen(ptr_suffix) > 0 ? " " : "", ptr_suffix,
+                        array_suffix, parens);
+
+       return show->state.name;
+}
+
+static const char *__btf_show_indent(struct btf_show *show)
+{
+       const char *indents = "                                ";
+       const char *indent = &indents[strlen(indents)];
+
+       if ((indent - show->state.depth) >= indents)
+               return indent - show->state.depth;
+       return indents;
+}
+
+static const char *btf_show_indent(struct btf_show *show)
+{
+       return show->flags & BTF_SHOW_COMPACT ? "" : __btf_show_indent(show);
+}
+
+static const char *btf_show_newline(struct btf_show *show)
+{
+       return show->flags & BTF_SHOW_COMPACT ? "" : "\n";
+}
+
+static const char *btf_show_delim(struct btf_show *show)
+{
+       if (show->state.depth == 0)
+               return "";
+
+       if ((show->flags & BTF_SHOW_COMPACT) && show->state.type &&
+               BTF_INFO_KIND(show->state.type->info) == BTF_KIND_UNION)
+               return "|";
+
+       return ",";
+}
+
+__printf(2, 3) static void btf_show(struct btf_show *show, const char *fmt, ...)
+{
+       va_list args;
+
+       if (!show->state.depth_check) {
+               va_start(args, fmt);
+               show->showfn(show, fmt, args);
+               va_end(args);
+       }
+}
+
+/* Macros are used here as btf_show_type_value[s]() prepends and appends
+ * format specifiers to the format specifier passed in; these do the work of
+ * adding indentation, delimiters etc while the caller simply has to specify
+ * the type value(s) in the format specifier + value(s).
+ */
+#define btf_show_type_value(show, fmt, value)                                 \
+       do {                                                                   \
+               if ((value) != 0 || (show->flags & BTF_SHOW_ZERO) ||           \
+                   show->state.depth == 0) {                                  \
+                       btf_show(show, "%s%s" fmt "%s%s",                      \
+                                btf_show_indent(show),                        \
+                                btf_show_name(show),                          \
+                                value, btf_show_delim(show),                  \
+                                btf_show_newline(show));                      \
+                       if (show->state.depth > show->state.depth_to_show)     \
+                               show->state.depth_to_show = show->state.depth; \
+               }                                                              \
+       } while (0)
+
+#define btf_show_type_values(show, fmt, ...)                                  \
+       do {                                                                   \
+               btf_show(show, "%s%s" fmt "%s%s", btf_show_indent(show),       \
+                        btf_show_name(show),                                  \
+                        __VA_ARGS__, btf_show_delim(show),                    \
+                        btf_show_newline(show));                              \
+               if (show->state.depth > show->state.depth_to_show)             \
+                       show->state.depth_to_show = show->state.depth;         \
+       } while (0)
+
+/* How much is left to copy to safe buffer after @data? */
+static int btf_show_obj_size_left(struct btf_show *show, void *data)
+{
+       return show->obj.head + show->obj.size - data;
+}
+
+/* Is object pointed to by @data of @size already copied to our safe buffer? */
+static bool btf_show_obj_is_safe(struct btf_show *show, void *data, int size)
+{
+       return data >= show->obj.data &&
+              (data + size) < (show->obj.data + BTF_SHOW_OBJ_SAFE_SIZE);
+}
+
+/*
+ * If object pointed to by @data of @size falls within our safe buffer, return
+ * the equivalent pointer to the same safe data.  Assumes
+ * copy_from_kernel_nofault() has already happened and our safe buffer is
+ * populated.
+ */
+static void *__btf_show_obj_safe(struct btf_show *show, void *data, int size)
+{
+       if (btf_show_obj_is_safe(show, data, size))
+               return show->obj.safe + (data - show->obj.data);
+       return NULL;
+}
+
+/*
+ * Return a safe-to-access version of data pointed to by @data.
+ * We do this by copying the relevant amount of information
+ * to the struct btf_show obj.safe buffer using copy_from_kernel_nofault().
+ *
+ * If BTF_SHOW_UNSAFE is specified, just return data as-is; no
+ * safe copy is needed.
+ *
+ * Otherwise we need to determine if we have the required amount
+ * of data (determined by the @data pointer and the size of the
+ * largest base type we can encounter (represented by
+ * BTF_SHOW_OBJ_BASE_TYPE_SIZE). Having that much data ensures
+ * that we will be able to print some of the current object,
+ * and if more is needed a copy will be triggered.
+ * Some objects such as structs will not fit into the buffer;
+ * in such cases additional copies when we iterate over their
+ * members may be needed.
+ *
+ * btf_show_obj_safe() is used to return a safe buffer for
+ * btf_show_start_type(); this ensures that as we recurse into
+ * nested types we always have safe data for the given type.
+ * This approach is somewhat wasteful; it's possible for example
+ * that when iterating over a large union we'll end up copying the
+ * same data repeatedly, but the goal is safety not performance.
+ * We use stack data as opposed to per-CPU buffers because the
+ * iteration over a type can take some time, and preemption handling
+ * would greatly complicate use of the safe buffer.
+ */
+static void *btf_show_obj_safe(struct btf_show *show,
+                              const struct btf_type *t,
+                              void *data)
+{
+       const struct btf_type *rt;
+       int size_left, size;
+       void *safe = NULL;
+
+       if (show->flags & BTF_SHOW_UNSAFE)
+               return data;
+
+       rt = btf_resolve_size(show->btf, t, &size);
+       if (IS_ERR(rt)) {
+               show->state.status = PTR_ERR(rt);
+               return NULL;
+       }
+
+       /*
+        * Is this toplevel object? If so, set total object size and
+        * initialize pointers.  Otherwise check if we still fall within
+        * our safe object data.
+        */
+       if (show->state.depth == 0) {
+               show->obj.size = size;
+               show->obj.head = data;
+       } else {
+               /*
+                * If the size of the current object is > our remaining
+                * safe buffer we _may_ need to do a new copy.  However
+                * consider the case of a nested struct; it's size pushes
+                * us over the safe buffer limit, but showing any individual
+                * struct members does not.  In such cases, we don't need
+                * to initiate a fresh copy yet; however we definitely need
+                * at least BTF_SHOW_OBJ_BASE_TYPE_SIZE bytes left
+                * in our buffer, regardless of the current object size.
+                * The logic here is that as we resolve types we will
+                * hit a base type at some point, and we need to be sure
+                * the next chunk of data is safely available to display
+                * that type info safely.  We cannot rely on the size of
+                * the current object here because it may be much larger
+                * than our current buffer (e.g. task_struct is 8k).
+                * All we want to do here is ensure that we can print the
+                * next basic type, which we can if either
+                * - the current type size is within the safe buffer; or
+                * - at least BTF_SHOW_OBJ_BASE_TYPE_SIZE bytes are left in
+                *   the safe buffer.
+                */
+               safe = __btf_show_obj_safe(show, data,
+                                          min(size,
+                                              BTF_SHOW_OBJ_BASE_TYPE_SIZE));
+       }
+
+       /*
+        * We need a new copy to our safe object, either because we haven't
+        * yet copied and are initializing safe data, or because the data
+        * we want falls outside the boundaries of the safe object.
+        */
+       if (!safe) {
+               size_left = btf_show_obj_size_left(show, data);
+               if (size_left > BTF_SHOW_OBJ_SAFE_SIZE)
+                       size_left = BTF_SHOW_OBJ_SAFE_SIZE;
+               show->state.status = copy_from_kernel_nofault(show->obj.safe,
+                                                             data, size_left);
+               if (!show->state.status) {
+                       show->obj.data = data;
+                       safe = show->obj.safe;
+               }
+       }
+
+       return safe;
+}
+
+/*
+ * Set the type we are starting to show and return a safe data pointer
+ * to be used for showing the associated data.
+ */
+static void *btf_show_start_type(struct btf_show *show,
+                                const struct btf_type *t,
+                                u32 type_id, void *data)
+{
+       show->state.type = t;
+       show->state.type_id = type_id;
+       show->state.name[0] = '\0';
+
+       return btf_show_obj_safe(show, t, data);
+}
+
+static void btf_show_end_type(struct btf_show *show)
+{
+       show->state.type = NULL;
+       show->state.type_id = 0;
+       show->state.name[0] = '\0';
+}
+
+static void *btf_show_start_aggr_type(struct btf_show *show,
+                                     const struct btf_type *t,
+                                     u32 type_id, void *data)
+{
+       void *safe_data = btf_show_start_type(show, t, type_id, data);
+
+       if (!safe_data)
+               return safe_data;
+
+       btf_show(show, "%s%s%s", btf_show_indent(show),
+                btf_show_name(show),
+                btf_show_newline(show));
+       show->state.depth++;
+       return safe_data;
+}
+
+static void btf_show_end_aggr_type(struct btf_show *show,
+                                  const char *suffix)
+{
+       show->state.depth--;
+       btf_show(show, "%s%s%s%s", btf_show_indent(show), suffix,
+                btf_show_delim(show), btf_show_newline(show));
+       btf_show_end_type(show);
+}
+
+static void btf_show_start_member(struct btf_show *show,
+                                 const struct btf_member *m)
+{
+       show->state.member = m;
+}
+
+static void btf_show_start_array_member(struct btf_show *show)
+{
+       show->state.array_member = 1;
+       btf_show_start_member(show, NULL);
+}
+
+static void btf_show_end_member(struct btf_show *show)
+{
+       show->state.member = NULL;
+}
+
+static void btf_show_end_array_member(struct btf_show *show)
+{
+       show->state.array_member = 0;
+       btf_show_end_member(show);
+}
+
+static void *btf_show_start_array_type(struct btf_show *show,
+                                      const struct btf_type *t,
+                                      u32 type_id,
+                                      u16 array_encoding,
+                                      void *data)
+{
+       show->state.array_encoding = array_encoding;
+       show->state.array_terminated = 0;
+       return btf_show_start_aggr_type(show, t, type_id, data);
+}
+
+static void btf_show_end_array_type(struct btf_show *show)
+{
+       show->state.array_encoding = 0;
+       show->state.array_terminated = 0;
+       btf_show_end_aggr_type(show, "]");
+}
+
+static void *btf_show_start_struct_type(struct btf_show *show,
+                                       const struct btf_type *t,
+                                       u32 type_id,
+                                       void *data)
+{
+       return btf_show_start_aggr_type(show, t, type_id, data);
+}
+
+static void btf_show_end_struct_type(struct btf_show *show)
+{
+       btf_show_end_aggr_type(show, "}");
+}
+
 __printf(2, 3) static void __btf_verifier_log(struct bpf_verifier_log *log,
                                              const char *fmt, ...)
 {
@@ -1268,11 +1835,11 @@ static int btf_df_resolve(struct btf_verifier_env *env,
        return -EINVAL;
 }
 
-static void btf_df_seq_show(const struct btf *btf, const struct btf_type *t,
-                           u32 type_id, void *data, u8 bits_offsets,
-                           struct seq_file *m)
+static void btf_df_show(const struct btf *btf, const struct btf_type *t,
+                       u32 type_id, void *data, u8 bits_offsets,
+                       struct btf_show *show)
 {
-       seq_printf(m, "<unsupported kind:%u>", BTF_INFO_KIND(t->info));
+       btf_show(show, "<unsupported kind:%u>", BTF_INFO_KIND(t->info));
 }
 
 static int btf_int_check_member(struct btf_verifier_env *env,
@@ -1445,7 +2012,7 @@ static void btf_int_log(struct btf_verifier_env *env,
                         btf_int_encoding_str(BTF_INT_ENCODING(int_data)));
 }
 
-static void btf_int128_print(struct seq_file *m, void *data)
+static void btf_int128_print(struct btf_show *show, void *data)
 {
        /* data points to a __int128 number.
         * Suppose
@@ -1464,9 +2031,10 @@ static void btf_int128_print(struct seq_file *m, void *data)
        lower_num = *(u64 *)data;
 #endif
        if (upper_num == 0)
-               seq_printf(m, "0x%llx", lower_num);
+               btf_show_type_value(show, "0x%llx", lower_num);
        else
-               seq_printf(m, "0x%llx%016llx", upper_num, lower_num);
+               btf_show_type_values(show, "0x%llx%016llx", upper_num,
+                                    lower_num);
 }
 
 static void btf_int128_shift(u64 *print_num, u16 left_shift_bits,
@@ -1510,8 +2078,8 @@ static void btf_int128_shift(u64 *print_num, u16 left_shift_bits,
 #endif
 }
 
-static void btf_bitfield_seq_show(void *data, u8 bits_offset,
-                                 u8 nr_bits, struct seq_file *m)
+static void btf_bitfield_show(void *data, u8 bits_offset,
+                             u8 nr_bits, struct btf_show *show)
 {
        u16 left_shift_bits, right_shift_bits;
        u8 nr_copy_bytes;
@@ -1531,14 +2099,14 @@ static void btf_bitfield_seq_show(void *data, u8 bits_offset,
        right_shift_bits = BITS_PER_U128 - nr_bits;
 
        btf_int128_shift(print_num, left_shift_bits, right_shift_bits);
-       btf_int128_print(m, print_num);
+       btf_int128_print(show, print_num);
 }
 
 
-static void btf_int_bits_seq_show(const struct btf *btf,
-                                 const struct btf_type *t,
-                                 void *data, u8 bits_offset,
-                                 struct seq_file *m)
+static void btf_int_bits_show(const struct btf *btf,
+                             const struct btf_type *t,
+                             void *data, u8 bits_offset,
+                             struct btf_show *show)
 {
        u32 int_data = btf_type_int(t);
        u8 nr_bits = BTF_INT_BITS(int_data);
@@ -1551,55 +2119,77 @@ static void btf_int_bits_seq_show(const struct btf *btf,
        total_bits_offset = bits_offset + BTF_INT_OFFSET(int_data);
        data += BITS_ROUNDDOWN_BYTES(total_bits_offset);
        bits_offset = BITS_PER_BYTE_MASKED(total_bits_offset);
-       btf_bitfield_seq_show(data, bits_offset, nr_bits, m);
+       btf_bitfield_show(data, bits_offset, nr_bits, show);
 }
 
-static void btf_int_seq_show(const struct btf *btf, const struct btf_type *t,
-                            u32 type_id, void *data, u8 bits_offset,
-                            struct seq_file *m)
+static void btf_int_show(const struct btf *btf, const struct btf_type *t,
+                        u32 type_id, void *data, u8 bits_offset,
+                        struct btf_show *show)
 {
        u32 int_data = btf_type_int(t);
        u8 encoding = BTF_INT_ENCODING(int_data);
        bool sign = encoding & BTF_INT_SIGNED;
        u8 nr_bits = BTF_INT_BITS(int_data);
+       void *safe_data;
+
+       safe_data = btf_show_start_type(show, t, type_id, data);
+       if (!safe_data)
+               return;
 
        if (bits_offset || BTF_INT_OFFSET(int_data) ||
            BITS_PER_BYTE_MASKED(nr_bits)) {
-               btf_int_bits_seq_show(btf, t, data, bits_offset, m);
-               return;
+               btf_int_bits_show(btf, t, safe_data, bits_offset, show);
+               goto out;
        }
 
        switch (nr_bits) {
        case 128:
-               btf_int128_print(m, data);
+               btf_int128_print(show, safe_data);
                break;
        case 64:
                if (sign)
-                       seq_printf(m, "%lld", *(s64 *)data);
+                       btf_show_type_value(show, "%lld", *(s64 *)safe_data);
                else
-                       seq_printf(m, "%llu", *(u64 *)data);
+                       btf_show_type_value(show, "%llu", *(u64 *)safe_data);
                break;
        case 32:
                if (sign)
-                       seq_printf(m, "%d", *(s32 *)data);
+                       btf_show_type_value(show, "%d", *(s32 *)safe_data);
                else
-                       seq_printf(m, "%u", *(u32 *)data);
+                       btf_show_type_value(show, "%u", *(u32 *)safe_data);
                break;
        case 16:
                if (sign)
-                       seq_printf(m, "%d", *(s16 *)data);
+                       btf_show_type_value(show, "%d", *(s16 *)safe_data);
                else
-                       seq_printf(m, "%u", *(u16 *)data);
+                       btf_show_type_value(show, "%u", *(u16 *)safe_data);
                break;
        case 8:
+               if (show->state.array_encoding == BTF_INT_CHAR) {
+                       /* check for null terminator */
+                       if (show->state.array_terminated)
+                               break;
+                       if (*(char *)data == '\0') {
+                               show->state.array_terminated = 1;
+                               break;
+                       }
+                       if (isprint(*(char *)data)) {
+                               btf_show_type_value(show, "'%c'",
+                                                   *(char *)safe_data);
+                               break;
+                       }
+               }
                if (sign)
-                       seq_printf(m, "%d", *(s8 *)data);
+                       btf_show_type_value(show, "%d", *(s8 *)safe_data);
                else
-                       seq_printf(m, "%u", *(u8 *)data);
+                       btf_show_type_value(show, "%u", *(u8 *)safe_data);
                break;
        default:
-               btf_int_bits_seq_show(btf, t, data, bits_offset, m);
+               btf_int_bits_show(btf, t, safe_data, bits_offset, show);
+               break;
        }
+out:
+       btf_show_end_type(show);
 }
 
 static const struct btf_kind_operations int_ops = {
@@ -1608,7 +2198,7 @@ static const struct btf_kind_operations int_ops = {
        .check_member = btf_int_check_member,
        .check_kflag_member = btf_int_check_kflag_member,
        .log_details = btf_int_log,
-       .seq_show = btf_int_seq_show,
+       .show = btf_int_show,
 };
 
 static int btf_modifier_check_member(struct btf_verifier_env *env,
@@ -1872,34 +2462,44 @@ static int btf_ptr_resolve(struct btf_verifier_env *env,
        return 0;
 }
 
-static void btf_modifier_seq_show(const struct btf *btf,
-                                 const struct btf_type *t,
-                                 u32 type_id, void *data,
-                                 u8 bits_offset, struct seq_file *m)
+static void btf_modifier_show(const struct btf *btf,
+                             const struct btf_type *t,
+                             u32 type_id, void *data,
+                             u8 bits_offset, struct btf_show *show)
 {
        if (btf->resolved_ids)
                t = btf_type_id_resolve(btf, &type_id);
        else
                t = btf_type_skip_modifiers(btf, type_id, NULL);
 
-       btf_type_ops(t)->seq_show(btf, t, type_id, data, bits_offset, m);
+       btf_type_ops(t)->show(btf, t, type_id, data, bits_offset, show);
 }
 
-static void btf_var_seq_show(const struct btf *btf, const struct btf_type *t,
-                            u32 type_id, void *data, u8 bits_offset,
-                            struct seq_file *m)
+static void btf_var_show(const struct btf *btf, const struct btf_type *t,
+                        u32 type_id, void *data, u8 bits_offset,
+                        struct btf_show *show)
 {
        t = btf_type_id_resolve(btf, &type_id);
 
-       btf_type_ops(t)->seq_show(btf, t, type_id, data, bits_offset, m);
+       btf_type_ops(t)->show(btf, t, type_id, data, bits_offset, show);
 }
 
-static void btf_ptr_seq_show(const struct btf *btf, const struct btf_type *t,
-                            u32 type_id, void *data, u8 bits_offset,
-                            struct seq_file *m)
+static void btf_ptr_show(const struct btf *btf, const struct btf_type *t,
+                        u32 type_id, void *data, u8 bits_offset,
+                        struct btf_show *show)
 {
-       /* It is a hashed value */
-       seq_printf(m, "%p", *(void **)data);
+       void *safe_data;
+
+       safe_data = btf_show_start_type(show, t, type_id, data);
+       if (!safe_data)
+               return;
+
+       /* It is a hashed value unless BTF_SHOW_PTR_RAW is specified */
+       if (show->flags & BTF_SHOW_PTR_RAW)
+               btf_show_type_value(show, "0x%px", *(void **)safe_data);
+       else
+               btf_show_type_value(show, "0x%p", *(void **)safe_data);
+       btf_show_end_type(show);
 }
 
 static void btf_ref_type_log(struct btf_verifier_env *env,
@@ -1914,7 +2514,7 @@ static struct btf_kind_operations modifier_ops = {
        .check_member = btf_modifier_check_member,
        .check_kflag_member = btf_modifier_check_kflag_member,
        .log_details = btf_ref_type_log,
-       .seq_show = btf_modifier_seq_show,
+       .show = btf_modifier_show,
 };
 
 static struct btf_kind_operations ptr_ops = {
@@ -1923,7 +2523,7 @@ static struct btf_kind_operations ptr_ops = {
        .check_member = btf_ptr_check_member,
        .check_kflag_member = btf_generic_check_kflag_member,
        .log_details = btf_ref_type_log,
-       .seq_show = btf_ptr_seq_show,
+       .show = btf_ptr_show,
 };
 
 static s32 btf_fwd_check_meta(struct btf_verifier_env *env,
@@ -1964,7 +2564,7 @@ static struct btf_kind_operations fwd_ops = {
        .check_member = btf_df_check_member,
        .check_kflag_member = btf_df_check_kflag_member,
        .log_details = btf_fwd_type_log,
-       .seq_show = btf_df_seq_show,
+       .show = btf_df_show,
 };
 
 static int btf_array_check_member(struct btf_verifier_env *env,
@@ -2123,28 +2723,90 @@ static void btf_array_log(struct btf_verifier_env *env,
                         array->type, array->index_type, array->nelems);
 }
 
-static void btf_array_seq_show(const struct btf *btf, const struct btf_type *t,
-                              u32 type_id, void *data, u8 bits_offset,
-                              struct seq_file *m)
+static void __btf_array_show(const struct btf *btf, const struct btf_type *t,
+                            u32 type_id, void *data, u8 bits_offset,
+                            struct btf_show *show)
 {
        const struct btf_array *array = btf_type_array(t);
        const struct btf_kind_operations *elem_ops;
        const struct btf_type *elem_type;
-       u32 i, elem_size, elem_type_id;
+       u32 i, elem_size = 0, elem_type_id;
+       u16 encoding = 0;
 
        elem_type_id = array->type;
-       elem_type = btf_type_id_size(btf, &elem_type_id, &elem_size);
+       elem_type = btf_type_skip_modifiers(btf, elem_type_id, NULL);
+       if (elem_type && btf_type_has_size(elem_type))
+               elem_size = elem_type->size;
+
+       if (elem_type && btf_type_is_int(elem_type)) {
+               u32 int_type = btf_type_int(elem_type);
+
+               encoding = BTF_INT_ENCODING(int_type);
+
+               /*
+                * BTF_INT_CHAR encoding never seems to be set for
+                * char arrays, so if size is 1 and element is
+                * printable as a char, we'll do that.
+                */
+               if (elem_size == 1)
+                       encoding = BTF_INT_CHAR;
+       }
+
+       if (!btf_show_start_array_type(show, t, type_id, encoding, data))
+               return;
+
+       if (!elem_type)
+               goto out;
        elem_ops = btf_type_ops(elem_type);
-       seq_puts(m, "[");
+
        for (i = 0; i < array->nelems; i++) {
-               if (i)
-                       seq_puts(m, ",");
 
-               elem_ops->seq_show(btf, elem_type, elem_type_id, data,
-                                  bits_offset, m);
+               btf_show_start_array_member(show);
+
+               elem_ops->show(btf, elem_type, elem_type_id, data,
+                              bits_offset, show);
                data += elem_size;
+
+               btf_show_end_array_member(show);
+
+               if (show->state.array_terminated)
+                       break;
+       }
+out:
+       btf_show_end_array_type(show);
+}
+
+static void btf_array_show(const struct btf *btf, const struct btf_type *t,
+                          u32 type_id, void *data, u8 bits_offset,
+                          struct btf_show *show)
+{
+       const struct btf_member *m = show->state.member;
+
+       /*
+        * First check if any members would be shown (are non-zero).
+        * See comments above "struct btf_show" definition for more
+        * details on how this works at a high-level.
+        */
+       if (show->state.depth > 0 && !(show->flags & BTF_SHOW_ZERO)) {
+               if (!show->state.depth_check) {
+                       show->state.depth_check = show->state.depth + 1;
+                       show->state.depth_to_show = 0;
+               }
+               __btf_array_show(btf, t, type_id, data, bits_offset, show);
+               show->state.member = m;
+
+               if (show->state.depth_check != show->state.depth + 1)
+                       return;
+               show->state.depth_check = 0;
+
+               if (show->state.depth_to_show <= show->state.depth)
+                       return;
+               /*
+                * Reaching here indicates we have recursed and found
+                * non-zero array member(s).
+                */
        }
-       seq_puts(m, "]");
+       __btf_array_show(btf, t, type_id, data, bits_offset, show);
 }
 
 static struct btf_kind_operations array_ops = {
@@ -2153,7 +2815,7 @@ static struct btf_kind_operations array_ops = {
        .check_member = btf_array_check_member,
        .check_kflag_member = btf_generic_check_kflag_member,
        .log_details = btf_array_log,
-       .seq_show = btf_array_seq_show,
+       .show = btf_array_show,
 };
 
 static int btf_struct_check_member(struct btf_verifier_env *env,
@@ -2376,15 +3038,18 @@ int btf_find_spin_lock(const struct btf *btf, const struct btf_type *t)
        return off;
 }
 
-static void btf_struct_seq_show(const struct btf *btf, const struct btf_type *t,
-                               u32 type_id, void *data, u8 bits_offset,
-                               struct seq_file *m)
+static void __btf_struct_show(const struct btf *btf, const struct btf_type *t,
+                             u32 type_id, void *data, u8 bits_offset,
+                             struct btf_show *show)
 {
-       const char *seq = BTF_INFO_KIND(t->info) == BTF_KIND_UNION ? "|" : ",";
        const struct btf_member *member;
+       void *safe_data;
        u32 i;
 
-       seq_puts(m, "{");
+       safe_data = btf_show_start_struct_type(show, t, type_id, data);
+       if (!safe_data)
+               return;
+
        for_each_member(i, t, member) {
                const struct btf_type *member_type = btf_type_by_id(btf,
                                                                member->type);
@@ -2393,23 +3058,65 @@ static void btf_struct_seq_show(const struct btf *btf, const struct btf_type *t,
                u32 bytes_offset;
                u8 bits8_offset;
 
-               if (i)
-                       seq_puts(m, seq);
+               btf_show_start_member(show, member);
 
                member_offset = btf_member_bit_offset(t, member);
                bitfield_size = btf_member_bitfield_size(t, member);
                bytes_offset = BITS_ROUNDDOWN_BYTES(member_offset);
                bits8_offset = BITS_PER_BYTE_MASKED(member_offset);
                if (bitfield_size) {
-                       btf_bitfield_seq_show(data + bytes_offset, bits8_offset,
-                                             bitfield_size, m);
+                       safe_data = btf_show_start_type(show, member_type,
+                                                       member->type,
+                                                       data + bytes_offset);
+                       if (safe_data)
+                               btf_bitfield_show(safe_data,
+                                                 bits8_offset,
+                                                 bitfield_size, show);
+                       btf_show_end_type(show);
                } else {
                        ops = btf_type_ops(member_type);
-                       ops->seq_show(btf, member_type, member->type,
-                                     data + bytes_offset, bits8_offset, m);
+                       ops->show(btf, member_type, member->type,
+                                 data + bytes_offset, bits8_offset, show);
                }
+
+               btf_show_end_member(show);
        }
-       seq_puts(m, "}");
+
+       btf_show_end_struct_type(show);
+}
+
+static void btf_struct_show(const struct btf *btf, const struct btf_type *t,
+                           u32 type_id, void *data, u8 bits_offset,
+                           struct btf_show *show)
+{
+       const struct btf_member *m = show->state.member;
+
+       /*
+        * First check if any members would be shown (are non-zero).
+        * See comments above "struct btf_show" definition for more
+        * details on how this works at a high-level.
+        */
+       if (show->state.depth > 0 && !(show->flags & BTF_SHOW_ZERO)) {
+               if (!show->state.depth_check) {
+                       show->state.depth_check = show->state.depth + 1;
+                       show->state.depth_to_show = 0;
+               }
+               __btf_struct_show(btf, t, type_id, data, bits_offset, show);
+               /* Restore saved member data here */
+               show->state.member = m;
+               if (show->state.depth_check != show->state.depth + 1)
+                       return;
+               show->state.depth_check = 0;
+
+               if (show->state.depth_to_show <= show->state.depth)
+                       return;
+               /*
+                * Reaching here indicates we have recursed and found
+                * non-zero child values.
+                */
+       }
+
+       __btf_struct_show(btf, t, type_id, data, bits_offset, show);
 }
 
 static struct btf_kind_operations struct_ops = {
@@ -2418,7 +3125,7 @@ static struct btf_kind_operations struct_ops = {
        .check_member = btf_struct_check_member,
        .check_kflag_member = btf_generic_check_kflag_member,
        .log_details = btf_struct_log,
-       .seq_show = btf_struct_seq_show,
+       .show = btf_struct_show,
 };
 
 static int btf_enum_check_member(struct btf_verifier_env *env,
@@ -2549,24 +3256,35 @@ static void btf_enum_log(struct btf_verifier_env *env,
        btf_verifier_log(env, "size=%u vlen=%u", t->size, btf_type_vlen(t));
 }
 
-static void btf_enum_seq_show(const struct btf *btf, const struct btf_type *t,
-                             u32 type_id, void *data, u8 bits_offset,
-                             struct seq_file *m)
+static void btf_enum_show(const struct btf *btf, const struct btf_type *t,
+                         u32 type_id, void *data, u8 bits_offset,
+                         struct btf_show *show)
 {
        const struct btf_enum *enums = btf_type_enum(t);
        u32 i, nr_enums = btf_type_vlen(t);
-       int v = *(int *)data;
+       void *safe_data;
+       int v;
+
+       safe_data = btf_show_start_type(show, t, type_id, data);
+       if (!safe_data)
+               return;
+
+       v = *(int *)safe_data;
 
        for (i = 0; i < nr_enums; i++) {
-               if (v == enums[i].val) {
-                       seq_printf(m, "%s",
-                                  __btf_name_by_offset(btf,
-                                                       enums[i].name_off));
-                       return;
-               }
+               if (v != enums[i].val)
+                       continue;
+
+               btf_show_type_value(show, "%s",
+                                   __btf_name_by_offset(btf,
+                                                        enums[i].name_off));
+
+               btf_show_end_type(show);
+               return;
        }
 
-       seq_printf(m, "%d", v);
+       btf_show_type_value(show, "%d", v);
+       btf_show_end_type(show);
 }
 
 static struct btf_kind_operations enum_ops = {
@@ -2575,7 +3293,7 @@ static struct btf_kind_operations enum_ops = {
        .check_member = btf_enum_check_member,
        .check_kflag_member = btf_enum_check_kflag_member,
        .log_details = btf_enum_log,
-       .seq_show = btf_enum_seq_show,
+       .show = btf_enum_show,
 };
 
 static s32 btf_func_proto_check_meta(struct btf_verifier_env *env,
@@ -2662,7 +3380,7 @@ static struct btf_kind_operations func_proto_ops = {
        .check_member = btf_df_check_member,
        .check_kflag_member = btf_df_check_kflag_member,
        .log_details = btf_func_proto_log,
-       .seq_show = btf_df_seq_show,
+       .show = btf_df_show,
 };
 
 static s32 btf_func_check_meta(struct btf_verifier_env *env,
@@ -2696,7 +3414,7 @@ static struct btf_kind_operations func_ops = {
        .check_member = btf_df_check_member,
        .check_kflag_member = btf_df_check_kflag_member,
        .log_details = btf_ref_type_log,
-       .seq_show = btf_df_seq_show,
+       .show = btf_df_show,
 };
 
 static s32 btf_var_check_meta(struct btf_verifier_env *env,
@@ -2760,7 +3478,7 @@ static const struct btf_kind_operations var_ops = {
        .check_member           = btf_df_check_member,
        .check_kflag_member     = btf_df_check_kflag_member,
        .log_details            = btf_var_log,
-       .seq_show               = btf_var_seq_show,
+       .show                   = btf_var_show,
 };
 
 static s32 btf_datasec_check_meta(struct btf_verifier_env *env,
@@ -2886,24 +3604,28 @@ static void btf_datasec_log(struct btf_verifier_env *env,
        btf_verifier_log(env, "size=%u vlen=%u", t->size, btf_type_vlen(t));
 }
 
-static void btf_datasec_seq_show(const struct btf *btf,
-                                const struct btf_type *t, u32 type_id,
-                                void *data, u8 bits_offset,
-                                struct seq_file *m)
+static void btf_datasec_show(const struct btf *btf,
+                            const struct btf_type *t, u32 type_id,
+                            void *data, u8 bits_offset,
+                            struct btf_show *show)
 {
        const struct btf_var_secinfo *vsi;
        const struct btf_type *var;
        u32 i;
 
-       seq_printf(m, "section (\"%s\") = {", __btf_name_by_offset(btf, t->name_off));
+       if (!btf_show_start_type(show, t, type_id, data))
+               return;
+
+       btf_show_type_value(show, "section (\"%s\") = {",
+                           __btf_name_by_offset(btf, t->name_off));
        for_each_vsi(i, t, vsi) {
                var = btf_type_by_id(btf, vsi->type);
                if (i)
-                       seq_puts(m, ",");
-               btf_type_ops(var)->seq_show(btf, var, vsi->type,
-                                           data + vsi->offset, bits_offset, m);
+                       btf_show(show, ",");
+               btf_type_ops(var)->show(btf, var, vsi->type,
+                                       data + vsi->offset, bits_offset, show);
        }
-       seq_puts(m, "}");
+       btf_show_end_type(show);
 }
 
 static const struct btf_kind_operations datasec_ops = {
@@ -2912,7 +3634,7 @@ static const struct btf_kind_operations datasec_ops = {
        .check_member           = btf_df_check_member,
        .check_kflag_member     = btf_df_check_kflag_member,
        .log_details            = btf_datasec_log,
-       .seq_show               = btf_datasec_seq_show,
+       .show                   = btf_datasec_show,
 };
 
 static int btf_func_proto_check(struct btf_verifier_env *env,
@@ -3706,7 +4428,7 @@ errout:
 
 struct btf *bpf_prog_get_target_btf(const struct bpf_prog *prog)
 {
-       struct bpf_prog *tgt_prog = prog->aux->linked_prog;
+       struct bpf_prog *tgt_prog = prog->aux->dst_prog;
 
        if (tgt_prog) {
                return tgt_prog->aux->btf;
@@ -3733,7 +4455,7 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
                    struct bpf_insn_access_aux *info)
 {
        const struct btf_type *t = prog->aux->attach_func_proto;
-       struct bpf_prog *tgt_prog = prog->aux->linked_prog;
+       struct bpf_prog *tgt_prog = prog->aux->dst_prog;
        struct btf *btf = bpf_prog_get_target_btf(prog);
        const char *tname = prog->aux->attach_func_name;
        struct bpf_verifier_log *log = info->log;
@@ -3860,7 +4582,14 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
 
        info->reg_type = PTR_TO_BTF_ID;
        if (tgt_prog) {
-               ret = btf_translate_to_vmlinux(log, btf, t, tgt_prog->type, arg);
+               enum bpf_prog_type tgt_type;
+
+               if (tgt_prog->type == BPF_PROG_TYPE_EXT)
+                       tgt_type = tgt_prog->aux->saved_dst_prog_type;
+               else
+                       tgt_type = tgt_prog->type;
+
+               ret = btf_translate_to_vmlinux(log, btf, t, tgt_type, arg);
                if (ret > 0) {
                        info->btf_id = ret;
                        return true;
@@ -4388,7 +5117,7 @@ static int btf_check_func_type_match(struct bpf_verifier_log *log,
 }
 
 /* Compare BTFs of given program with BTF of target program */
-int btf_check_type_match(struct bpf_verifier_env *env, struct bpf_prog *prog,
+int btf_check_type_match(struct bpf_verifier_log *log, const struct bpf_prog *prog,
                         struct btf *btf2, const struct btf_type *t2)
 {
        struct btf *btf1 = prog->aux->btf;
@@ -4396,7 +5125,7 @@ int btf_check_type_match(struct bpf_verifier_env *env, struct bpf_prog *prog,
        u32 btf_id = 0;
 
        if (!prog->aux->func_info) {
-               bpf_log(&env->log, "Program extension requires BTF\n");
+               bpf_log(log, "Program extension requires BTF\n");
                return -EINVAL;
        }
 
@@ -4408,7 +5137,7 @@ int btf_check_type_match(struct bpf_verifier_env *env, struct bpf_prog *prog,
        if (!t1 || !btf_type_is_func(t1))
                return -EFAULT;
 
-       return btf_check_func_type_match(&env->log, btf1, t1, btf2, t2);
+       return btf_check_func_type_match(log, btf1, t1, btf2, t2);
 }
 
 /* Compare BTF of a function with given bpf_reg_state.
@@ -4559,7 +5288,7 @@ int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog,
                return -EFAULT;
        }
        if (prog_type == BPF_PROG_TYPE_EXT)
-               prog_type = prog->aux->linked_prog->type;
+               prog_type = prog->aux->dst_prog->type;
 
        t = btf_type_by_id(btf, t->type);
        if (!t || !btf_type_is_func_proto(t)) {
@@ -4606,12 +5335,93 @@ int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog,
        return 0;
 }
 
+static void btf_type_show(const struct btf *btf, u32 type_id, void *obj,
+                         struct btf_show *show)
+{
+       const struct btf_type *t = btf_type_by_id(btf, type_id);
+
+       show->btf = btf;
+       memset(&show->state, 0, sizeof(show->state));
+       memset(&show->obj, 0, sizeof(show->obj));
+
+       btf_type_ops(t)->show(btf, t, type_id, obj, 0, show);
+}
+
+static void btf_seq_show(struct btf_show *show, const char *fmt,
+                        va_list args)
+{
+       seq_vprintf((struct seq_file *)show->target, fmt, args);
+}
+
+int btf_type_seq_show_flags(const struct btf *btf, u32 type_id,
+                           void *obj, struct seq_file *m, u64 flags)
+{
+       struct btf_show sseq;
+
+       sseq.target = m;
+       sseq.showfn = btf_seq_show;
+       sseq.flags = flags;
+
+       btf_type_show(btf, type_id, obj, &sseq);
+
+       return sseq.state.status;
+}
+
 void btf_type_seq_show(const struct btf *btf, u32 type_id, void *obj,
                       struct seq_file *m)
 {
-       const struct btf_type *t = btf_type_by_id(btf, type_id);
+       (void) btf_type_seq_show_flags(btf, type_id, obj, m,
+                                      BTF_SHOW_NONAME | BTF_SHOW_COMPACT |
+                                      BTF_SHOW_ZERO | BTF_SHOW_UNSAFE);
+}
+
+struct btf_show_snprintf {
+       struct btf_show show;
+       int len_left;           /* space left in string */
+       int len;                /* length we would have written */
+};
+
+static void btf_snprintf_show(struct btf_show *show, const char *fmt,
+                             va_list args)
+{
+       struct btf_show_snprintf *ssnprintf = (struct btf_show_snprintf *)show;
+       int len;
+
+       len = vsnprintf(show->target, ssnprintf->len_left, fmt, args);
+
+       if (len < 0) {
+               ssnprintf->len_left = 0;
+               ssnprintf->len = len;
+       } else if (len > ssnprintf->len_left) {
+               /* no space, drive on to get length we would have written */
+               ssnprintf->len_left = 0;
+               ssnprintf->len += len;
+       } else {
+               ssnprintf->len_left -= len;
+               ssnprintf->len += len;
+               show->target += len;
+       }
+}
+
+int btf_type_snprintf_show(const struct btf *btf, u32 type_id, void *obj,
+                          char *buf, int len, u64 flags)
+{
+       struct btf_show_snprintf ssnprintf;
+
+       ssnprintf.show.target = buf;
+       ssnprintf.show.flags = flags;
+       ssnprintf.show.showfn = btf_snprintf_show;
+       ssnprintf.len_left = len;
+       ssnprintf.len = 0;
+
+       btf_type_show(btf, type_id, obj, (struct btf_show *)&ssnprintf);
+
+       /* If we encountered an error, return it. */
+       if (ssnprintf.show.state.status)
+               return ssnprintf.show.state.status;
 
-       btf_type_ops(t)->seq_show(btf, t, type_id, obj, 0, m);
+       /* Otherwise return length we would have written */
+       return ssnprintf.len;
 }
 
 #ifdef CONFIG_PROC_FS
index c4811b1..cda674f 100644 (file)
@@ -99,6 +99,7 @@ struct bpf_prog *bpf_prog_alloc_no_stats(unsigned int size, gfp_t gfp_extra_flag
 
        INIT_LIST_HEAD_RCU(&fp->aux->ksym.lnode);
        mutex_init(&fp->aux->used_maps_mutex);
+       mutex_init(&fp->aux->dst_mutex);
 
        return fp;
 }
@@ -255,6 +256,7 @@ void __bpf_prog_free(struct bpf_prog *fp)
 {
        if (fp->aux) {
                mutex_destroy(&fp->aux->used_maps_mutex);
+               mutex_destroy(&fp->aux->dst_mutex);
                free_percpu(fp->aux->stats);
                kfree(fp->aux->poke_tab);
                kfree(fp->aux);
@@ -2138,7 +2140,8 @@ static void bpf_prog_free_deferred(struct work_struct *work)
        if (aux->prog->has_callchain_buf)
                put_callchain_buffers();
 #endif
-       bpf_trampoline_put(aux->trampoline);
+       if (aux->dst_trampoline)
+               bpf_trampoline_put(aux->dst_trampoline);
        for (i = 0; i < aux->func_cnt; i++)
                bpf_jit_free(aux->func[i]);
        if (aux->func_cnt) {
@@ -2154,8 +2157,8 @@ void bpf_prog_free(struct bpf_prog *fp)
 {
        struct bpf_prog_aux *aux = fp->aux;
 
-       if (aux->linked_prog)
-               bpf_prog_put(aux->linked_prog);
+       if (aux->dst_prog)
+               bpf_prog_put(aux->dst_prog);
        INIT_WORK(&aux->work, bpf_prog_free_deferred);
        schedule_work(&aux->work);
 }
@@ -2216,6 +2219,8 @@ const struct bpf_func_proto bpf_get_current_cgroup_id_proto __weak;
 const struct bpf_func_proto bpf_get_current_ancestor_cgroup_id_proto __weak;
 const struct bpf_func_proto bpf_get_local_storage_proto __weak;
 const struct bpf_func_proto bpf_get_ns_current_pid_tgid_proto __weak;
+const struct bpf_func_proto bpf_snprintf_btf_proto __weak;
+const struct bpf_func_proto bpf_seq_printf_btf_proto __weak;
 
 const struct bpf_func_proto * __weak bpf_get_trace_printk_proto(void)
 {
index 7e1a8ad..c61a23b 100644 (file)
@@ -155,8 +155,7 @@ static void cpu_map_kthread_stop(struct work_struct *work)
        kthread_stop(rcpu->kthread);
 }
 
-static struct sk_buff *cpu_map_build_skb(struct bpf_cpu_map_entry *rcpu,
-                                        struct xdp_frame *xdpf,
+static struct sk_buff *cpu_map_build_skb(struct xdp_frame *xdpf,
                                         struct sk_buff *skb)
 {
        unsigned int hard_start_headroom;
@@ -365,7 +364,7 @@ static int cpu_map_kthread_run(void *data)
                        struct sk_buff *skb = skbs[i];
                        int ret;
 
-                       skb = cpu_map_build_skb(rcpu, xdpf, skb);
+                       skb = cpu_map_build_skb(xdpf, skb);
                        if (!skb) {
                                xdp_return_frame(xdpf);
                                continue;
index 5cc7425..e825441 100644 (file)
@@ -683,6 +683,10 @@ bpf_base_func_proto(enum bpf_func_id func_id)
                if (!perfmon_capable())
                        return NULL;
                return bpf_get_trace_printk_proto();
+       case BPF_FUNC_snprintf_btf:
+               if (!perfmon_capable())
+                       return NULL;
+               return &bpf_snprintf_btf_proto;
        case BPF_FUNC_jiffies64:
                return &bpf_jiffies64_proto;
        default:
diff --git a/kernel/bpf/preload/.gitignore b/kernel/bpf/preload/.gitignore
new file mode 100644 (file)
index 0000000..856a4c5
--- /dev/null
@@ -0,0 +1,4 @@
+/FEATURE-DUMP.libbpf
+/bpf_helper_defs.h
+/feature
+/bpf_preload_umd
index 12c7b62..23ee310 100644 (file)
@@ -12,6 +12,8 @@ userccflags += -I $(srctree)/tools/include/ -I $(srctree)/tools/include/uapi \
 
 userprogs := bpf_preload_umd
 
+clean-files := $(userprogs) bpf_helper_defs.h FEATURE-DUMP.libbpf staticobjs/ feature/
+
 bpf_preload_umd-objs := iterators/iterators.o
 bpf_preload_umd-userldlibs := $(LIBBPF_A) -lelf -lz
 
index 5ded550..52aa7b3 100644 (file)
@@ -42,7 +42,7 @@ struct bpf_prog_aux {
        __u32 id;
        char name[16];
        const char *attach_func_name;
-       struct bpf_prog *linked_prog;
+       struct bpf_prog *dst_prog;
        struct bpf_func_info *func_info;
        struct btf *btf;
 };
@@ -108,7 +108,7 @@ int dump_bpf_prog(struct bpf_iter__bpf_prog *ctx)
 
        BPF_SEQ_PRINTF(seq, "%4u %-16s %s %s\n", aux->id,
                       get_name(aux->btf, aux->func_info[0].type_id, aux->name),
-                      aux->attach_func_name, aux->linked_prog->aux->name);
+                      aux->attach_func_name, aux->dst_prog->aux->name);
        return 0;
 }
 char LICENSE[] SEC("license") = "GPL";
index c317135..cf9a6a9 100644 (file)
@@ -47,7 +47,7 @@ iterators_bpf__open_opts(const struct bpf_object_open_opts *opts)
 {
        struct iterators_bpf *obj;
 
-       obj = (typeof(obj))calloc(1, sizeof(*obj));
+       obj = (struct iterators_bpf *)calloc(1, sizeof(*obj));
        if (!obj)
                return NULL;
        if (iterators_bpf__create_skeleton(obj))
@@ -105,7 +105,7 @@ iterators_bpf__create_skeleton(struct iterators_bpf *obj)
 {
        struct bpf_object_skeleton *s;
 
-       s = (typeof(s))calloc(1, sizeof(*s));
+       s = (struct bpf_object_skeleton *)calloc(1, sizeof(*s));
        if (!s)
                return -1;
        obj->skeleton = s;
@@ -117,7 +117,7 @@ iterators_bpf__create_skeleton(struct iterators_bpf *obj)
        /* maps */
        s->map_cnt = 1;
        s->map_skel_sz = sizeof(*s->maps);
-       s->maps = (typeof(s->maps))calloc(s->map_cnt, s->map_skel_sz);
+       s->maps = (struct bpf_map_skeleton *)calloc(s->map_cnt, s->map_skel_sz);
        if (!s->maps)
                goto err;
 
@@ -128,7 +128,7 @@ iterators_bpf__create_skeleton(struct iterators_bpf *obj)
        /* programs */
        s->prog_cnt = 2;
        s->prog_skel_sz = sizeof(*s->progs);
-       s->progs = (typeof(s->progs))calloc(s->prog_cnt, s->prog_skel_sz);
+       s->progs = (struct bpf_prog_skeleton *)calloc(s->prog_cnt, s->prog_skel_sz);
        if (!s->progs)
                goto err;
 
@@ -140,10 +140,10 @@ iterators_bpf__create_skeleton(struct iterators_bpf *obj)
        s->progs[1].prog = &obj->progs.dump_bpf_prog;
        s->progs[1].link = &obj->links.dump_bpf_prog;
 
-       s->data_sz = 7128;
+       s->data_sz = 7176;
        s->data = (void *)"\
 \x7f\x45\x4c\x46\x02\x01\x01\0\0\0\0\0\0\0\0\0\x01\0\xf7\0\x01\0\0\0\0\0\0\0\0\
-\0\0\0\0\0\0\0\0\0\0\0\x18\x18\0\0\0\0\0\0\0\0\0\0\x40\0\0\0\0\0\x40\0\x0f\0\
+\0\0\0\0\0\0\0\0\0\0\0\x48\x18\0\0\0\0\0\0\0\0\0\0\x40\0\0\0\0\0\x40\0\x0f\0\
 \x0e\0\x79\x12\0\0\0\0\0\0\x79\x26\0\0\0\0\0\0\x79\x17\x08\0\0\0\0\0\x15\x07\
 \x1a\0\0\0\0\0\x79\x21\x10\0\0\0\0\0\x55\x01\x08\0\0\0\0\0\xbf\xa4\0\0\0\0\0\0\
 \x07\x04\0\0\xe8\xff\xff\xff\xbf\x61\0\0\0\0\0\0\x18\x02\0\0\0\0\0\0\0\0\0\0\0\
@@ -164,7 +164,7 @@ iterators_bpf__create_skeleton(struct iterators_bpf *obj)
 \x79\x86\0\0\0\0\0\0\xbf\xa1\0\0\0\0\0\0\x07\x01\0\0\xf8\xff\xff\xff\xb7\x02\0\
 \0\x08\0\0\0\x85\0\0\0\x71\0\0\0\xb7\x01\0\0\0\0\0\0\x79\xa3\xf8\xff\0\0\0\0\
 \x0f\x13\0\0\0\0\0\0\xbf\xa1\0\0\0\0\0\0\x07\x01\0\0\xf4\xff\xff\xff\xb7\x02\0\
-\0\x04\0\0\0\x85\0\0\0\x04\0\0\0\xb7\x03\0\0\x04\0\0\0\x61\xa1\xf4\xff\0\0\0\0\
+\0\x04\0\0\0\x85\0\0\0\x71\0\0\0\xb7\x03\0\0\x04\0\0\0\x61\xa1\xf4\xff\0\0\0\0\
 \x61\x82\x10\0\0\0\0\0\x3d\x21\x02\0\0\0\0\0\x0f\x16\0\0\0\0\0\0\xbf\x69\0\0\0\
 \0\0\0\x7b\x9a\xd8\xff\0\0\0\0\x79\x71\x18\0\0\0\0\0\x7b\x1a\xe0\xff\0\0\0\0\
 \x79\x71\x20\0\0\0\0\0\x79\x11\0\0\0\0\0\0\x0f\x31\0\0\0\0\0\0\x7b\x1a\xe8\xff\
@@ -176,230 +176,232 @@ iterators_bpf__create_skeleton(struct iterators_bpf *obj)
 \x73\x25\x36\x64\x0a\0\x20\x20\x69\x64\x20\x6e\x61\x6d\x65\x20\x20\x20\x20\x20\
 \x20\x20\x20\x20\x20\x20\x20\x20\x61\x74\x74\x61\x63\x68\x65\x64\x0a\0\x25\x34\
 \x75\x20\x25\x2d\x31\x36\x73\x20\x25\x73\x20\x25\x73\x0a\0\x47\x50\x4c\0\x9f\
-\xeb\x01\0\x18\0\0\0\0\0\0\0\x1c\x04\0\0\x1c\x04\0\0\0\x05\0\0\0\0\0\0\0\0\0\
+\xeb\x01\0\x18\0\0\0\0\0\0\0\x1c\x04\0\0\x1c\x04\0\0\x09\x05\0\0\0\0\0\0\0\0\0\
 \x02\x02\0\0\0\x01\0\0\0\x02\0\0\x04\x10\0\0\0\x13\0\0\0\x03\0\0\0\0\0\0\0\x18\
 \0\0\0\x04\0\0\0\x40\0\0\0\0\0\0\0\0\0\0\x02\x08\0\0\0\0\0\0\0\0\0\0\x02\x0d\0\
 \0\0\0\0\0\0\x01\0\0\x0d\x06\0\0\0\x1c\0\0\0\x01\0\0\0\x20\0\0\0\0\0\0\x01\x04\
-\0\0\0\x20\0\0\x01\x24\0\0\0\x01\0\0\x0c\x05\0\0\0\xa3\0\0\0\x03\0\0\x04\x18\0\
-\0\0\xb1\0\0\0\x09\0\0\0\0\0\0\0\xb5\0\0\0\x0b\0\0\0\x40\0\0\0\xc0\0\0\0\x0b\0\
-\0\0\x80\0\0\0\0\0\0\0\0\0\0\x02\x0a\0\0\0\xc8\0\0\0\0\0\0\x07\0\0\0\0\xd1\0\0\
-\0\0\0\0\x08\x0c\0\0\0\xd7\0\0\0\0\0\0\x01\x08\0\0\0\x40\0\0\0\x98\x01\0\0\x03\
-\0\0\x04\x18\0\0\0\xa0\x01\0\0\x0e\0\0\0\0\0\0\0\xa3\x01\0\0\x11\0\0\0\x20\0\0\
-\0\xa8\x01\0\0\x0e\0\0\0\xa0\0\0\0\xb4\x01\0\0\0\0\0\x08\x0f\0\0\0\xba\x01\0\0\
-\0\0\0\x01\x04\0\0\0\x20\0\0\0\xc7\x01\0\0\0\0\0\x01\x01\0\0\0\x08\0\0\x01\0\0\
-\0\0\0\0\0\x03\0\0\0\0\x10\0\0\0\x12\0\0\0\x10\0\0\0\xcc\x01\0\0\0\0\0\x01\x04\
-\0\0\0\x20\0\0\0\0\0\0\0\0\0\0\x02\x14\0\0\0\x30\x02\0\0\x02\0\0\x04\x10\0\0\0\
-\x13\0\0\0\x03\0\0\0\0\0\0\0\x43\x02\0\0\x15\0\0\0\x40\0\0\0\0\0\0\0\0\0\0\x02\
-\x18\0\0\0\0\0\0\0\x01\0\0\x0d\x06\0\0\0\x1c\0\0\0\x13\0\0\0\x48\x02\0\0\x01\0\
-\0\x0c\x16\0\0\0\x94\x02\0\0\x01\0\0\x04\x08\0\0\0\x9d\x02\0\0\x19\0\0\0\0\0\0\
-\0\0\0\0\0\0\0\0\x02\x1a\0\0\0\xee\x02\0\0\x06\0\0\x04\x38\0\0\0\xa0\x01\0\0\
-\x0e\0\0\0\0\0\0\0\xa3\x01\0\0\x11\0\0\0\x20\0\0\0\xfb\x02\0\0\x1b\0\0\0\xc0\0\
-\0\0\x0c\x03\0\0\x15\0\0\0\0\x01\0\0\x18\x03\0\0\x1d\0\0\0\x40\x01\0\0\x22\x03\
+\0\0\0\x20\0\0\x01\x24\0\0\0\x01\0\0\x0c\x05\0\0\0\xaf\0\0\0\x03\0\0\x04\x18\0\
+\0\0\xbd\0\0\0\x09\0\0\0\0\0\0\0\xc1\0\0\0\x0b\0\0\0\x40\0\0\0\xcc\0\0\0\x0b\0\
+\0\0\x80\0\0\0\0\0\0\0\0\0\0\x02\x0a\0\0\0\xd4\0\0\0\0\0\0\x07\0\0\0\0\xdd\0\0\
+\0\0\0\0\x08\x0c\0\0\0\xe3\0\0\0\0\0\0\x01\x08\0\0\0\x40\0\0\0\xa4\x01\0\0\x03\
+\0\0\x04\x18\0\0\0\xac\x01\0\0\x0e\0\0\0\0\0\0\0\xaf\x01\0\0\x11\0\0\0\x20\0\0\
+\0\xb4\x01\0\0\x0e\0\0\0\xa0\0\0\0\xc0\x01\0\0\0\0\0\x08\x0f\0\0\0\xc6\x01\0\0\
+\0\0\0\x01\x04\0\0\0\x20\0\0\0\xd3\x01\0\0\0\0\0\x01\x01\0\0\0\x08\0\0\x01\0\0\
+\0\0\0\0\0\x03\0\0\0\0\x10\0\0\0\x12\0\0\0\x10\0\0\0\xd8\x01\0\0\0\0\0\x01\x04\
+\0\0\0\x20\0\0\0\0\0\0\0\0\0\0\x02\x14\0\0\0\x3c\x02\0\0\x02\0\0\x04\x10\0\0\0\
+\x13\0\0\0\x03\0\0\0\0\0\0\0\x4f\x02\0\0\x15\0\0\0\x40\0\0\0\0\0\0\0\0\0\0\x02\
+\x18\0\0\0\0\0\0\0\x01\0\0\x0d\x06\0\0\0\x1c\0\0\0\x13\0\0\0\x54\x02\0\0\x01\0\
+\0\x0c\x16\0\0\0\xa0\x02\0\0\x01\0\0\x04\x08\0\0\0\xa9\x02\0\0\x19\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\x02\x1a\0\0\0\xfa\x02\0\0\x06\0\0\x04\x38\0\0\0\xac\x01\0\0\
+\x0e\0\0\0\0\0\0\0\xaf\x01\0\0\x11\0\0\0\x20\0\0\0\x07\x03\0\0\x1b\0\0\0\xc0\0\
+\0\0\x18\x03\0\0\x15\0\0\0\0\x01\0\0\x21\x03\0\0\x1d\0\0\0\x40\x01\0\0\x2b\x03\
 \0\0\x1e\0\0\0\x80\x01\0\0\0\0\0\0\0\0\0\x02\x1c\0\0\0\0\0\0\0\0\0\0\x0a\x10\0\
-\0\0\0\0\0\0\0\0\0\x02\x1f\0\0\0\0\0\0\0\0\0\0\x02\x20\0\0\0\x6c\x03\0\0\x02\0\
-\0\x04\x08\0\0\0\x7a\x03\0\0\x0e\0\0\0\0\0\0\0\x83\x03\0\0\x0e\0\0\0\x20\0\0\0\
-\x22\x03\0\0\x03\0\0\x04\x18\0\0\0\x8d\x03\0\0\x1b\0\0\0\0\0\0\0\x95\x03\0\0\
-\x21\0\0\0\x40\0\0\0\x9b\x03\0\0\x23\0\0\0\x80\0\0\0\0\0\0\0\0\0\0\x02\x22\0\0\
-\0\0\0\0\0\0\0\0\x02\x24\0\0\0\x9f\x03\0\0\x01\0\0\x04\x04\0\0\0\xaa\x03\0\0\
-\x0e\0\0\0\0\0\0\0\x13\x04\0\0\x01\0\0\x04\x04\0\0\0\x1c\x04\0\0\x0e\0\0\0\0\0\
-\0\0\0\0\0\0\0\0\0\x03\0\0\0\0\x1c\0\0\0\x12\0\0\0\x23\0\0\0\x92\x04\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\x02\x1f\0\0\0\0\0\0\0\0\0\0\x02\x20\0\0\0\x75\x03\0\0\x02\0\
+\0\x04\x08\0\0\0\x83\x03\0\0\x0e\0\0\0\0\0\0\0\x8c\x03\0\0\x0e\0\0\0\x20\0\0\0\
+\x2b\x03\0\0\x03\0\0\x04\x18\0\0\0\x96\x03\0\0\x1b\0\0\0\0\0\0\0\x9e\x03\0\0\
+\x21\0\0\0\x40\0\0\0\xa4\x03\0\0\x23\0\0\0\x80\0\0\0\0\0\0\0\0\0\0\x02\x22\0\0\
+\0\0\0\0\0\0\0\0\x02\x24\0\0\0\xa8\x03\0\0\x01\0\0\x04\x04\0\0\0\xb3\x03\0\0\
+\x0e\0\0\0\0\0\0\0\x1c\x04\0\0\x01\0\0\x04\x04\0\0\0\x25\x04\0\0\x0e\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\x03\0\0\0\0\x1c\0\0\0\x12\0\0\0\x23\0\0\0\x9b\x04\0\0\0\0\0\
 \x0e\x25\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x03\0\0\0\0\x1c\0\0\0\x12\0\0\0\x0e\0\0\0\
-\xa6\x04\0\0\0\0\0\x0e\x27\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x03\0\0\0\0\x1c\0\0\0\
-\x12\0\0\0\x20\0\0\0\xbc\x04\0\0\0\0\0\x0e\x29\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x03\
-\0\0\0\0\x1c\0\0\0\x12\0\0\0\x11\0\0\0\xd1\x04\0\0\0\0\0\x0e\x2b\0\0\0\0\0\0\0\
-\0\0\0\0\0\0\0\x03\0\0\0\0\x10\0\0\0\x12\0\0\0\x04\0\0\0\xe8\x04\0\0\0\0\0\x0e\
-\x2d\0\0\0\x01\0\0\0\xf0\x04\0\0\x04\0\0\x0f\0\0\0\0\x26\0\0\0\0\0\0\0\x23\0\0\
+\xaf\x04\0\0\0\0\0\x0e\x27\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x03\0\0\0\0\x1c\0\0\0\
+\x12\0\0\0\x20\0\0\0\xc5\x04\0\0\0\0\0\x0e\x29\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x03\
+\0\0\0\0\x1c\0\0\0\x12\0\0\0\x11\0\0\0\xda\x04\0\0\0\0\0\x0e\x2b\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\x03\0\0\0\0\x10\0\0\0\x12\0\0\0\x04\0\0\0\xf1\x04\0\0\0\0\0\x0e\
+\x2d\0\0\0\x01\0\0\0\xf9\x04\0\0\x04\0\0\x0f\0\0\0\0\x26\0\0\0\0\0\0\0\x23\0\0\
 \0\x28\0\0\0\x23\0\0\0\x0e\0\0\0\x2a\0\0\0\x31\0\0\0\x20\0\0\0\x2c\0\0\0\x51\0\
-\0\0\x11\0\0\0\xf8\x04\0\0\x01\0\0\x0f\0\0\0\0\x2e\0\0\0\0\0\0\0\x04\0\0\0\0\
+\0\0\x11\0\0\0\x01\x05\0\0\x01\0\0\x0f\0\0\0\0\x2e\0\0\0\0\0\0\0\x04\0\0\0\0\
 \x62\x70\x66\x5f\x69\x74\x65\x72\x5f\x5f\x62\x70\x66\x5f\x6d\x61\x70\0\x6d\x65\
 \x74\x61\0\x6d\x61\x70\0\x63\x74\x78\0\x69\x6e\x74\0\x64\x75\x6d\x70\x5f\x62\
 \x70\x66\x5f\x6d\x61\x70\0\x69\x74\x65\x72\x2f\x62\x70\x66\x5f\x6d\x61\x70\0\
-\x30\x3a\x30\0\x2f\x77\x2f\x6e\x65\x74\x2d\x6e\x65\x78\x74\x2f\x6b\x65\x72\x6e\
-\x65\x6c\x2f\x62\x70\x66\x2f\x70\x72\x65\x6c\x6f\x61\x64\x2f\x69\x74\x65\x72\
-\x61\x74\x6f\x72\x73\x2f\x69\x74\x65\x72\x61\x74\x6f\x72\x73\x2e\x62\x70\x66\
-\x2e\x63\0\x09\x73\x74\x72\x75\x63\x74\x20\x73\x65\x71\x5f\x66\x69\x6c\x65\x20\
-\x2a\x73\x65\x71\x20\x3d\x20\x63\x74\x78\x2d\x3e\x6d\x65\x74\x61\x2d\x3e\x73\
-\x65\x71\x3b\0\x62\x70\x66\x5f\x69\x74\x65\x72\x5f\x6d\x65\x74\x61\0\x73\x65\
-\x71\0\x73\x65\x73\x73\x69\x6f\x6e\x5f\x69\x64\0\x73\x65\x71\x5f\x6e\x75\x6d\0\
-\x73\x65\x71\x5f\x66\x69\x6c\x65\0\x5f\x5f\x75\x36\x34\0\x6c\x6f\x6e\x67\x20\
-\x6c\x6f\x6e\x67\x20\x75\x6e\x73\x69\x67\x6e\x65\x64\x20\x69\x6e\x74\0\x30\x3a\
-\x31\0\x09\x73\x74\x72\x75\x63\x74\x20\x62\x70\x66\x5f\x6d\x61\x70\x20\x2a\x6d\
-\x61\x70\x20\x3d\x20\x63\x74\x78\x2d\x3e\x6d\x61\x70\x3b\0\x09\x69\x66\x20\x28\
-\x21\x6d\x61\x70\x29\0\x30\x3a\x32\0\x09\x5f\x5f\x75\x36\x34\x20\x73\x65\x71\
-\x5f\x6e\x75\x6d\x20\x3d\x20\x63\x74\x78\x2d\x3e\x6d\x65\x74\x61\x2d\x3e\x73\
-\x65\x71\x5f\x6e\x75\x6d\x3b\0\x09\x69\x66\x20\x28\x73\x65\x71\x5f\x6e\x75\x6d\
-\x20\x3d\x3d\x20\x30\x29\0\x09\x09\x42\x50\x46\x5f\x53\x45\x51\x5f\x50\x52\x49\
-\x4e\x54\x46\x28\x73\x65\x71\x2c\x20\x22\x20\x20\x69\x64\x20\x6e\x61\x6d\x65\
-\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x6d\x61\x78\x5f\x65\x6e\
-\x74\x72\x69\x65\x73\x5c\x6e\x22\x29\x3b\0\x62\x70\x66\x5f\x6d\x61\x70\0\x69\
-\x64\0\x6e\x61\x6d\x65\0\x6d\x61\x78\x5f\x65\x6e\x74\x72\x69\x65\x73\0\x5f\x5f\
-\x75\x33\x32\0\x75\x6e\x73\x69\x67\x6e\x65\x64\x20\x69\x6e\x74\0\x63\x68\x61\
-\x72\0\x5f\x5f\x41\x52\x52\x41\x59\x5f\x53\x49\x5a\x45\x5f\x54\x59\x50\x45\x5f\
-\x5f\0\x09\x42\x50\x46\x5f\x53\x45\x51\x5f\x50\x52\x49\x4e\x54\x46\x28\x73\x65\
-\x71\x2c\x20\x22\x25\x34\x75\x20\x25\x2d\x31\x36\x73\x25\x36\x64\x5c\x6e\x22\
-\x2c\x20\x6d\x61\x70\x2d\x3e\x69\x64\x2c\x20\x6d\x61\x70\x2d\x3e\x6e\x61\x6d\
-\x65\x2c\x20\x6d\x61\x70\x2d\x3e\x6d\x61\x78\x5f\x65\x6e\x74\x72\x69\x65\x73\
-\x29\x3b\0\x7d\0\x62\x70\x66\x5f\x69\x74\x65\x72\x5f\x5f\x62\x70\x66\x5f\x70\
-\x72\x6f\x67\0\x70\x72\x6f\x67\0\x64\x75\x6d\x70\x5f\x62\x70\x66\x5f\x70\x72\
-\x6f\x67\0\x69\x74\x65\x72\x2f\x62\x70\x66\x5f\x70\x72\x6f\x67\0\x09\x73\x74\
-\x72\x75\x63\x74\x20\x62\x70\x66\x5f\x70\x72\x6f\x67\x20\x2a\x70\x72\x6f\x67\
-\x20\x3d\x20\x63\x74\x78\x2d\x3e\x70\x72\x6f\x67\x3b\0\x09\x69\x66\x20\x28\x21\
-\x70\x72\x6f\x67\x29\0\x62\x70\x66\x5f\x70\x72\x6f\x67\0\x61\x75\x78\0\x09\x61\
-\x75\x78\x20\x3d\x20\x70\x72\x6f\x67\x2d\x3e\x61\x75\x78\x3b\0\x09\x09\x42\x50\
-\x46\x5f\x53\x45\x51\x5f\x50\x52\x49\x4e\x54\x46\x28\x73\x65\x71\x2c\x20\x22\
-\x20\x20\x69\x64\x20\x6e\x61\x6d\x65\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\
-\x20\x20\x20\x61\x74\x74\x61\x63\x68\x65\x64\x5c\x6e\x22\x29\x3b\0\x62\x70\x66\
-\x5f\x70\x72\x6f\x67\x5f\x61\x75\x78\0\x61\x74\x74\x61\x63\x68\x5f\x66\x75\x6e\
-\x63\x5f\x6e\x61\x6d\x65\0\x6c\x69\x6e\x6b\x65\x64\x5f\x70\x72\x6f\x67\0\x66\
-\x75\x6e\x63\x5f\x69\x6e\x66\x6f\0\x62\x74\x66\0\x09\x42\x50\x46\x5f\x53\x45\
-\x51\x5f\x50\x52\x49\x4e\x54\x46\x28\x73\x65\x71\x2c\x20\x22\x25\x34\x75\x20\
-\x25\x2d\x31\x36\x73\x20\x25\x73\x20\x25\x73\x5c\x6e\x22\x2c\x20\x61\x75\x78\
-\x2d\x3e\x69\x64\x2c\0\x30\x3a\x34\0\x30\x3a\x35\0\x09\x69\x66\x20\x28\x21\x62\
-\x74\x66\x29\0\x62\x70\x66\x5f\x66\x75\x6e\x63\x5f\x69\x6e\x66\x6f\0\x69\x6e\
-\x73\x6e\x5f\x6f\x66\x66\0\x74\x79\x70\x65\x5f\x69\x64\0\x30\0\x73\x74\x72\x69\
-\x6e\x67\x73\0\x74\x79\x70\x65\x73\0\x68\x64\x72\0\x62\x74\x66\x5f\x68\x65\x61\
-\x64\x65\x72\0\x73\x74\x72\x5f\x6c\x65\x6e\0\x09\x74\x79\x70\x65\x73\x20\x3d\
-\x20\x62\x74\x66\x2d\x3e\x74\x79\x70\x65\x73\x3b\0\x09\x62\x70\x66\x5f\x70\x72\
-\x6f\x62\x65\x5f\x72\x65\x61\x64\x5f\x6b\x65\x72\x6e\x65\x6c\x28\x26\x74\x2c\
-\x20\x73\x69\x7a\x65\x6f\x66\x28\x74\x29\x2c\x20\x74\x79\x70\x65\x73\x20\x2b\
-\x20\x62\x74\x66\x5f\x69\x64\x29\x3b\0\x09\x73\x74\x72\x20\x3d\x20\x62\x74\x66\
-\x2d\x3e\x73\x74\x72\x69\x6e\x67\x73\x3b\0\x62\x74\x66\x5f\x74\x79\x70\x65\0\
-\x6e\x61\x6d\x65\x5f\x6f\x66\x66\0\x09\x6e\x61\x6d\x65\x5f\x6f\x66\x66\x20\x3d\
-\x20\x42\x50\x46\x5f\x43\x4f\x52\x45\x5f\x52\x45\x41\x44\x28\x74\x2c\x20\x6e\
-\x61\x6d\x65\x5f\x6f\x66\x66\x29\x3b\0\x30\x3a\x32\x3a\x30\0\x09\x69\x66\x20\
-\x28\x6e\x61\x6d\x65\x5f\x6f\x66\x66\x20\x3e\x3d\x20\x62\x74\x66\x2d\x3e\x68\
-\x64\x72\x2e\x73\x74\x72\x5f\x6c\x65\x6e\x29\0\x09\x72\x65\x74\x75\x72\x6e\x20\
-\x73\x74\x72\x20\x2b\x20\x6e\x61\x6d\x65\x5f\x6f\x66\x66\x3b\0\x30\x3a\x33\0\
-\x64\x75\x6d\x70\x5f\x62\x70\x66\x5f\x6d\x61\x70\x2e\x5f\x5f\x5f\x66\x6d\x74\0\
-\x64\x75\x6d\x70\x5f\x62\x70\x66\x5f\x6d\x61\x70\x2e\x5f\x5f\x5f\x66\x6d\x74\
-\x2e\x31\0\x64\x75\x6d\x70\x5f\x62\x70\x66\x5f\x70\x72\x6f\x67\x2e\x5f\x5f\x5f\
-\x66\x6d\x74\0\x64\x75\x6d\x70\x5f\x62\x70\x66\x5f\x70\x72\x6f\x67\x2e\x5f\x5f\
-\x5f\x66\x6d\x74\x2e\x32\0\x4c\x49\x43\x45\x4e\x53\x45\0\x2e\x72\x6f\x64\x61\
-\x74\x61\0\x6c\x69\x63\x65\x6e\x73\x65\0\x9f\xeb\x01\0\x20\0\0\0\0\0\0\0\x24\0\
-\0\0\x24\0\0\0\x44\x02\0\0\x68\x02\0\0\xa4\x01\0\0\x08\0\0\0\x31\0\0\0\x01\0\0\
-\0\0\0\0\0\x07\0\0\0\x56\x02\0\0\x01\0\0\0\0\0\0\0\x17\0\0\0\x10\0\0\0\x31\0\0\
-\0\x09\0\0\0\0\0\0\0\x42\0\0\0\x7b\0\0\0\x1e\x40\x01\0\x08\0\0\0\x42\0\0\0\x7b\
-\0\0\0\x24\x40\x01\0\x10\0\0\0\x42\0\0\0\xf2\0\0\0\x1d\x48\x01\0\x18\0\0\0\x42\
-\0\0\0\x13\x01\0\0\x06\x50\x01\0\x20\0\0\0\x42\0\0\0\x22\x01\0\0\x1d\x44\x01\0\
-\x28\0\0\0\x42\0\0\0\x47\x01\0\0\x06\x5c\x01\0\x38\0\0\0\x42\0\0\0\x5a\x01\0\0\
-\x03\x60\x01\0\x70\0\0\0\x42\0\0\0\xe0\x01\0\0\x02\x68\x01\0\xf0\0\0\0\x42\0\0\
-\0\x2e\x02\0\0\x01\x70\x01\0\x56\x02\0\0\x1a\0\0\0\0\0\0\0\x42\0\0\0\x7b\0\0\0\
-\x1e\x84\x01\0\x08\0\0\0\x42\0\0\0\x7b\0\0\0\x24\x84\x01\0\x10\0\0\0\x42\0\0\0\
-\x64\x02\0\0\x1f\x8c\x01\0\x18\0\0\0\x42\0\0\0\x88\x02\0\0\x06\x98\x01\0\x20\0\
-\0\0\x42\0\0\0\xa1\x02\0\0\x0e\xa4\x01\0\x28\0\0\0\x42\0\0\0\x22\x01\0\0\x1d\
-\x88\x01\0\x30\0\0\0\x42\0\0\0\x47\x01\0\0\x06\xa8\x01\0\x40\0\0\0\x42\0\0\0\
-\xb3\x02\0\0\x03\xac\x01\0\x80\0\0\0\x42\0\0\0\x26\x03\0\0\x02\xb4\x01\0\xb8\0\
-\0\0\x42\0\0\0\x61\x03\0\0\x06\x08\x01\0\xd0\0\0\0\x42\0\0\0\0\0\0\0\0\0\0\0\
-\xd8\0\0\0\x42\0\0\0\xb2\x03\0\0\x0f\x14\x01\0\xe0\0\0\0\x42\0\0\0\xc7\x03\0\0\
-\x2d\x18\x01\0\xf0\0\0\0\x42\0\0\0\xfe\x03\0\0\x0d\x10\x01\0\0\x01\0\0\x42\0\0\
-\0\0\0\0\0\0\0\0\0\x08\x01\0\0\x42\0\0\0\xc7\x03\0\0\x02\x18\x01\0\x20\x01\0\0\
-\x42\0\0\0\x25\x04\0\0\x0d\x1c\x01\0\x38\x01\0\0\x42\0\0\0\0\0\0\0\0\0\0\0\x40\
-\x01\0\0\x42\0\0\0\x25\x04\0\0\x0d\x1c\x01\0\x58\x01\0\0\x42\0\0\0\x25\x04\0\0\
-\x0d\x1c\x01\0\x60\x01\0\0\x42\0\0\0\x53\x04\0\0\x1b\x20\x01\0\x68\x01\0\0\x42\
-\0\0\0\x53\x04\0\0\x06\x20\x01\0\x70\x01\0\0\x42\0\0\0\x76\x04\0\0\x0d\x28\x01\
-\0\x78\x01\0\0\x42\0\0\0\0\0\0\0\0\0\0\0\x80\x01\0\0\x42\0\0\0\x26\x03\0\0\x02\
-\xb4\x01\0\xf8\x01\0\0\x42\0\0\0\x2e\x02\0\0\x01\xc4\x01\0\x10\0\0\0\x31\0\0\0\
-\x07\0\0\0\0\0\0\0\x02\0\0\0\x3e\0\0\0\0\0\0\0\x08\0\0\0\x08\0\0\0\x3e\0\0\0\0\
-\0\0\0\x10\0\0\0\x02\0\0\0\xee\0\0\0\0\0\0\0\x20\0\0\0\x08\0\0\0\x1e\x01\0\0\0\
-\0\0\0\x70\0\0\0\x0d\0\0\0\x3e\0\0\0\0\0\0\0\x80\0\0\0\x0d\0\0\0\xee\0\0\0\0\0\
-\0\0\xa0\0\0\0\x0d\0\0\0\x1e\x01\0\0\0\0\0\0\x56\x02\0\0\x12\0\0\0\0\0\0\0\x14\
-\0\0\0\x3e\0\0\0\0\0\0\0\x08\0\0\0\x08\0\0\0\x3e\0\0\0\0\0\0\0\x10\0\0\0\x14\0\
-\0\0\xee\0\0\0\0\0\0\0\x20\0\0\0\x18\0\0\0\x3e\0\0\0\0\0\0\0\x28\0\0\0\x08\0\0\
-\0\x1e\x01\0\0\0\0\0\0\x80\0\0\0\x1a\0\0\0\x3e\0\0\0\0\0\0\0\x90\0\0\0\x1a\0\0\
-\0\xee\0\0\0\0\0\0\0\xa8\0\0\0\x1a\0\0\0\x59\x03\0\0\0\0\0\0\xb0\0\0\0\x1a\0\0\
-\0\x5d\x03\0\0\0\0\0\0\xc0\0\0\0\x1f\0\0\0\x8b\x03\0\0\0\0\0\0\xd8\0\0\0\x20\0\
-\0\0\xee\0\0\0\0\0\0\0\xf0\0\0\0\x20\0\0\0\x3e\0\0\0\0\0\0\0\x18\x01\0\0\x24\0\
-\0\0\x3e\0\0\0\0\0\0\0\x50\x01\0\0\x1a\0\0\0\xee\0\0\0\0\0\0\0\x60\x01\0\0\x20\
-\0\0\0\x4d\x04\0\0\0\0\0\0\x88\x01\0\0\x1a\0\0\0\x1e\x01\0\0\0\0\0\0\x98\x01\0\
-\0\x1a\0\0\0\x8e\x04\0\0\0\0\0\0\xa0\x01\0\0\x18\0\0\0\x3e\0\0\0\0\0\0\0\0\0\0\
-\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xd6\0\0\0\0\0\x02\0\x70\0\0\0\0\
-\0\0\0\0\0\0\0\0\0\0\0\xc8\0\0\0\0\0\x02\0\xf0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
-\xcf\0\0\0\0\0\x03\0\x78\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xc1\0\0\0\0\0\x03\0\x80\
-\x01\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xba\0\0\0\0\0\x03\0\xf8\x01\0\0\0\0\0\0\0\0\0\
-\0\0\0\0\0\x14\0\0\0\x01\0\x04\0\0\0\0\0\0\0\0\0\x23\0\0\0\0\0\0\0\xf4\0\0\0\
-\x01\0\x04\0\x23\0\0\0\0\0\0\0\x0e\0\0\0\0\0\0\0\x28\0\0\0\x01\0\x04\0\x31\0\0\
-\0\0\0\0\0\x20\0\0\0\0\0\0\0\xdd\0\0\0\x01\0\x04\0\x51\0\0\0\0\0\0\0\x11\0\0\0\
-\0\0\0\0\0\0\0\0\x03\0\x02\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x03\0\x03\
-\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x03\0\x04\0\0\0\0\0\0\0\0\0\0\0\0\0\
-\0\0\0\0\xb2\0\0\0\x11\0\x05\0\0\0\0\0\0\0\0\0\x04\0\0\0\0\0\0\0\x3d\0\0\0\x12\
-\0\x02\0\0\0\0\0\0\0\0\0\0\x01\0\0\0\0\0\0\x5b\0\0\0\x12\0\x03\0\0\0\0\0\0\0\0\
-\0\x08\x02\0\0\0\0\0\0\x48\0\0\0\0\0\0\0\x01\0\0\0\x0c\0\0\0\xc8\0\0\0\0\0\0\0\
-\x01\0\0\0\x0c\0\0\0\x50\0\0\0\0\0\0\0\x01\0\0\0\x0c\0\0\0\xd0\x01\0\0\0\0\0\0\
-\x01\0\0\0\x0c\0\0\0\xf0\x03\0\0\0\0\0\0\x0a\0\0\0\x0c\0\0\0\xfc\x03\0\0\0\0\0\
-\0\x0a\0\0\0\x0c\0\0\0\x08\x04\0\0\0\0\0\0\x0a\0\0\0\x0c\0\0\0\x14\x04\0\0\0\0\
-\0\0\x0a\0\0\0\x0c\0\0\0\x2c\x04\0\0\0\0\0\0\0\0\0\0\x0d\0\0\0\x2c\0\0\0\0\0\0\
-\0\0\0\0\0\x0a\0\0\0\x3c\0\0\0\0\0\0\0\0\0\0\0\x0b\0\0\0\x50\0\0\0\0\0\0\0\0\0\
-\0\0\x0a\0\0\0\x60\0\0\0\0\0\0\0\0\0\0\0\x0a\0\0\0\x70\0\0\0\0\0\0\0\0\0\0\0\
-\x0a\0\0\0\x80\0\0\0\0\0\0\0\0\0\0\0\x0a\0\0\0\x90\0\0\0\0\0\0\0\0\0\0\0\x0a\0\
-\0\0\xa0\0\0\0\0\0\0\0\0\0\0\0\x0a\0\0\0\xb0\0\0\0\0\0\0\0\0\0\0\0\x0a\0\0\0\
-\xc0\0\0\0\0\0\0\0\0\0\0\0\x0a\0\0\0\xd0\0\0\0\0\0\0\0\0\0\0\0\x0a\0\0\0\xe8\0\
-\0\0\0\0\0\0\0\0\0\0\x0b\0\0\0\xf8\0\0\0\0\0\0\0\0\0\0\0\x0b\0\0\0\x08\x01\0\0\
-\0\0\0\0\0\0\0\0\x0b\0\0\0\x18\x01\0\0\0\0\0\0\0\0\0\0\x0b\0\0\0\x28\x01\0\0\0\
-\0\0\0\0\0\0\0\x0b\0\0\0\x38\x01\0\0\0\0\0\0\0\0\0\0\x0b\0\0\0\x48\x01\0\0\0\0\
-\0\0\0\0\0\0\x0b\0\0\0\x58\x01\0\0\0\0\0\0\0\0\0\0\x0b\0\0\0\x68\x01\0\0\0\0\0\
-\0\0\0\0\0\x0b\0\0\0\x78\x01\0\0\0\0\0\0\0\0\0\0\x0b\0\0\0\x88\x01\0\0\0\0\0\0\
-\0\0\0\0\x0b\0\0\0\x98\x01\0\0\0\0\0\0\0\0\0\0\x0b\0\0\0\xa8\x01\0\0\0\0\0\0\0\
-\0\0\0\x0b\0\0\0\xb8\x01\0\0\0\0\0\0\0\0\0\0\x0b\0\0\0\xc8\x01\0\0\0\0\0\0\0\0\
-\0\0\x0b\0\0\0\xd8\x01\0\0\0\0\0\0\0\0\0\0\x0b\0\0\0\xe8\x01\0\0\0\0\0\0\0\0\0\
-\0\x0b\0\0\0\xf8\x01\0\0\0\0\0\0\0\0\0\0\x0b\0\0\0\x08\x02\0\0\0\0\0\0\0\0\0\0\
-\x0b\0\0\0\x18\x02\0\0\0\0\0\0\0\0\0\0\x0b\0\0\0\x28\x02\0\0\0\0\0\0\0\0\0\0\
-\x0b\0\0\0\x38\x02\0\0\0\0\0\0\0\0\0\0\x0b\0\0\0\x48\x02\0\0\0\0\0\0\0\0\0\0\
-\x0b\0\0\0\x58\x02\0\0\0\0\0\0\0\0\0\0\x0b\0\0\0\x68\x02\0\0\0\0\0\0\0\0\0\0\
-\x0b\0\0\0\x78\x02\0\0\0\0\0\0\0\0\0\0\x0b\0\0\0\x94\x02\0\0\0\0\0\0\0\0\0\0\
-\x0a\0\0\0\xa4\x02\0\0\0\0\0\0\0\0\0\0\x0a\0\0\0\xb4\x02\0\0\0\0\0\0\0\0\0\0\
-\x0a\0\0\0\xc4\x02\0\0\0\0\0\0\0\0\0\0\x0a\0\0\0\xd4\x02\0\0\0\0\0\0\0\0\0\0\
-\x0a\0\0\0\xe4\x02\0\0\0\0\0\0\0\0\0\0\x0a\0\0\0\xf4\x02\0\0\0\0\0\0\0\0\0\0\
-\x0a\0\0\0\x0c\x03\0\0\0\0\0\0\0\0\0\0\x0b\0\0\0\x1c\x03\0\0\0\0\0\0\0\0\0\0\
-\x0b\0\0\0\x2c\x03\0\0\0\0\0\0\0\0\0\0\x0b\0\0\0\x3c\x03\0\0\0\0\0\0\0\0\0\0\
-\x0b\0\0\0\x4c\x03\0\0\0\0\0\0\0\0\0\0\x0b\0\0\0\x5c\x03\0\0\0\0\0\0\0\0\0\0\
-\x0b\0\0\0\x6c\x03\0\0\0\0\0\0\0\0\0\0\x0b\0\0\0\x7c\x03\0\0\0\0\0\0\0\0\0\0\
-\x0b\0\0\0\x8c\x03\0\0\0\0\0\0\0\0\0\0\x0b\0\0\0\x9c\x03\0\0\0\0\0\0\0\0\0\0\
-\x0b\0\0\0\xac\x03\0\0\0\0\0\0\0\0\0\0\x0b\0\0\0\xbc\x03\0\0\0\0\0\0\0\0\0\0\
-\x0b\0\0\0\xcc\x03\0\0\0\0\0\0\0\0\0\0\x0b\0\0\0\xdc\x03\0\0\0\0\0\0\0\0\0\0\
-\x0b\0\0\0\xec\x03\0\0\0\0\0\0\0\0\0\0\x0b\0\0\0\xfc\x03\0\0\0\0\0\0\0\0\0\0\
-\x0b\0\0\0\x0c\x04\0\0\0\0\0\0\0\0\0\0\x0b\0\0\0\x1c\x04\0\0\0\0\0\0\0\0\0\0\
-\x0b\0\0\0\x4e\x4f\x41\x42\x43\x44\x4d\0\x2e\x74\x65\x78\x74\0\x2e\x72\x65\x6c\
-\x2e\x42\x54\x46\x2e\x65\x78\x74\0\x64\x75\x6d\x70\x5f\x62\x70\x66\x5f\x6d\x61\
-\x70\x2e\x5f\x5f\x5f\x66\x6d\x74\0\x64\x75\x6d\x70\x5f\x62\x70\x66\x5f\x70\x72\
-\x6f\x67\x2e\x5f\x5f\x5f\x66\x6d\x74\0\x64\x75\x6d\x70\x5f\x62\x70\x66\x5f\x6d\
-\x61\x70\0\x2e\x72\x65\x6c\x69\x74\x65\x72\x2f\x62\x70\x66\x5f\x6d\x61\x70\0\
-\x64\x75\x6d\x70\x5f\x62\x70\x66\x5f\x70\x72\x6f\x67\0\x2e\x72\x65\x6c\x69\x74\
-\x65\x72\x2f\x62\x70\x66\x5f\x70\x72\x6f\x67\0\x2e\x6c\x6c\x76\x6d\x5f\x61\x64\
-\x64\x72\x73\x69\x67\0\x6c\x69\x63\x65\x6e\x73\x65\0\x2e\x73\x74\x72\x74\x61\
-\x62\0\x2e\x73\x79\x6d\x74\x61\x62\0\x2e\x72\x6f\x64\x61\x74\x61\0\x2e\x72\x65\
-\x6c\x2e\x42\x54\x46\0\x4c\x49\x43\x45\x4e\x53\x45\0\x4c\x42\x42\x31\x5f\x37\0\
-\x4c\x42\x42\x31\x5f\x36\0\x4c\x42\x42\x30\x5f\x34\0\x4c\x42\x42\x31\x5f\x33\0\
-\x4c\x42\x42\x30\x5f\x33\0\x64\x75\x6d\x70\x5f\x62\x70\x66\x5f\x70\x72\x6f\x67\
-\x2e\x5f\x5f\x5f\x66\x6d\x74\x2e\x32\0\x64\x75\x6d\x70\x5f\x62\x70\x66\x5f\x6d\
-\x61\x70\x2e\x5f\x5f\x5f\x66\x6d\x74\x2e\x31\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\x30\x3a\x30\0\x2f\x68\x6f\x6d\x65\x2f\x61\x6c\x72\x75\x61\x2f\x62\x75\x69\x6c\
+\x64\x2f\x6c\x69\x6e\x75\x78\x2f\x6b\x65\x72\x6e\x65\x6c\x2f\x62\x70\x66\x2f\
+\x70\x72\x65\x6c\x6f\x61\x64\x2f\x69\x74\x65\x72\x61\x74\x6f\x72\x73\x2f\x69\
+\x74\x65\x72\x61\x74\x6f\x72\x73\x2e\x62\x70\x66\x2e\x63\0\x09\x73\x74\x72\x75\
+\x63\x74\x20\x73\x65\x71\x5f\x66\x69\x6c\x65\x20\x2a\x73\x65\x71\x20\x3d\x20\
+\x63\x74\x78\x2d\x3e\x6d\x65\x74\x61\x2d\x3e\x73\x65\x71\x3b\0\x62\x70\x66\x5f\
+\x69\x74\x65\x72\x5f\x6d\x65\x74\x61\0\x73\x65\x71\0\x73\x65\x73\x73\x69\x6f\
+\x6e\x5f\x69\x64\0\x73\x65\x71\x5f\x6e\x75\x6d\0\x73\x65\x71\x5f\x66\x69\x6c\
+\x65\0\x5f\x5f\x75\x36\x34\0\x6c\x6f\x6e\x67\x20\x6c\x6f\x6e\x67\x20\x75\x6e\
+\x73\x69\x67\x6e\x65\x64\x20\x69\x6e\x74\0\x30\x3a\x31\0\x09\x73\x74\x72\x75\
+\x63\x74\x20\x62\x70\x66\x5f\x6d\x61\x70\x20\x2a\x6d\x61\x70\x20\x3d\x20\x63\
+\x74\x78\x2d\x3e\x6d\x61\x70\x3b\0\x09\x69\x66\x20\x28\x21\x6d\x61\x70\x29\0\
+\x30\x3a\x32\0\x09\x5f\x5f\x75\x36\x34\x20\x73\x65\x71\x5f\x6e\x75\x6d\x20\x3d\
+\x20\x63\x74\x78\x2d\x3e\x6d\x65\x74\x61\x2d\x3e\x73\x65\x71\x5f\x6e\x75\x6d\
+\x3b\0\x09\x69\x66\x20\x28\x73\x65\x71\x5f\x6e\x75\x6d\x20\x3d\x3d\x20\x30\x29\
+\0\x09\x09\x42\x50\x46\x5f\x53\x45\x51\x5f\x50\x52\x49\x4e\x54\x46\x28\x73\x65\
+\x71\x2c\x20\x22\x20\x20\x69\x64\x20\x6e\x61\x6d\x65\x20\x20\x20\x20\x20\x20\
+\x20\x20\x20\x20\x20\x20\x20\x6d\x61\x78\x5f\x65\x6e\x74\x72\x69\x65\x73\x5c\
+\x6e\x22\x29\x3b\0\x62\x70\x66\x5f\x6d\x61\x70\0\x69\x64\0\x6e\x61\x6d\x65\0\
+\x6d\x61\x78\x5f\x65\x6e\x74\x72\x69\x65\x73\0\x5f\x5f\x75\x33\x32\0\x75\x6e\
+\x73\x69\x67\x6e\x65\x64\x20\x69\x6e\x74\0\x63\x68\x61\x72\0\x5f\x5f\x41\x52\
+\x52\x41\x59\x5f\x53\x49\x5a\x45\x5f\x54\x59\x50\x45\x5f\x5f\0\x09\x42\x50\x46\
+\x5f\x53\x45\x51\x5f\x50\x52\x49\x4e\x54\x46\x28\x73\x65\x71\x2c\x20\x22\x25\
+\x34\x75\x20\x25\x2d\x31\x36\x73\x25\x36\x64\x5c\x6e\x22\x2c\x20\x6d\x61\x70\
+\x2d\x3e\x69\x64\x2c\x20\x6d\x61\x70\x2d\x3e\x6e\x61\x6d\x65\x2c\x20\x6d\x61\
+\x70\x2d\x3e\x6d\x61\x78\x5f\x65\x6e\x74\x72\x69\x65\x73\x29\x3b\0\x7d\0\x62\
+\x70\x66\x5f\x69\x74\x65\x72\x5f\x5f\x62\x70\x66\x5f\x70\x72\x6f\x67\0\x70\x72\
+\x6f\x67\0\x64\x75\x6d\x70\x5f\x62\x70\x66\x5f\x70\x72\x6f\x67\0\x69\x74\x65\
+\x72\x2f\x62\x70\x66\x5f\x70\x72\x6f\x67\0\x09\x73\x74\x72\x75\x63\x74\x20\x62\
+\x70\x66\x5f\x70\x72\x6f\x67\x20\x2a\x70\x72\x6f\x67\x20\x3d\x20\x63\x74\x78\
+\x2d\x3e\x70\x72\x6f\x67\x3b\0\x09\x69\x66\x20\x28\x21\x70\x72\x6f\x67\x29\0\
+\x62\x70\x66\x5f\x70\x72\x6f\x67\0\x61\x75\x78\0\x09\x61\x75\x78\x20\x3d\x20\
+\x70\x72\x6f\x67\x2d\x3e\x61\x75\x78\x3b\0\x09\x09\x42\x50\x46\x5f\x53\x45\x51\
+\x5f\x50\x52\x49\x4e\x54\x46\x28\x73\x65\x71\x2c\x20\x22\x20\x20\x69\x64\x20\
+\x6e\x61\x6d\x65\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x61\x74\
+\x74\x61\x63\x68\x65\x64\x5c\x6e\x22\x29\x3b\0\x62\x70\x66\x5f\x70\x72\x6f\x67\
+\x5f\x61\x75\x78\0\x61\x74\x74\x61\x63\x68\x5f\x66\x75\x6e\x63\x5f\x6e\x61\x6d\
+\x65\0\x64\x73\x74\x5f\x70\x72\x6f\x67\0\x66\x75\x6e\x63\x5f\x69\x6e\x66\x6f\0\
+\x62\x74\x66\0\x09\x42\x50\x46\x5f\x53\x45\x51\x5f\x50\x52\x49\x4e\x54\x46\x28\
+\x73\x65\x71\x2c\x20\x22\x25\x34\x75\x20\x25\x2d\x31\x36\x73\x20\x25\x73\x20\
+\x25\x73\x5c\x6e\x22\x2c\x20\x61\x75\x78\x2d\x3e\x69\x64\x2c\0\x30\x3a\x34\0\
+\x30\x3a\x35\0\x09\x69\x66\x20\x28\x21\x62\x74\x66\x29\0\x62\x70\x66\x5f\x66\
+\x75\x6e\x63\x5f\x69\x6e\x66\x6f\0\x69\x6e\x73\x6e\x5f\x6f\x66\x66\0\x74\x79\
+\x70\x65\x5f\x69\x64\0\x30\0\x73\x74\x72\x69\x6e\x67\x73\0\x74\x79\x70\x65\x73\
+\0\x68\x64\x72\0\x62\x74\x66\x5f\x68\x65\x61\x64\x65\x72\0\x73\x74\x72\x5f\x6c\
+\x65\x6e\0\x09\x74\x79\x70\x65\x73\x20\x3d\x20\x62\x74\x66\x2d\x3e\x74\x79\x70\
+\x65\x73\x3b\0\x09\x62\x70\x66\x5f\x70\x72\x6f\x62\x65\x5f\x72\x65\x61\x64\x5f\
+\x6b\x65\x72\x6e\x65\x6c\x28\x26\x74\x2c\x20\x73\x69\x7a\x65\x6f\x66\x28\x74\
+\x29\x2c\x20\x74\x79\x70\x65\x73\x20\x2b\x20\x62\x74\x66\x5f\x69\x64\x29\x3b\0\
+\x09\x73\x74\x72\x20\x3d\x20\x62\x74\x66\x2d\x3e\x73\x74\x72\x69\x6e\x67\x73\
+\x3b\0\x62\x74\x66\x5f\x74\x79\x70\x65\0\x6e\x61\x6d\x65\x5f\x6f\x66\x66\0\x09\
+\x6e\x61\x6d\x65\x5f\x6f\x66\x66\x20\x3d\x20\x42\x50\x46\x5f\x43\x4f\x52\x45\
+\x5f\x52\x45\x41\x44\x28\x74\x2c\x20\x6e\x61\x6d\x65\x5f\x6f\x66\x66\x29\x3b\0\
+\x30\x3a\x32\x3a\x30\0\x09\x69\x66\x20\x28\x6e\x61\x6d\x65\x5f\x6f\x66\x66\x20\
+\x3e\x3d\x20\x62\x74\x66\x2d\x3e\x68\x64\x72\x2e\x73\x74\x72\x5f\x6c\x65\x6e\
+\x29\0\x09\x72\x65\x74\x75\x72\x6e\x20\x73\x74\x72\x20\x2b\x20\x6e\x61\x6d\x65\
+\x5f\x6f\x66\x66\x3b\0\x30\x3a\x33\0\x64\x75\x6d\x70\x5f\x62\x70\x66\x5f\x6d\
+\x61\x70\x2e\x5f\x5f\x5f\x66\x6d\x74\0\x64\x75\x6d\x70\x5f\x62\x70\x66\x5f\x6d\
+\x61\x70\x2e\x5f\x5f\x5f\x66\x6d\x74\x2e\x31\0\x64\x75\x6d\x70\x5f\x62\x70\x66\
+\x5f\x70\x72\x6f\x67\x2e\x5f\x5f\x5f\x66\x6d\x74\0\x64\x75\x6d\x70\x5f\x62\x70\
+\x66\x5f\x70\x72\x6f\x67\x2e\x5f\x5f\x5f\x66\x6d\x74\x2e\x32\0\x4c\x49\x43\x45\
+\x4e\x53\x45\0\x2e\x72\x6f\x64\x61\x74\x61\0\x6c\x69\x63\x65\x6e\x73\x65\0\x9f\
+\xeb\x01\0\x20\0\0\0\0\0\0\0\x24\0\0\0\x24\0\0\0\x44\x02\0\0\x68\x02\0\0\xa4\
+\x01\0\0\x08\0\0\0\x31\0\0\0\x01\0\0\0\0\0\0\0\x07\0\0\0\x62\x02\0\0\x01\0\0\0\
+\0\0\0\0\x17\0\0\0\x10\0\0\0\x31\0\0\0\x09\0\0\0\0\0\0\0\x42\0\0\0\x87\0\0\0\
+\x1e\x40\x01\0\x08\0\0\0\x42\0\0\0\x87\0\0\0\x24\x40\x01\0\x10\0\0\0\x42\0\0\0\
+\xfe\0\0\0\x1d\x48\x01\0\x18\0\0\0\x42\0\0\0\x1f\x01\0\0\x06\x50\x01\0\x20\0\0\
+\0\x42\0\0\0\x2e\x01\0\0\x1d\x44\x01\0\x28\0\0\0\x42\0\0\0\x53\x01\0\0\x06\x5c\
+\x01\0\x38\0\0\0\x42\0\0\0\x66\x01\0\0\x03\x60\x01\0\x70\0\0\0\x42\0\0\0\xec\
+\x01\0\0\x02\x68\x01\0\xf0\0\0\0\x42\0\0\0\x3a\x02\0\0\x01\x70\x01\0\x62\x02\0\
+\0\x1a\0\0\0\0\0\0\0\x42\0\0\0\x87\0\0\0\x1e\x84\x01\0\x08\0\0\0\x42\0\0\0\x87\
+\0\0\0\x24\x84\x01\0\x10\0\0\0\x42\0\0\0\x70\x02\0\0\x1f\x8c\x01\0\x18\0\0\0\
+\x42\0\0\0\x94\x02\0\0\x06\x98\x01\0\x20\0\0\0\x42\0\0\0\xad\x02\0\0\x0e\xa4\
+\x01\0\x28\0\0\0\x42\0\0\0\x2e\x01\0\0\x1d\x88\x01\0\x30\0\0\0\x42\0\0\0\x53\
+\x01\0\0\x06\xa8\x01\0\x40\0\0\0\x42\0\0\0\xbf\x02\0\0\x03\xac\x01\0\x80\0\0\0\
+\x42\0\0\0\x2f\x03\0\0\x02\xb4\x01\0\xb8\0\0\0\x42\0\0\0\x6a\x03\0\0\x06\x08\
+\x01\0\xd0\0\0\0\x42\0\0\0\0\0\0\0\0\0\0\0\xd8\0\0\0\x42\0\0\0\xbb\x03\0\0\x0f\
+\x14\x01\0\xe0\0\0\0\x42\0\0\0\xd0\x03\0\0\x2d\x18\x01\0\xf0\0\0\0\x42\0\0\0\
+\x07\x04\0\0\x0d\x10\x01\0\0\x01\0\0\x42\0\0\0\0\0\0\0\0\0\0\0\x08\x01\0\0\x42\
+\0\0\0\xd0\x03\0\0\x02\x18\x01\0\x20\x01\0\0\x42\0\0\0\x2e\x04\0\0\x0d\x1c\x01\
+\0\x38\x01\0\0\x42\0\0\0\0\0\0\0\0\0\0\0\x40\x01\0\0\x42\0\0\0\x2e\x04\0\0\x0d\
+\x1c\x01\0\x58\x01\0\0\x42\0\0\0\x2e\x04\0\0\x0d\x1c\x01\0\x60\x01\0\0\x42\0\0\
+\0\x5c\x04\0\0\x1b\x20\x01\0\x68\x01\0\0\x42\0\0\0\x5c\x04\0\0\x06\x20\x01\0\
+\x70\x01\0\0\x42\0\0\0\x7f\x04\0\0\x0d\x28\x01\0\x78\x01\0\0\x42\0\0\0\0\0\0\0\
+\0\0\0\0\x80\x01\0\0\x42\0\0\0\x2f\x03\0\0\x02\xb4\x01\0\xf8\x01\0\0\x42\0\0\0\
+\x3a\x02\0\0\x01\xc4\x01\0\x10\0\0\0\x31\0\0\0\x07\0\0\0\0\0\0\0\x02\0\0\0\x3e\
+\0\0\0\0\0\0\0\x08\0\0\0\x08\0\0\0\x3e\0\0\0\0\0\0\0\x10\0\0\0\x02\0\0\0\xfa\0\
+\0\0\0\0\0\0\x20\0\0\0\x08\0\0\0\x2a\x01\0\0\0\0\0\0\x70\0\0\0\x0d\0\0\0\x3e\0\
+\0\0\0\0\0\0\x80\0\0\0\x0d\0\0\0\xfa\0\0\0\0\0\0\0\xa0\0\0\0\x0d\0\0\0\x2a\x01\
+\0\0\0\0\0\0\x62\x02\0\0\x12\0\0\0\0\0\0\0\x14\0\0\0\x3e\0\0\0\0\0\0\0\x08\0\0\
+\0\x08\0\0\0\x3e\0\0\0\0\0\0\0\x10\0\0\0\x14\0\0\0\xfa\0\0\0\0\0\0\0\x20\0\0\0\
+\x18\0\0\0\x3e\0\0\0\0\0\0\0\x28\0\0\0\x08\0\0\0\x2a\x01\0\0\0\0\0\0\x80\0\0\0\
+\x1a\0\0\0\x3e\0\0\0\0\0\0\0\x90\0\0\0\x1a\0\0\0\xfa\0\0\0\0\0\0\0\xa8\0\0\0\
+\x1a\0\0\0\x62\x03\0\0\0\0\0\0\xb0\0\0\0\x1a\0\0\0\x66\x03\0\0\0\0\0\0\xc0\0\0\
+\0\x1f\0\0\0\x94\x03\0\0\0\0\0\0\xd8\0\0\0\x20\0\0\0\xfa\0\0\0\0\0\0\0\xf0\0\0\
+\0\x20\0\0\0\x3e\0\0\0\0\0\0\0\x18\x01\0\0\x24\0\0\0\x3e\0\0\0\0\0\0\0\x50\x01\
+\0\0\x1a\0\0\0\xfa\0\0\0\0\0\0\0\x60\x01\0\0\x20\0\0\0\x56\x04\0\0\0\0\0\0\x88\
+\x01\0\0\x1a\0\0\0\x2a\x01\0\0\0\0\0\0\x98\x01\0\0\x1a\0\0\0\x97\x04\0\0\0\0\0\
+\0\xa0\x01\0\0\x18\0\0\0\x3e\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\x91\0\0\0\x04\0\xf1\xff\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xe6\0\0\
+\0\0\0\x02\0\x70\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xd8\0\0\0\0\0\x02\0\xf0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\xdf\0\0\0\0\0\x03\0\x78\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\xd1\0\0\0\0\0\x03\0\x80\x01\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xca\0\0\0\0\0\x03\0\
+\xf8\x01\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x14\0\0\0\x01\0\x04\0\0\0\0\0\0\0\0\0\x23\
+\0\0\0\0\0\0\0\x04\x01\0\0\x01\0\x04\0\x23\0\0\0\0\0\0\0\x0e\0\0\0\0\0\0\0\x28\
+\0\0\0\x01\0\x04\0\x31\0\0\0\0\0\0\0\x20\0\0\0\0\0\0\0\xed\0\0\0\x01\0\x04\0\
+\x51\0\0\0\0\0\0\0\x11\0\0\0\0\0\0\0\0\0\0\0\x03\0\x02\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\x03\0\x03\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x03\0\
+\x04\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xc2\0\0\0\x11\0\x05\0\0\0\0\0\0\0\0\0\
+\x04\0\0\0\0\0\0\0\x3d\0\0\0\x12\0\x02\0\0\0\0\0\0\0\0\0\0\x01\0\0\0\0\0\0\x5b\
+\0\0\0\x12\0\x03\0\0\0\0\0\0\0\0\0\x08\x02\0\0\0\0\0\0\x48\0\0\0\0\0\0\0\x01\0\
+\0\0\x0d\0\0\0\xc8\0\0\0\0\0\0\0\x01\0\0\0\x0d\0\0\0\x50\0\0\0\0\0\0\0\x01\0\0\
+\0\x0d\0\0\0\xd0\x01\0\0\0\0\0\0\x01\0\0\0\x0d\0\0\0\xf0\x03\0\0\0\0\0\0\x0a\0\
+\0\0\x0d\0\0\0\xfc\x03\0\0\0\0\0\0\x0a\0\0\0\x0d\0\0\0\x08\x04\0\0\0\0\0\0\x0a\
+\0\0\0\x0d\0\0\0\x14\x04\0\0\0\0\0\0\x0a\0\0\0\x0d\0\0\0\x2c\x04\0\0\0\0\0\0\0\
+\0\0\0\x0e\0\0\0\x2c\0\0\0\0\0\0\0\0\0\0\0\x0b\0\0\0\x3c\0\0\0\0\0\0\0\0\0\0\0\
+\x0c\0\0\0\x50\0\0\0\0\0\0\0\0\0\0\0\x0b\0\0\0\x60\0\0\0\0\0\0\0\0\0\0\0\x0b\0\
+\0\0\x70\0\0\0\0\0\0\0\0\0\0\0\x0b\0\0\0\x80\0\0\0\0\0\0\0\0\0\0\0\x0b\0\0\0\
+\x90\0\0\0\0\0\0\0\0\0\0\0\x0b\0\0\0\xa0\0\0\0\0\0\0\0\0\0\0\0\x0b\0\0\0\xb0\0\
+\0\0\0\0\0\0\0\0\0\0\x0b\0\0\0\xc0\0\0\0\0\0\0\0\0\0\0\0\x0b\0\0\0\xd0\0\0\0\0\
+\0\0\0\0\0\0\0\x0b\0\0\0\xe8\0\0\0\0\0\0\0\0\0\0\0\x0c\0\0\0\xf8\0\0\0\0\0\0\0\
+\0\0\0\0\x0c\0\0\0\x08\x01\0\0\0\0\0\0\0\0\0\0\x0c\0\0\0\x18\x01\0\0\0\0\0\0\0\
+\0\0\0\x0c\0\0\0\x28\x01\0\0\0\0\0\0\0\0\0\0\x0c\0\0\0\x38\x01\0\0\0\0\0\0\0\0\
+\0\0\x0c\0\0\0\x48\x01\0\0\0\0\0\0\0\0\0\0\x0c\0\0\0\x58\x01\0\0\0\0\0\0\0\0\0\
+\0\x0c\0\0\0\x68\x01\0\0\0\0\0\0\0\0\0\0\x0c\0\0\0\x78\x01\0\0\0\0\0\0\0\0\0\0\
+\x0c\0\0\0\x88\x01\0\0\0\0\0\0\0\0\0\0\x0c\0\0\0\x98\x01\0\0\0\0\0\0\0\0\0\0\
+\x0c\0\0\0\xa8\x01\0\0\0\0\0\0\0\0\0\0\x0c\0\0\0\xb8\x01\0\0\0\0\0\0\0\0\0\0\
+\x0c\0\0\0\xc8\x01\0\0\0\0\0\0\0\0\0\0\x0c\0\0\0\xd8\x01\0\0\0\0\0\0\0\0\0\0\
+\x0c\0\0\0\xe8\x01\0\0\0\0\0\0\0\0\0\0\x0c\0\0\0\xf8\x01\0\0\0\0\0\0\0\0\0\0\
+\x0c\0\0\0\x08\x02\0\0\0\0\0\0\0\0\0\0\x0c\0\0\0\x18\x02\0\0\0\0\0\0\0\0\0\0\
+\x0c\0\0\0\x28\x02\0\0\0\0\0\0\0\0\0\0\x0c\0\0\0\x38\x02\0\0\0\0\0\0\0\0\0\0\
+\x0c\0\0\0\x48\x02\0\0\0\0\0\0\0\0\0\0\x0c\0\0\0\x58\x02\0\0\0\0\0\0\0\0\0\0\
+\x0c\0\0\0\x68\x02\0\0\0\0\0\0\0\0\0\0\x0c\0\0\0\x78\x02\0\0\0\0\0\0\0\0\0\0\
+\x0c\0\0\0\x94\x02\0\0\0\0\0\0\0\0\0\0\x0b\0\0\0\xa4\x02\0\0\0\0\0\0\0\0\0\0\
+\x0b\0\0\0\xb4\x02\0\0\0\0\0\0\0\0\0\0\x0b\0\0\0\xc4\x02\0\0\0\0\0\0\0\0\0\0\
+\x0b\0\0\0\xd4\x02\0\0\0\0\0\0\0\0\0\0\x0b\0\0\0\xe4\x02\0\0\0\0\0\0\0\0\0\0\
+\x0b\0\0\0\xf4\x02\0\0\0\0\0\0\0\0\0\0\x0b\0\0\0\x0c\x03\0\0\0\0\0\0\0\0\0\0\
+\x0c\0\0\0\x1c\x03\0\0\0\0\0\0\0\0\0\0\x0c\0\0\0\x2c\x03\0\0\0\0\0\0\0\0\0\0\
+\x0c\0\0\0\x3c\x03\0\0\0\0\0\0\0\0\0\0\x0c\0\0\0\x4c\x03\0\0\0\0\0\0\0\0\0\0\
+\x0c\0\0\0\x5c\x03\0\0\0\0\0\0\0\0\0\0\x0c\0\0\0\x6c\x03\0\0\0\0\0\0\0\0\0\0\
+\x0c\0\0\0\x7c\x03\0\0\0\0\0\0\0\0\0\0\x0c\0\0\0\x8c\x03\0\0\0\0\0\0\0\0\0\0\
+\x0c\0\0\0\x9c\x03\0\0\0\0\0\0\0\0\0\0\x0c\0\0\0\xac\x03\0\0\0\0\0\0\0\0\0\0\
+\x0c\0\0\0\xbc\x03\0\0\0\0\0\0\0\0\0\0\x0c\0\0\0\xcc\x03\0\0\0\0\0\0\0\0\0\0\
+\x0c\0\0\0\xdc\x03\0\0\0\0\0\0\0\0\0\0\x0c\0\0\0\xec\x03\0\0\0\0\0\0\0\0\0\0\
+\x0c\0\0\0\xfc\x03\0\0\0\0\0\0\0\0\0\0\x0c\0\0\0\x0c\x04\0\0\0\0\0\0\0\0\0\0\
+\x0c\0\0\0\x1c\x04\0\0\0\0\0\0\0\0\0\0\x0c\0\0\0\x4d\x4e\x40\x41\x42\x43\x4c\0\
+\x2e\x74\x65\x78\x74\0\x2e\x72\x65\x6c\x2e\x42\x54\x46\x2e\x65\x78\x74\0\x64\
+\x75\x6d\x70\x5f\x62\x70\x66\x5f\x6d\x61\x70\x2e\x5f\x5f\x5f\x66\x6d\x74\0\x64\
+\x75\x6d\x70\x5f\x62\x70\x66\x5f\x70\x72\x6f\x67\x2e\x5f\x5f\x5f\x66\x6d\x74\0\
+\x64\x75\x6d\x70\x5f\x62\x70\x66\x5f\x6d\x61\x70\0\x2e\x72\x65\x6c\x69\x74\x65\
+\x72\x2f\x62\x70\x66\x5f\x6d\x61\x70\0\x64\x75\x6d\x70\x5f\x62\x70\x66\x5f\x70\
+\x72\x6f\x67\0\x2e\x72\x65\x6c\x69\x74\x65\x72\x2f\x62\x70\x66\x5f\x70\x72\x6f\
+\x67\0\x2e\x6c\x6c\x76\x6d\x5f\x61\x64\x64\x72\x73\x69\x67\0\x6c\x69\x63\x65\
+\x6e\x73\x65\0\x69\x74\x65\x72\x61\x74\x6f\x72\x73\x2e\x62\x70\x66\x2e\x63\0\
+\x2e\x73\x74\x72\x74\x61\x62\0\x2e\x73\x79\x6d\x74\x61\x62\0\x2e\x72\x6f\x64\
+\x61\x74\x61\0\x2e\x72\x65\x6c\x2e\x42\x54\x46\0\x4c\x49\x43\x45\x4e\x53\x45\0\
+\x4c\x42\x42\x31\x5f\x37\0\x4c\x42\x42\x31\x5f\x36\0\x4c\x42\x42\x30\x5f\x34\0\
+\x4c\x42\x42\x31\x5f\x33\0\x4c\x42\x42\x30\x5f\x33\0\x64\x75\x6d\x70\x5f\x62\
+\x70\x66\x5f\x70\x72\x6f\x67\x2e\x5f\x5f\x5f\x66\x6d\x74\x2e\x32\0\x64\x75\x6d\
+\x70\x5f\x62\x70\x66\x5f\x6d\x61\x70\x2e\x5f\x5f\x5f\x66\x6d\x74\x2e\x31\0\0\0\
 \0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
-\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x01\0\0\0\x01\0\0\0\x06\0\0\0\0\0\0\0\0\0\0\0\
-\0\0\0\0\x40\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x04\0\0\0\0\0\0\0\0\
-\0\0\0\0\0\0\0\x4e\0\0\0\x01\0\0\0\x06\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x40\0\0\0\
-\0\0\0\0\0\x01\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x08\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
-\x6d\0\0\0\x01\0\0\0\x06\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x40\x01\0\0\0\0\0\0\x08\
-\x02\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x08\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xa1\0\0\0\
-\x01\0\0\0\x02\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x48\x03\0\0\0\0\0\0\x62\0\0\0\0\0\
-\0\0\0\0\0\0\0\0\0\0\x01\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x89\0\0\0\x01\0\0\0\x03\
-\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xaa\x03\0\0\0\0\0\0\x04\0\0\0\0\0\0\0\0\0\0\0\0\
-\0\0\0\x01\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xad\0\0\0\x01\0\0\0\0\0\0\0\0\0\0\0\0\
-\0\0\0\0\0\0\0\xae\x03\0\0\0\0\0\0\x34\x09\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x01\0\0\
-\0\0\0\0\0\0\0\0\0\0\0\0\0\x0b\0\0\0\x01\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
-\xe2\x0c\0\0\0\0\0\0\x2c\x04\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x01\0\0\0\0\0\0\0\0\0\
-\0\0\0\0\0\0\x99\0\0\0\x02\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x10\x11\0\0\0\
-\0\0\0\x80\x01\0\0\0\0\0\0\x0e\0\0\0\x0d\0\0\0\x08\0\0\0\0\0\0\0\x18\0\0\0\0\0\
-\0\0\x4a\0\0\0\x09\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x90\x12\0\0\0\0\0\0\
-\x20\0\0\0\0\0\0\0\x08\0\0\0\x02\0\0\0\x08\0\0\0\0\0\0\0\x10\0\0\0\0\0\0\0\x69\
-\0\0\0\x09\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xb0\x12\0\0\0\0\0\0\x20\0\0\0\
-\0\0\0\0\x08\0\0\0\x03\0\0\0\x08\0\0\0\0\0\0\0\x10\0\0\0\0\0\0\0\xa9\0\0\0\x09\
-\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xd0\x12\0\0\0\0\0\0\x50\0\0\0\0\0\0\0\
-\x08\0\0\0\x06\0\0\0\x08\0\0\0\0\0\0\0\x10\0\0\0\0\0\0\0\x07\0\0\0\x09\0\0\0\0\
-\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x20\x13\0\0\0\0\0\0\xe0\x03\0\0\0\0\0\0\x08\0\0\
-\0\x07\0\0\0\x08\0\0\0\0\0\0\0\x10\0\0\0\0\0\0\0\x7b\0\0\0\x03\x4c\xff\x6f\0\0\
-\0\x80\0\0\0\0\0\0\0\0\0\0\0\0\0\x17\0\0\0\0\0\0\x07\0\0\0\0\0\0\0\0\0\0\0\0\0\
-\0\0\x01\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x91\0\0\0\x03\0\0\0\0\0\0\0\0\0\0\0\0\0\
-\0\0\0\0\0\0\x07\x17\0\0\0\0\0\0\x0a\x01\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x01\0\0\0\
-\0\0\0\0\0\0\0\0\0\0\0\0";
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x01\0\0\0\x01\0\0\
+\0\x06\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x40\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\x04\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x4e\0\0\0\x01\0\0\0\x06\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\x40\0\0\0\0\0\0\0\0\x01\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x08\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\x6d\0\0\0\x01\0\0\0\x06\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\x40\x01\0\0\0\0\0\0\x08\x02\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x08\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\xb1\0\0\0\x01\0\0\0\x02\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x48\x03\0\
+\0\0\0\0\0\x62\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x01\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\x89\0\0\0\x01\0\0\0\x03\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xaa\x03\0\0\0\0\0\0\x04\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x01\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xbd\0\0\0\x01\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xae\x03\0\0\0\0\0\0\x3d\x09\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\x01\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x0b\0\0\0\x01\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\xeb\x0c\0\0\0\0\0\0\x2c\x04\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\x01\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xa9\0\0\0\x02\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\x18\x11\0\0\0\0\0\0\x98\x01\0\0\0\0\0\0\x0e\0\0\0\x0e\0\0\0\x08\0\0\
+\0\0\0\0\0\x18\0\0\0\0\0\0\0\x4a\0\0\0\x09\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\xb0\x12\0\0\0\0\0\0\x20\0\0\0\0\0\0\0\x08\0\0\0\x02\0\0\0\x08\0\0\0\0\0\0\0\
+\x10\0\0\0\0\0\0\0\x69\0\0\0\x09\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xd0\x12\
+\0\0\0\0\0\0\x20\0\0\0\0\0\0\0\x08\0\0\0\x03\0\0\0\x08\0\0\0\0\0\0\0\x10\0\0\0\
+\0\0\0\0\xb9\0\0\0\x09\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xf0\x12\0\0\0\0\0\
+\0\x50\0\0\0\0\0\0\0\x08\0\0\0\x06\0\0\0\x08\0\0\0\0\0\0\0\x10\0\0\0\0\0\0\0\
+\x07\0\0\0\x09\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x40\x13\0\0\0\0\0\0\xe0\
+\x03\0\0\0\0\0\0\x08\0\0\0\x07\0\0\0\x08\0\0\0\0\0\0\0\x10\0\0\0\0\0\0\0\x7b\0\
+\0\0\x03\x4c\xff\x6f\0\0\0\x80\0\0\0\0\0\0\0\0\0\0\0\0\x20\x17\0\0\0\0\0\0\x07\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x01\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xa1\0\0\0\x03\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x27\x17\0\0\0\0\0\0\x1a\x01\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\x01\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0";
 
        return 0;
 err:
index 5a2ba11..a55cd54 100644 (file)
@@ -191,7 +191,7 @@ int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map, void *key,
        rcu_read_lock();
        sk = reuseport_array_lookup_elem(map, key);
        if (sk) {
-               *(u64 *)value = sock_gen_cookie(sk);
+               *(u64 *)value = __sock_gen_cookie(sk);
                err = 0;
        } else {
                err = -ENOENT;
index 3426849..f1528c2 100644 (file)
@@ -4,6 +4,7 @@
 #include <linux/bpf.h>
 #include <linux/bpf_trace.h>
 #include <linux/bpf_lirc.h>
+#include <linux/bpf_verifier.h>
 #include <linux/btf.h>
 #include <linux/syscalls.h>
 #include <linux/slab.h>
@@ -2154,14 +2155,14 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr)
        prog->expected_attach_type = attr->expected_attach_type;
        prog->aux->attach_btf_id = attr->attach_btf_id;
        if (attr->attach_prog_fd) {
-               struct bpf_prog *tgt_prog;
+               struct bpf_prog *dst_prog;
 
-               tgt_prog = bpf_prog_get(attr->attach_prog_fd);
-               if (IS_ERR(tgt_prog)) {
-                       err = PTR_ERR(tgt_prog);
+               dst_prog = bpf_prog_get(attr->attach_prog_fd);
+               if (IS_ERR(dst_prog)) {
+                       err = PTR_ERR(dst_prog);
                        goto free_prog_nouncharge;
                }
-               prog->aux->linked_prog = tgt_prog;
+               prog->aux->dst_prog = dst_prog;
        }
 
        prog->aux->offload_requested = !!attr->prog_ifindex;
@@ -2345,8 +2346,12 @@ void bpf_link_put(struct bpf_link *link)
        if (!atomic64_dec_and_test(&link->refcnt))
                return;
 
-       INIT_WORK(&link->work, bpf_link_put_deferred);
-       schedule_work(&link->work);
+       if (in_atomic()) {
+               INIT_WORK(&link->work, bpf_link_put_deferred);
+               schedule_work(&link->work);
+       } else {
+               bpf_link_free(link);
+       }
 }
 
 static int bpf_link_release(struct inode *inode, struct file *filp)
@@ -2494,11 +2499,23 @@ struct bpf_link *bpf_link_get_from_fd(u32 ufd)
 struct bpf_tracing_link {
        struct bpf_link link;
        enum bpf_attach_type attach_type;
+       struct bpf_trampoline *trampoline;
+       struct bpf_prog *tgt_prog;
 };
 
 static void bpf_tracing_link_release(struct bpf_link *link)
 {
-       WARN_ON_ONCE(bpf_trampoline_unlink_prog(link->prog));
+       struct bpf_tracing_link *tr_link =
+               container_of(link, struct bpf_tracing_link, link);
+
+       WARN_ON_ONCE(bpf_trampoline_unlink_prog(link->prog,
+                                               tr_link->trampoline));
+
+       bpf_trampoline_put(tr_link->trampoline);
+
+       /* tgt_prog is NULL if target is a kernel function */
+       if (tr_link->tgt_prog)
+               bpf_prog_put(tr_link->tgt_prog);
 }
 
 static void bpf_tracing_link_dealloc(struct bpf_link *link)
@@ -2538,10 +2555,15 @@ static const struct bpf_link_ops bpf_tracing_link_lops = {
        .fill_link_info = bpf_tracing_link_fill_link_info,
 };
 
-static int bpf_tracing_prog_attach(struct bpf_prog *prog)
+static int bpf_tracing_prog_attach(struct bpf_prog *prog,
+                                  int tgt_prog_fd,
+                                  u32 btf_id)
 {
        struct bpf_link_primer link_primer;
+       struct bpf_prog *tgt_prog = NULL;
+       struct bpf_trampoline *tr = NULL;
        struct bpf_tracing_link *link;
+       u64 key = 0;
        int err;
 
        switch (prog->type) {
@@ -2570,6 +2592,28 @@ static int bpf_tracing_prog_attach(struct bpf_prog *prog)
                goto out_put_prog;
        }
 
+       if (!!tgt_prog_fd != !!btf_id) {
+               err = -EINVAL;
+               goto out_put_prog;
+       }
+
+       if (tgt_prog_fd) {
+               /* For now we only allow new targets for BPF_PROG_TYPE_EXT */
+               if (prog->type != BPF_PROG_TYPE_EXT) {
+                       err = -EINVAL;
+                       goto out_put_prog;
+               }
+
+               tgt_prog = bpf_prog_get(tgt_prog_fd);
+               if (IS_ERR(tgt_prog)) {
+                       err = PTR_ERR(tgt_prog);
+                       tgt_prog = NULL;
+                       goto out_put_prog;
+               }
+
+               key = bpf_trampoline_compute_key(tgt_prog, btf_id);
+       }
+
        link = kzalloc(sizeof(*link), GFP_USER);
        if (!link) {
                err = -ENOMEM;
@@ -2579,20 +2623,100 @@ static int bpf_tracing_prog_attach(struct bpf_prog *prog)
                      &bpf_tracing_link_lops, prog);
        link->attach_type = prog->expected_attach_type;
 
-       err = bpf_link_prime(&link->link, &link_primer);
-       if (err) {
-               kfree(link);
-               goto out_put_prog;
+       mutex_lock(&prog->aux->dst_mutex);
+
+       /* There are a few possible cases here:
+        *
+        * - if prog->aux->dst_trampoline is set, the program was just loaded
+        *   and not yet attached to anything, so we can use the values stored
+        *   in prog->aux
+        *
+        * - if prog->aux->dst_trampoline is NULL, the program has already been
+         *   attached to a target and its initial target was cleared (below)
+        *
+        * - if tgt_prog != NULL, the caller specified tgt_prog_fd +
+        *   target_btf_id using the link_create API.
+        *
+        * - if tgt_prog == NULL when this function was called using the old
+         *   raw_tracepoint_open API, and we need a target from prog->aux
+         *
+         * The combination of no saved target in prog->aux, and no target
+         * specified on load is illegal, and we reject that here.
+        */
+       if (!prog->aux->dst_trampoline && !tgt_prog) {
+               err = -ENOENT;
+               goto out_unlock;
        }
 
-       err = bpf_trampoline_link_prog(prog);
+       if (!prog->aux->dst_trampoline ||
+           (key && key != prog->aux->dst_trampoline->key)) {
+               /* If there is no saved target, or the specified target is
+                * different from the destination specified at load time, we
+                * need a new trampoline and a check for compatibility
+                */
+               struct bpf_attach_target_info tgt_info = {};
+
+               err = bpf_check_attach_target(NULL, prog, tgt_prog, btf_id,
+                                             &tgt_info);
+               if (err)
+                       goto out_unlock;
+
+               tr = bpf_trampoline_get(key, &tgt_info);
+               if (!tr) {
+                       err = -ENOMEM;
+                       goto out_unlock;
+               }
+       } else {
+               /* The caller didn't specify a target, or the target was the
+                * same as the destination supplied during program load. This
+                * means we can reuse the trampoline and reference from program
+                * load time, and there is no need to allocate a new one. This
+                * can only happen once for any program, as the saved values in
+                * prog->aux are cleared below.
+                */
+               tr = prog->aux->dst_trampoline;
+               tgt_prog = prog->aux->dst_prog;
+       }
+
+       err = bpf_link_prime(&link->link, &link_primer);
+       if (err)
+               goto out_unlock;
+
+       err = bpf_trampoline_link_prog(prog, tr);
        if (err) {
                bpf_link_cleanup(&link_primer);
-               goto out_put_prog;
+               link = NULL;
+               goto out_unlock;
        }
 
+       link->tgt_prog = tgt_prog;
+       link->trampoline = tr;
+
+       /* Always clear the trampoline and target prog from prog->aux to make
+        * sure the original attach destination is not kept alive after a
+        * program is (re-)attached to another target.
+        */
+       if (prog->aux->dst_prog &&
+           (tgt_prog_fd || tr != prog->aux->dst_trampoline))
+               /* got extra prog ref from syscall, or attaching to different prog */
+               bpf_prog_put(prog->aux->dst_prog);
+       if (prog->aux->dst_trampoline && tr != prog->aux->dst_trampoline)
+               /* we allocated a new trampoline, so free the old one */
+               bpf_trampoline_put(prog->aux->dst_trampoline);
+
+       prog->aux->dst_prog = NULL;
+       prog->aux->dst_trampoline = NULL;
+       mutex_unlock(&prog->aux->dst_mutex);
+
        return bpf_link_settle(&link_primer);
+out_unlock:
+       if (tr && tr != prog->aux->dst_trampoline)
+               bpf_trampoline_put(tr);
+       mutex_unlock(&prog->aux->dst_mutex);
+       kfree(link);
 out_put_prog:
+       if (tgt_prog_fd && tgt_prog)
+               bpf_prog_put(tgt_prog);
        bpf_prog_put(prog);
        return err;
 }
@@ -2706,7 +2830,7 @@ static int bpf_raw_tracepoint_open(const union bpf_attr *attr)
                        tp_name = prog->aux->attach_func_name;
                        break;
                }
-               return bpf_tracing_prog_attach(prog);
+               return bpf_tracing_prog_attach(prog, 0, 0);
        case BPF_PROG_TYPE_RAW_TRACEPOINT:
        case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE:
                if (strncpy_from_user(buf,
@@ -2975,7 +3099,7 @@ static int bpf_prog_query(const union bpf_attr *attr,
        }
 }
 
-#define BPF_PROG_TEST_RUN_LAST_FIELD test.ctx_out
+#define BPF_PROG_TEST_RUN_LAST_FIELD test.cpu
 
 static int bpf_prog_test_run(const union bpf_attr *attr,
                             union bpf_attr __user *uattr)
@@ -3890,10 +4014,15 @@ err_put:
 
 static int tracing_bpf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
 {
-       if (attr->link_create.attach_type == BPF_TRACE_ITER &&
-           prog->expected_attach_type == BPF_TRACE_ITER)
-               return bpf_iter_link_attach(attr, prog);
+       if (attr->link_create.attach_type != prog->expected_attach_type)
+               return -EINVAL;
 
+       if (prog->expected_attach_type == BPF_TRACE_ITER)
+               return bpf_iter_link_attach(attr, prog);
+       else if (prog->type == BPF_PROG_TYPE_EXT)
+               return bpf_tracing_prog_attach(prog,
+                                              attr->link_create.target_fd,
+                                              attr->link_create.target_btf_id);
        return -EINVAL;
 }
 
@@ -3907,18 +4036,25 @@ static int link_create(union bpf_attr *attr)
        if (CHECK_ATTR(BPF_LINK_CREATE))
                return -EINVAL;
 
-       ptype = attach_type_to_prog_type(attr->link_create.attach_type);
-       if (ptype == BPF_PROG_TYPE_UNSPEC)
-               return -EINVAL;
-
-       prog = bpf_prog_get_type(attr->link_create.prog_fd, ptype);
+       prog = bpf_prog_get(attr->link_create.prog_fd);
        if (IS_ERR(prog))
                return PTR_ERR(prog);
 
        ret = bpf_prog_attach_check_attach_type(prog,
                                                attr->link_create.attach_type);
        if (ret)
-               goto err_out;
+               goto out;
+
+       if (prog->type == BPF_PROG_TYPE_EXT) {
+               ret = tracing_bpf_link_attach(attr, prog);
+               goto out;
+       }
+
+       ptype = attach_type_to_prog_type(attr->link_create.attach_type);
+       if (ptype == BPF_PROG_TYPE_UNSPEC || ptype != prog->type) {
+               ret = -EINVAL;
+               goto out;
+       }
 
        switch (ptype) {
        case BPF_PROG_TYPE_CGROUP_SKB:
@@ -3946,7 +4082,7 @@ static int link_create(union bpf_attr *attr)
                ret = -EINVAL;
        }
 
-err_out:
+out:
        if (ret < 0)
                bpf_prog_put(prog);
        return ret;
index 7dd523a..35c5887 100644 (file)
@@ -65,7 +65,7 @@ static void bpf_trampoline_ksym_add(struct bpf_trampoline *tr)
        bpf_image_ksym_add(tr->image, ksym);
 }
 
-struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
+static struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
 {
        struct bpf_trampoline *tr;
        struct hlist_head *head;
@@ -261,14 +261,12 @@ static enum bpf_tramp_prog_type bpf_attach_type_to_tramp(struct bpf_prog *prog)
        }
 }
 
-int bpf_trampoline_link_prog(struct bpf_prog *prog)
+int bpf_trampoline_link_prog(struct bpf_prog *prog, struct bpf_trampoline *tr)
 {
        enum bpf_tramp_prog_type kind;
-       struct bpf_trampoline *tr;
        int err = 0;
        int cnt;
 
-       tr = prog->aux->trampoline;
        kind = bpf_attach_type_to_tramp(prog);
        mutex_lock(&tr->mutex);
        if (tr->extension_prog) {
@@ -301,7 +299,7 @@ int bpf_trampoline_link_prog(struct bpf_prog *prog)
        }
        hlist_add_head(&prog->aux->tramp_hlist, &tr->progs_hlist[kind]);
        tr->progs_cnt[kind]++;
-       err = bpf_trampoline_update(prog->aux->trampoline);
+       err = bpf_trampoline_update(tr);
        if (err) {
                hlist_del(&prog->aux->tramp_hlist);
                tr->progs_cnt[kind]--;
@@ -312,13 +310,11 @@ out:
 }
 
 /* bpf_trampoline_unlink_prog() should never fail. */
-int bpf_trampoline_unlink_prog(struct bpf_prog *prog)
+int bpf_trampoline_unlink_prog(struct bpf_prog *prog, struct bpf_trampoline *tr)
 {
        enum bpf_tramp_prog_type kind;
-       struct bpf_trampoline *tr;
        int err;
 
-       tr = prog->aux->trampoline;
        kind = bpf_attach_type_to_tramp(prog);
        mutex_lock(&tr->mutex);
        if (kind == BPF_TRAMP_REPLACE) {
@@ -330,12 +326,32 @@ int bpf_trampoline_unlink_prog(struct bpf_prog *prog)
        }
        hlist_del(&prog->aux->tramp_hlist);
        tr->progs_cnt[kind]--;
-       err = bpf_trampoline_update(prog->aux->trampoline);
+       err = bpf_trampoline_update(tr);
 out:
        mutex_unlock(&tr->mutex);
        return err;
 }
 
+struct bpf_trampoline *bpf_trampoline_get(u64 key,
+                                         struct bpf_attach_target_info *tgt_info)
+{
+       struct bpf_trampoline *tr;
+
+       tr = bpf_trampoline_lookup(key);
+       if (!tr)
+               return NULL;
+
+       mutex_lock(&tr->mutex);
+       if (tr->func.addr)
+               goto out;
+
+       memcpy(&tr->func.model, &tgt_info->fmodel, sizeof(tgt_info->fmodel));
+       tr->func.addr = (void *)tgt_info->tgt_addr;
+out:
+       mutex_unlock(&tr->mutex);
+       return tr;
+}
+
 void bpf_trampoline_put(struct bpf_trampoline *tr)
 {
        if (!tr)
index 42dee5d..015a1c0 100644 (file)
@@ -486,7 +486,12 @@ static bool is_acquire_function(enum bpf_func_id func_id,
 static bool is_ptr_cast_function(enum bpf_func_id func_id)
 {
        return func_id == BPF_FUNC_tcp_sock ||
-               func_id == BPF_FUNC_sk_fullsock;
+               func_id == BPF_FUNC_sk_fullsock ||
+               func_id == BPF_FUNC_skc_to_tcp_sock ||
+               func_id == BPF_FUNC_skc_to_tcp6_sock ||
+               func_id == BPF_FUNC_skc_to_udp6_sock ||
+               func_id == BPF_FUNC_skc_to_tcp_timewait_sock ||
+               func_id == BPF_FUNC_skc_to_tcp_request_sock;
 }
 
 /* string representation of 'enum bpf_reg_type' */
@@ -2643,8 +2648,7 @@ static int check_map_access(struct bpf_verifier_env *env, u32 regno,
 
 static enum bpf_prog_type resolve_prog_type(struct bpf_prog *prog)
 {
-       return prog->aux->linked_prog ? prog->aux->linked_prog->type
-                                     : prog->type;
+       return prog->aux->dst_prog ? prog->aux->dst_prog->type : prog->type;
 }
 
 static bool may_access_direct_pkt_data(struct bpf_verifier_env *env,
@@ -3938,7 +3942,7 @@ static int resolve_map_arg_type(struct bpf_verifier_env *env,
        case BPF_MAP_TYPE_SOCKMAP:
        case BPF_MAP_TYPE_SOCKHASH:
                if (*arg_type == ARG_PTR_TO_MAP_VALUE) {
-                       *arg_type = ARG_PTR_TO_SOCKET;
+                       *arg_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON;
                } else {
                        verbose(env, "invalid arg_type for sockmap/sockhash\n");
                        return -EINVAL;
@@ -3953,6 +3957,7 @@ static int resolve_map_arg_type(struct bpf_verifier_env *env,
 
 struct bpf_reg_types {
        const enum bpf_reg_type types[10];
+       u32 *btf_id;
 };
 
 static const struct bpf_reg_types map_key_value_types = {
@@ -3973,6 +3978,17 @@ static const struct bpf_reg_types sock_types = {
        },
 };
 
+static const struct bpf_reg_types btf_id_sock_common_types = {
+       .types = {
+               PTR_TO_SOCK_COMMON,
+               PTR_TO_SOCKET,
+               PTR_TO_TCP_SOCK,
+               PTR_TO_XDP_SOCK,
+               PTR_TO_BTF_ID,
+       },
+       .btf_id = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
+};
+
 static const struct bpf_reg_types mem_types = {
        .types = {
                PTR_TO_STACK,
@@ -4014,6 +4030,7 @@ static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = {
        [ARG_PTR_TO_CTX]                = &context_types,
        [ARG_PTR_TO_CTX_OR_NULL]        = &context_types,
        [ARG_PTR_TO_SOCK_COMMON]        = &sock_types,
+       [ARG_PTR_TO_BTF_ID_SOCK_COMMON] = &btf_id_sock_common_types,
        [ARG_PTR_TO_SOCKET]             = &fullsock_types,
        [ARG_PTR_TO_SOCKET_OR_NULL]     = &fullsock_types,
        [ARG_PTR_TO_BTF_ID]             = &btf_ptr_types,
@@ -4028,19 +4045,27 @@ static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = {
 };
 
 static int check_reg_type(struct bpf_verifier_env *env, u32 regno,
-                         const struct bpf_reg_types *compatible)
+                         enum bpf_arg_type arg_type,
+                         const u32 *arg_btf_id)
 {
        struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
        enum bpf_reg_type expected, type = reg->type;
+       const struct bpf_reg_types *compatible;
        int i, j;
 
+       compatible = compatible_reg_types[arg_type];
+       if (!compatible) {
+               verbose(env, "verifier internal error: unsupported arg type %d\n", arg_type);
+               return -EFAULT;
+       }
+
        for (i = 0; i < ARRAY_SIZE(compatible->types); i++) {
                expected = compatible->types[i];
                if (expected == NOT_INIT)
                        break;
 
                if (type == expected)
-                       return 0;
+                       goto found;
        }
 
        verbose(env, "R%d type=%s expected=", regno, reg_type_str[type]);
@@ -4048,6 +4073,33 @@ static int check_reg_type(struct bpf_verifier_env *env, u32 regno,
                verbose(env, "%s, ", reg_type_str[compatible->types[j]]);
        verbose(env, "%s\n", reg_type_str[compatible->types[j]]);
        return -EACCES;
+
+found:
+       if (type == PTR_TO_BTF_ID) {
+               if (!arg_btf_id) {
+                       if (!compatible->btf_id) {
+                               verbose(env, "verifier internal error: missing arg compatible BTF ID\n");
+                               return -EFAULT;
+                       }
+                       arg_btf_id = compatible->btf_id;
+               }
+
+               if (!btf_struct_ids_match(&env->log, reg->off, reg->btf_id,
+                                         *arg_btf_id)) {
+                       verbose(env, "R%d is of type %s but %s is expected\n",
+                               regno, kernel_type_name(reg->btf_id),
+                               kernel_type_name(*arg_btf_id));
+                       return -EACCES;
+               }
+
+               if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
+                       verbose(env, "R%d is a pointer to in-kernel struct with non-zero offset\n",
+                               regno);
+                       return -EACCES;
+               }
+       }
+
+       return 0;
 }
 
 static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
@@ -4057,7 +4109,6 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
        u32 regno = BPF_REG_1 + arg;
        struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
        enum bpf_arg_type arg_type = fn->arg_type[arg];
-       const struct bpf_reg_types *compatible;
        enum bpf_reg_type type = reg->type;
        int err = 0;
 
@@ -4097,35 +4148,11 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
                 */
                goto skip_type_check;
 
-       compatible = compatible_reg_types[arg_type];
-       if (!compatible) {
-               verbose(env, "verifier internal error: unsupported arg type %d\n", arg_type);
-               return -EFAULT;
-       }
-
-       err = check_reg_type(env, regno, compatible);
+       err = check_reg_type(env, regno, arg_type, fn->arg_btf_id[arg]);
        if (err)
                return err;
 
-       if (type == PTR_TO_BTF_ID) {
-               const u32 *btf_id = fn->arg_btf_id[arg];
-
-               if (!btf_id) {
-                       verbose(env, "verifier internal error: missing BTF ID\n");
-                       return -EFAULT;
-               }
-
-               if (!btf_struct_ids_match(&env->log, reg->off, reg->btf_id, *btf_id)) {
-                       verbose(env, "R%d is of type %s but %s is expected\n",
-                               regno, kernel_type_name(reg->btf_id), kernel_type_name(*btf_id));
-                       return -EACCES;
-               }
-               if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
-                       verbose(env, "R%d is a pointer to in-kernel struct with non-zero offset\n",
-                               regno);
-                       return -EACCES;
-               }
-       } else if (type == PTR_TO_CTX) {
+       if (type == PTR_TO_CTX) {
                err = check_ctx_reg(env, reg, regno);
                if (err < 0)
                        return err;
@@ -4573,10 +4600,14 @@ static bool check_btf_id_ok(const struct bpf_func_proto *fn)
 {
        int i;
 
-       for (i = 0; i < ARRAY_SIZE(fn->arg_type); i++)
+       for (i = 0; i < ARRAY_SIZE(fn->arg_type); i++) {
                if (fn->arg_type[i] == ARG_PTR_TO_BTF_ID && !fn->arg_btf_id[i])
                        return false;
 
+               if (fn->arg_type[i] != ARG_PTR_TO_BTF_ID && fn->arg_btf_id[i])
+                       return false;
+       }
+
        return true;
 }
 
@@ -5817,8 +5848,7 @@ static void scalar_min_max_and(struct bpf_reg_state *dst_reg,
        u64 umax_val = src_reg->umax_value;
 
        if (src_known && dst_known) {
-               __mark_reg_known(dst_reg, dst_reg->var_off.value &
-                                         src_reg->var_off.value);
+               __mark_reg_known(dst_reg, dst_reg->var_off.value);
                return;
        }
 
@@ -5888,8 +5918,7 @@ static void scalar_min_max_or(struct bpf_reg_state *dst_reg,
        u64 umin_val = src_reg->umin_value;
 
        if (src_known && dst_known) {
-               __mark_reg_known(dst_reg, dst_reg->var_off.value |
-                                         src_reg->var_off.value);
+               __mark_reg_known(dst_reg, dst_reg->var_off.value);
                return;
        }
 
@@ -11173,11 +11202,10 @@ static int check_struct_ops_btf_id(struct bpf_verifier_env *env)
 }
 #define SECURITY_PREFIX "security_"
 
-static int check_attach_modify_return(struct bpf_prog *prog, unsigned long addr)
+static int check_attach_modify_return(unsigned long addr, const char *func_name)
 {
        if (within_error_injection_list(addr) ||
-           !strncmp(SECURITY_PREFIX, prog->aux->attach_func_name,
-                    sizeof(SECURITY_PREFIX) - 1))
+           !strncmp(SECURITY_PREFIX, func_name, sizeof(SECURITY_PREFIX) - 1))
                return 0;
 
        return -EINVAL;
@@ -11214,55 +11242,39 @@ static int check_non_sleepable_error_inject(u32 btf_id)
        return btf_id_set_contains(&btf_non_sleepable_error_inject, btf_id);
 }
 
-static int check_attach_btf_id(struct bpf_verifier_env *env)
+int bpf_check_attach_target(struct bpf_verifier_log *log,
+                           const struct bpf_prog *prog,
+                           const struct bpf_prog *tgt_prog,
+                           u32 btf_id,
+                           struct bpf_attach_target_info *tgt_info)
 {
-       struct bpf_prog *prog = env->prog;
        bool prog_extension = prog->type == BPF_PROG_TYPE_EXT;
-       struct bpf_prog *tgt_prog = prog->aux->linked_prog;
-       u32 btf_id = prog->aux->attach_btf_id;
        const char prefix[] = "btf_trace_";
-       struct btf_func_model fmodel;
        int ret = 0, subprog = -1, i;
-       struct bpf_trampoline *tr;
        const struct btf_type *t;
        bool conservative = true;
        const char *tname;
        struct btf *btf;
-       long addr;
-       u64 key;
-
-       if (prog->aux->sleepable && prog->type != BPF_PROG_TYPE_TRACING &&
-           prog->type != BPF_PROG_TYPE_LSM) {
-               verbose(env, "Only fentry/fexit/fmod_ret and lsm programs can be sleepable\n");
-               return -EINVAL;
-       }
-
-       if (prog->type == BPF_PROG_TYPE_STRUCT_OPS)
-               return check_struct_ops_btf_id(env);
-
-       if (prog->type != BPF_PROG_TYPE_TRACING &&
-           prog->type != BPF_PROG_TYPE_LSM &&
-           !prog_extension)
-               return 0;
+       long addr = 0;
 
        if (!btf_id) {
-               verbose(env, "Tracing programs must provide btf_id\n");
+               bpf_log(log, "Tracing programs must provide btf_id\n");
                return -EINVAL;
        }
-       btf = bpf_prog_get_target_btf(prog);
+       btf = tgt_prog ? tgt_prog->aux->btf : btf_vmlinux;
        if (!btf) {
-               verbose(env,
+               bpf_log(log,
                        "FENTRY/FEXIT program can only be attached to another program annotated with BTF\n");
                return -EINVAL;
        }
        t = btf_type_by_id(btf, btf_id);
        if (!t) {
-               verbose(env, "attach_btf_id %u is invalid\n", btf_id);
+               bpf_log(log, "attach_btf_id %u is invalid\n", btf_id);
                return -EINVAL;
        }
        tname = btf_name_by_offset(btf, t->name_off);
        if (!tname) {
-               verbose(env, "attach_btf_id %u doesn't have a name\n", btf_id);
+               bpf_log(log, "attach_btf_id %u doesn't have a name\n", btf_id);
                return -EINVAL;
        }
        if (tgt_prog) {
@@ -11274,26 +11286,24 @@ static int check_attach_btf_id(struct bpf_verifier_env *env)
                                break;
                        }
                if (subprog == -1) {
-                       verbose(env, "Subprog %s doesn't exist\n", tname);
+                       bpf_log(log, "Subprog %s doesn't exist\n", tname);
                        return -EINVAL;
                }
                conservative = aux->func_info_aux[subprog].unreliable;
                if (prog_extension) {
                        if (conservative) {
-                               verbose(env,
+                               bpf_log(log,
                                        "Cannot replace static functions\n");
                                return -EINVAL;
                        }
                        if (!prog->jit_requested) {
-                               verbose(env,
+                               bpf_log(log,
                                        "Extension programs should be JITed\n");
                                return -EINVAL;
                        }
-                       env->ops = bpf_verifier_ops[tgt_prog->type];
-                       prog->expected_attach_type = tgt_prog->expected_attach_type;
                }
                if (!tgt_prog->jited) {
-                       verbose(env, "Can attach to only JITed progs\n");
+                       bpf_log(log, "Can attach to only JITed progs\n");
                        return -EINVAL;
                }
                if (tgt_prog->type == prog->type) {
@@ -11301,7 +11311,7 @@ static int check_attach_btf_id(struct bpf_verifier_env *env)
                         * Cannot attach program extension to another extension.
                         * It's ok to attach fentry/fexit to extension program.
                         */
-                       verbose(env, "Cannot recursively attach\n");
+                       bpf_log(log, "Cannot recursively attach\n");
                        return -EINVAL;
                }
                if (tgt_prog->type == BPF_PROG_TYPE_TRACING &&
@@ -11323,32 +11333,30 @@ static int check_attach_btf_id(struct bpf_verifier_env *env)
                         * reasonable stack size. Hence extending fentry is not
                         * allowed.
                         */
-                       verbose(env, "Cannot extend fentry/fexit\n");
+                       bpf_log(log, "Cannot extend fentry/fexit\n");
                        return -EINVAL;
                }
-               key = ((u64)aux->id) << 32 | btf_id;
        } else {
                if (prog_extension) {
-                       verbose(env, "Cannot replace kernel functions\n");
+                       bpf_log(log, "Cannot replace kernel functions\n");
                        return -EINVAL;
                }
-               key = btf_id;
        }
 
        switch (prog->expected_attach_type) {
        case BPF_TRACE_RAW_TP:
                if (tgt_prog) {
-                       verbose(env,
+                       bpf_log(log,
                                "Only FENTRY/FEXIT progs are attachable to another BPF prog\n");
                        return -EINVAL;
                }
                if (!btf_type_is_typedef(t)) {
-                       verbose(env, "attach_btf_id %u is not a typedef\n",
+                       bpf_log(log, "attach_btf_id %u is not a typedef\n",
                                btf_id);
                        return -EINVAL;
                }
                if (strncmp(prefix, tname, sizeof(prefix) - 1)) {
-                       verbose(env, "attach_btf_id %u points to wrong type name %s\n",
+                       bpf_log(log, "attach_btf_id %u points to wrong type name %s\n",
                                btf_id, tname);
                        return -EINVAL;
                }
@@ -11362,29 +11370,20 @@ static int check_attach_btf_id(struct bpf_verifier_env *env)
                        /* should never happen in valid vmlinux build */
                        return -EINVAL;
 
-               /* remember two read only pointers that are valid for
-                * the life time of the kernel
-                */
-               prog->aux->attach_func_name = tname;
-               prog->aux->attach_func_proto = t;
-               prog->aux->attach_btf_trace = true;
-               return 0;
+               break;
        case BPF_TRACE_ITER:
                if (!btf_type_is_func(t)) {
-                       verbose(env, "attach_btf_id %u is not a function\n",
+                       bpf_log(log, "attach_btf_id %u is not a function\n",
                                btf_id);
                        return -EINVAL;
                }
                t = btf_type_by_id(btf, t->type);
                if (!btf_type_is_func_proto(t))
                        return -EINVAL;
-               prog->aux->attach_func_name = tname;
-               prog->aux->attach_func_proto = t;
-               if (!bpf_iter_prog_supported(prog))
-                       return -EINVAL;
-               ret = btf_distill_func_proto(&env->log, btf, t,
-                                            tname, &fmodel);
-               return ret;
+               ret = btf_distill_func_proto(log, btf, t, tname, &tgt_info->fmodel);
+               if (ret)
+                       return ret;
+               break;
        default:
                if (!prog_extension)
                        return -EINVAL;
@@ -11393,42 +11392,30 @@ static int check_attach_btf_id(struct bpf_verifier_env *env)
        case BPF_LSM_MAC:
        case BPF_TRACE_FENTRY:
        case BPF_TRACE_FEXIT:
-               prog->aux->attach_func_name = tname;
-               if (prog->type == BPF_PROG_TYPE_LSM) {
-                       ret = bpf_lsm_verify_prog(&env->log, prog);
-                       if (ret < 0)
-                               return ret;
-               }
-
                if (!btf_type_is_func(t)) {
-                       verbose(env, "attach_btf_id %u is not a function\n",
+                       bpf_log(log, "attach_btf_id %u is not a function\n",
                                btf_id);
                        return -EINVAL;
                }
                if (prog_extension &&
-                   btf_check_type_match(env, prog, btf, t))
+                   btf_check_type_match(log, prog, btf, t))
                        return -EINVAL;
                t = btf_type_by_id(btf, t->type);
                if (!btf_type_is_func_proto(t))
                        return -EINVAL;
-               tr = bpf_trampoline_lookup(key);
-               if (!tr)
-                       return -ENOMEM;
-               /* t is either vmlinux type or another program's type */
-               prog->aux->attach_func_proto = t;
-               mutex_lock(&tr->mutex);
-               if (tr->func.addr) {
-                       prog->aux->trampoline = tr;
-                       goto out;
-               }
-               if (tgt_prog && conservative) {
-                       prog->aux->attach_func_proto = NULL;
+
+               if ((prog->aux->saved_dst_prog_type || prog->aux->saved_dst_attach_type) &&
+                   (!tgt_prog || prog->aux->saved_dst_prog_type != tgt_prog->type ||
+                    prog->aux->saved_dst_attach_type != tgt_prog->expected_attach_type))
+                       return -EINVAL;
+
+               if (tgt_prog && conservative)
                        t = NULL;
-               }
-               ret = btf_distill_func_proto(&env->log, btf, t,
-                                            tname, &tr->func.model);
+
+               ret = btf_distill_func_proto(log, btf, t, tname, &tgt_info->fmodel);
                if (ret < 0)
-                       goto out;
+                       return ret;
+
                if (tgt_prog) {
                        if (subprog == 0)
                                addr = (long) tgt_prog->bpf_func;
@@ -11437,11 +11424,10 @@ static int check_attach_btf_id(struct bpf_verifier_env *env)
                } else {
                        addr = kallsyms_lookup_name(tname);
                        if (!addr) {
-                               verbose(env,
+                               bpf_log(log,
                                        "The address of function %s cannot be found\n",
                                        tname);
-                               ret = -ENOENT;
-                               goto out;
+                               return -ENOENT;
                        }
                }
 
@@ -11466,25 +11452,109 @@ static int check_attach_btf_id(struct bpf_verifier_env *env)
                        default:
                                break;
                        }
-                       if (ret)
-                               verbose(env, "%s is not sleepable\n",
-                                       prog->aux->attach_func_name);
+                       if (ret) {
+                               bpf_log(log, "%s is not sleepable\n", tname);
+                               return ret;
+                       }
                } else if (prog->expected_attach_type == BPF_MODIFY_RETURN) {
-                       ret = check_attach_modify_return(prog, addr);
-                       if (ret)
-                               verbose(env, "%s() is not modifiable\n",
-                                       prog->aux->attach_func_name);
+                       if (tgt_prog) {
+                               bpf_log(log, "can't modify return codes of BPF programs\n");
+                               return -EINVAL;
+                       }
+                       ret = check_attach_modify_return(addr, tname);
+                       if (ret) {
+                               bpf_log(log, "%s() is not modifiable\n", tname);
+                               return ret;
+                       }
                }
-               if (ret)
-                       goto out;
-               tr->func.addr = (void *)addr;
-               prog->aux->trampoline = tr;
-out:
-               mutex_unlock(&tr->mutex);
-               if (ret)
-                       bpf_trampoline_put(tr);
+
+               break;
+       }
+       tgt_info->tgt_addr = addr;
+       tgt_info->tgt_name = tname;
+       tgt_info->tgt_type = t;
+       return 0;
+}
+
+static int check_attach_btf_id(struct bpf_verifier_env *env)
+{
+       struct bpf_prog *prog = env->prog;
+       struct bpf_prog *tgt_prog = prog->aux->dst_prog;
+       struct bpf_attach_target_info tgt_info = {};
+       u32 btf_id = prog->aux->attach_btf_id;
+       struct bpf_trampoline *tr;
+       int ret;
+       u64 key;
+
+       if (prog->aux->sleepable && prog->type != BPF_PROG_TYPE_TRACING &&
+           prog->type != BPF_PROG_TYPE_LSM) {
+               verbose(env, "Only fentry/fexit/fmod_ret and lsm programs can be sleepable\n");
+               return -EINVAL;
+       }
+
+       if (prog->type == BPF_PROG_TYPE_STRUCT_OPS)
+               return check_struct_ops_btf_id(env);
+
+       if (prog->type != BPF_PROG_TYPE_TRACING &&
+           prog->type != BPF_PROG_TYPE_LSM &&
+           prog->type != BPF_PROG_TYPE_EXT)
+               return 0;
+
+       ret = bpf_check_attach_target(&env->log, prog, tgt_prog, btf_id, &tgt_info);
+       if (ret)
                return ret;
+
+       if (tgt_prog && prog->type == BPF_PROG_TYPE_EXT) {
+               /* to make freplace equivalent to their targets, they need to
+                * inherit env->ops and expected_attach_type for the rest of the
+                * verification
+                */
+               env->ops = bpf_verifier_ops[tgt_prog->type];
+               prog->expected_attach_type = tgt_prog->expected_attach_type;
        }
+
+       /* store info about the attachment target that will be used later */
+       prog->aux->attach_func_proto = tgt_info.tgt_type;
+       prog->aux->attach_func_name = tgt_info.tgt_name;
+
+       if (tgt_prog) {
+               prog->aux->saved_dst_prog_type = tgt_prog->type;
+               prog->aux->saved_dst_attach_type = tgt_prog->expected_attach_type;
+       }
+
+       if (prog->expected_attach_type == BPF_TRACE_RAW_TP) {
+               prog->aux->attach_btf_trace = true;
+               return 0;
+       } else if (prog->expected_attach_type == BPF_TRACE_ITER) {
+               if (!bpf_iter_prog_supported(prog))
+                       return -EINVAL;
+               return 0;
+       }
+
+       if (prog->type == BPF_PROG_TYPE_LSM) {
+               ret = bpf_lsm_verify_prog(&env->log, prog);
+               if (ret < 0)
+                       return ret;
+       }
+
+       key = bpf_trampoline_compute_key(tgt_prog, btf_id);
+       tr = bpf_trampoline_get(key, &tgt_info);
+       if (!tr)
+               return -ENOMEM;
+
+       prog->aux->dst_trampoline = tr;
+       return 0;
+}
+
+struct btf *bpf_get_btf_vmlinux(void)
+{
+       if (!btf_vmlinux && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) {
+               mutex_lock(&bpf_verifier_lock);
+               if (!btf_vmlinux)
+                       btf_vmlinux = btf_parse_vmlinux();
+               mutex_unlock(&bpf_verifier_lock);
+       }
+       return btf_vmlinux;
 }
 
 int bpf_check(struct bpf_prog **prog, union bpf_attr *attr,
@@ -11520,12 +11590,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr,
        env->ops = bpf_verifier_ops[env->prog->type];
        is_priv = bpf_capable();
 
-       if (!btf_vmlinux && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) {
-               mutex_lock(&bpf_verifier_lock);
-               if (!btf_vmlinux)
-                       btf_vmlinux = btf_parse_vmlinux();
-               mutex_unlock(&bpf_verifier_lock);
-       }
+       bpf_get_btf_vmlinux();
 
        /* grab the mutex to protect few globals used by verifier */
        if (!is_priv)
index 05d3e13..d5d9f2d 100644 (file)
@@ -28,6 +28,8 @@ typedef void (*postgp_func_t)(struct rcu_tasks *rtp);
  * @kthread_ptr: This flavor's grace-period/callback-invocation kthread.
  * @gp_func: This flavor's grace-period-wait function.
  * @gp_state: Grace period's most recent state transition (debugging).
+ * @gp_sleep: Per-grace-period sleep to prevent CPU-bound looping.
+ * @init_fract: Initial backoff sleep interval.
  * @gp_jiffies: Time of last @gp_state transition.
  * @gp_start: Most recent grace-period start in jiffies.
  * @n_gps: Number of grace periods completed since boot.
@@ -48,6 +50,8 @@ struct rcu_tasks {
        struct wait_queue_head cbs_wq;
        raw_spinlock_t cbs_lock;
        int gp_state;
+       int gp_sleep;
+       int init_fract;
        unsigned long gp_jiffies;
        unsigned long gp_start;
        unsigned long n_gps;
@@ -81,7 +85,7 @@ static struct rcu_tasks rt_name =                                     \
 DEFINE_STATIC_SRCU(tasks_rcu_exit_srcu);
 
 /* Avoid IPIing CPUs early in the grace period. */
-#define RCU_TASK_IPI_DELAY (HZ / 2)
+#define RCU_TASK_IPI_DELAY (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB) ? HZ / 2 : 0)
 static int rcu_task_ipi_delay __read_mostly = RCU_TASK_IPI_DELAY;
 module_param(rcu_task_ipi_delay, int, 0644);
 
@@ -231,7 +235,7 @@ static int __noreturn rcu_tasks_kthread(void *arg)
                        cond_resched();
                }
                /* Paranoid sleep to keep this from entering a tight loop */
-               schedule_timeout_idle(HZ/10);
+               schedule_timeout_idle(rtp->gp_sleep);
 
                set_tasks_gp_state(rtp, RTGS_WAIT_CBS);
        }
@@ -329,8 +333,10 @@ static void rcu_tasks_wait_gp(struct rcu_tasks *rtp)
         */
        lastreport = jiffies;
 
-       /* Start off with HZ/10 wait and slowly back off to 1 HZ wait. */
-       fract = 10;
+       // Start off with initial wait and slowly back off to 1 HZ wait.
+       fract = rtp->init_fract;
+       if (fract > HZ)
+               fract = HZ;
 
        for (;;) {
                bool firstreport;
@@ -553,6 +559,8 @@ EXPORT_SYMBOL_GPL(rcu_barrier_tasks);
 
 static int __init rcu_spawn_tasks_kthread(void)
 {
+       rcu_tasks.gp_sleep = HZ / 10;
+       rcu_tasks.init_fract = 10;
        rcu_tasks.pregp_func = rcu_tasks_pregp_step;
        rcu_tasks.pertask_func = rcu_tasks_pertask;
        rcu_tasks.postscan_func = rcu_tasks_postscan;
@@ -685,6 +693,7 @@ EXPORT_SYMBOL_GPL(rcu_barrier_tasks_rude);
 
 static int __init rcu_spawn_tasks_rude_kthread(void)
 {
+       rcu_tasks_rude.gp_sleep = HZ / 10;
        rcu_spawn_tasks_kthread_generic(&rcu_tasks_rude);
        return 0;
 }
@@ -745,9 +754,9 @@ static DEFINE_PER_CPU(bool, trc_ipi_to_cpu);
 
 // The number of detections of task quiescent state relying on
 // heavyweight readers executing explicit memory barriers.
-unsigned long n_heavy_reader_attempts;
-unsigned long n_heavy_reader_updates;
-unsigned long n_heavy_reader_ofl_updates;
+static unsigned long n_heavy_reader_attempts;
+static unsigned long n_heavy_reader_updates;
+static unsigned long n_heavy_reader_ofl_updates;
 
 void call_rcu_tasks_trace(struct rcu_head *rhp, rcu_callback_t func);
 DEFINE_RCU_TASKS(rcu_tasks_trace, rcu_tasks_wait_gp, call_rcu_tasks_trace,
@@ -821,6 +830,12 @@ static void trc_read_check_handler(void *t_in)
                WRITE_ONCE(t->trc_reader_checked, true);
                goto reset_ipi;
        }
+       // If we are racing with an rcu_read_unlock_trace(), try again later.
+       if (unlikely(t->trc_reader_nesting < 0)) {
+               if (WARN_ON_ONCE(atomic_dec_and_test(&trc_n_readers_need_end)))
+                       wake_up(&trc_wait);
+               goto reset_ipi;
+       }
        WRITE_ONCE(t->trc_reader_checked, true);
 
        // Get here if the task is in a read-side critical section.  Set
@@ -911,7 +926,8 @@ static void trc_wait_for_one_reader(struct task_struct *t,
 
        // If currently running, send an IPI, either way, add to list.
        trc_add_holdout(t, bhp);
-       if (task_curr(t) && time_after(jiffies, rcu_tasks_trace.gp_start + rcu_task_ipi_delay)) {
+       if (task_curr(t) &&
+           time_after(jiffies + 1, rcu_tasks_trace.gp_start + rcu_task_ipi_delay)) {
                // The task is currently running, so try IPIing it.
                cpu = task_cpu(t);
 
@@ -1072,15 +1088,17 @@ static void rcu_tasks_trace_postgp(struct rcu_tasks *rtp)
                if (ret)
                        break;  // Count reached zero.
                // Stall warning time, so make a list of the offenders.
+               rcu_read_lock();
                for_each_process_thread(g, t)
                        if (READ_ONCE(t->trc_reader_special.b.need_qs))
                                trc_add_holdout(t, &holdouts);
+               rcu_read_unlock();
                firstreport = true;
-               list_for_each_entry_safe(t, g, &holdouts, trc_holdout_list)
-                       if (READ_ONCE(t->trc_reader_special.b.need_qs)) {
+               list_for_each_entry_safe(t, g, &holdouts, trc_holdout_list) {
+                       if (READ_ONCE(t->trc_reader_special.b.need_qs))
                                show_stalled_task_trace(t, &firstreport);
-                               trc_del_holdout(t);
-                       }
+                       trc_del_holdout(t); // Release task_struct reference.
+               }
                if (firstreport)
                        pr_err("INFO: rcu_tasks_trace detected stalls? (Counter/taskslist mismatch?)\n");
                show_stalled_ipi_trace();
@@ -1163,6 +1181,17 @@ EXPORT_SYMBOL_GPL(rcu_barrier_tasks_trace);
 
 static int __init rcu_spawn_tasks_trace_kthread(void)
 {
+       if (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB)) {
+               rcu_tasks_trace.gp_sleep = HZ / 10;
+               rcu_tasks_trace.init_fract = 10;
+       } else {
+               rcu_tasks_trace.gp_sleep = HZ / 200;
+               if (rcu_tasks_trace.gp_sleep <= 0)
+                       rcu_tasks_trace.gp_sleep = 1;
+               rcu_tasks_trace.init_fract = HZ / 5;
+               if (rcu_tasks_trace.init_fract <= 0)
+                       rcu_tasks_trace.init_fract = 1;
+       }
        rcu_tasks_trace.pregp_func = rcu_tasks_trace_pregp_step;
        rcu_tasks_trace.pertask_func = rcu_tasks_trace_pertask;
        rcu_tasks_trace.postscan_func = rcu_tasks_trace_postscan;
index 36508f4..e118a83 100644 (file)
@@ -7,6 +7,7 @@
 #include <linux/slab.h>
 #include <linux/bpf.h>
 #include <linux/bpf_perf_event.h>
+#include <linux/btf.h>
 #include <linux/filter.h>
 #include <linux/uaccess.h>
 #include <linux/ctype.h>
@@ -16,6 +17,9 @@
 #include <linux/error-injection.h>
 #include <linux/btf_ids.h>
 
+#include <uapi/linux/bpf.h>
+#include <uapi/linux/btf.h>
+
 #include <asm/tlb.h>
 
 #include "trace_probe.h"
@@ -67,6 +71,10 @@ static struct bpf_raw_event_map *bpf_get_raw_tracepoint_module(const char *name)
 u64 bpf_get_stackid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
 u64 bpf_get_stack(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
 
+static int bpf_btf_printf_prepare(struct btf_ptr *ptr, u32 btf_ptr_size,
+                                 u64 flags, const struct btf **btf,
+                                 s32 *btf_id);
+
 /**
  * trace_call_bpf - invoke BPF program
  * @call: tracepoint event
@@ -772,6 +780,31 @@ static const struct bpf_func_proto bpf_seq_write_proto = {
        .arg3_type      = ARG_CONST_SIZE_OR_ZERO,
 };
 
+BPF_CALL_4(bpf_seq_printf_btf, struct seq_file *, m, struct btf_ptr *, ptr,
+          u32, btf_ptr_size, u64, flags)
+{
+       const struct btf *btf;
+       s32 btf_id;
+       int ret;
+
+       ret = bpf_btf_printf_prepare(ptr, btf_ptr_size, flags, &btf, &btf_id);
+       if (ret)
+               return ret;
+
+       return btf_type_seq_show_flags(btf, btf_id, ptr->ptr, m, flags);
+}
+
+static const struct bpf_func_proto bpf_seq_printf_btf_proto = {
+       .func           = bpf_seq_printf_btf,
+       .gpl_only       = true,
+       .ret_type       = RET_INTEGER,
+       .arg1_type      = ARG_PTR_TO_BTF_ID,
+       .arg1_btf_id    = &btf_seq_file_ids[0],
+       .arg2_type      = ARG_PTR_TO_MEM,
+       .arg3_type      = ARG_CONST_SIZE_OR_ZERO,
+       .arg4_type      = ARG_ANYTHING,
+};
+
 static __always_inline int
 get_map_perf_counter(struct bpf_map *map, u64 flags,
                     u64 *value, u64 *enabled, u64 *running)
@@ -1147,6 +1180,65 @@ static const struct bpf_func_proto bpf_d_path_proto = {
        .allowed        = bpf_d_path_allowed,
 };
 
+#define BTF_F_ALL      (BTF_F_COMPACT  | BTF_F_NONAME | \
+                        BTF_F_PTR_RAW | BTF_F_ZERO)
+
+static int bpf_btf_printf_prepare(struct btf_ptr *ptr, u32 btf_ptr_size,
+                                 u64 flags, const struct btf **btf,
+                                 s32 *btf_id)
+{
+       const struct btf_type *t;
+
+       if (unlikely(flags & ~(BTF_F_ALL)))
+               return -EINVAL;
+
+       if (btf_ptr_size != sizeof(struct btf_ptr))
+               return -EINVAL;
+
+       *btf = bpf_get_btf_vmlinux();
+
+       if (IS_ERR_OR_NULL(*btf))
+               return PTR_ERR(*btf);
+
+       if (ptr->type_id > 0)
+               *btf_id = ptr->type_id;
+       else
+               return -EINVAL;
+
+       if (*btf_id > 0)
+               t = btf_type_by_id(*btf, *btf_id);
+       if (*btf_id <= 0 || !t)
+               return -ENOENT;
+
+       return 0;
+}
+
+BPF_CALL_5(bpf_snprintf_btf, char *, str, u32, str_size, struct btf_ptr *, ptr,
+          u32, btf_ptr_size, u64, flags)
+{
+       const struct btf *btf;
+       s32 btf_id;
+       int ret;
+
+       ret = bpf_btf_printf_prepare(ptr, btf_ptr_size, flags, &btf, &btf_id);
+       if (ret)
+               return ret;
+
+       return btf_type_snprintf_show(btf, btf_id, ptr->ptr, str, str_size,
+                                     flags);
+}
+
+const struct bpf_func_proto bpf_snprintf_btf_proto = {
+       .func           = bpf_snprintf_btf,
+       .gpl_only       = false,
+       .ret_type       = RET_INTEGER,
+       .arg1_type      = ARG_PTR_TO_MEM,
+       .arg2_type      = ARG_CONST_SIZE,
+       .arg3_type      = ARG_PTR_TO_MEM,
+       .arg4_type      = ARG_CONST_SIZE,
+       .arg5_type      = ARG_ANYTHING,
+};
+
 const struct bpf_func_proto *
 bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 {
@@ -1233,6 +1325,8 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
                return &bpf_get_task_stack_proto;
        case BPF_FUNC_copy_from_user:
                return prog->aux->sleepable ? &bpf_copy_from_user_proto : NULL;
+       case BPF_FUNC_snprintf_btf:
+               return &bpf_snprintf_btf_proto;
        default:
                return NULL;
        }
@@ -1630,6 +1724,10 @@ tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
                return prog->expected_attach_type == BPF_TRACE_ITER ?
                       &bpf_seq_write_proto :
                       NULL;
+       case BPF_FUNC_seq_printf_btf:
+               return prog->expected_attach_type == BPF_TRACE_ITER ?
+                      &bpf_seq_printf_btf_proto :
+                      NULL;
        case BPF_FUNC_d_path:
                return &bpf_d_path_proto;
        default:
@@ -1678,6 +1776,7 @@ const struct bpf_verifier_ops raw_tracepoint_verifier_ops = {
 };
 
 const struct bpf_prog_ops raw_tracepoint_prog_ops = {
+       .test_run = bpf_prog_test_run_raw_tp,
 };
 
 const struct bpf_verifier_ops tracing_verifier_ops = {
index a66f211..c1c30a9 100644 (file)
@@ -11,6 +11,7 @@
 #include <net/sock.h>
 #include <net/tcp.h>
 #include <linux/error-injection.h>
+#include <linux/smp.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/bpf_test_run.h>
@@ -204,6 +205,9 @@ int bpf_prog_test_run_tracing(struct bpf_prog *prog,
        int b = 2, err = -EFAULT;
        u32 retval = 0;
 
+       if (kattr->test.flags || kattr->test.cpu)
+               return -EINVAL;
+
        switch (prog->expected_attach_type) {
        case BPF_TRACE_FENTRY:
        case BPF_TRACE_FEXIT:
@@ -236,6 +240,84 @@ out:
        return err;
 }
 
+struct bpf_raw_tp_test_run_info {
+       struct bpf_prog *prog;
+       void *ctx;
+       u32 retval;
+};
+
+static void
+__bpf_prog_test_run_raw_tp(void *data)
+{
+       struct bpf_raw_tp_test_run_info *info = data;
+
+       rcu_read_lock();
+       info->retval = BPF_PROG_RUN(info->prog, info->ctx);
+       rcu_read_unlock();
+}
+
+int bpf_prog_test_run_raw_tp(struct bpf_prog *prog,
+                            const union bpf_attr *kattr,
+                            union bpf_attr __user *uattr)
+{
+       void __user *ctx_in = u64_to_user_ptr(kattr->test.ctx_in);
+       __u32 ctx_size_in = kattr->test.ctx_size_in;
+       struct bpf_raw_tp_test_run_info info;
+       int cpu = kattr->test.cpu, err = 0;
+       int current_cpu;
+
+       /* doesn't support data_in/out, ctx_out, duration, or repeat */
+       if (kattr->test.data_in || kattr->test.data_out ||
+           kattr->test.ctx_out || kattr->test.duration ||
+           kattr->test.repeat)
+               return -EINVAL;
+
+       if (ctx_size_in < prog->aux->max_ctx_offset)
+               return -EINVAL;
+
+       if ((kattr->test.flags & BPF_F_TEST_RUN_ON_CPU) == 0 && cpu != 0)
+               return -EINVAL;
+
+       if (ctx_size_in) {
+               info.ctx = kzalloc(ctx_size_in, GFP_USER);
+               if (!info.ctx)
+                       return -ENOMEM;
+               if (copy_from_user(info.ctx, ctx_in, ctx_size_in)) {
+                       err = -EFAULT;
+                       goto out;
+               }
+       } else {
+               info.ctx = NULL;
+       }
+
+       info.prog = prog;
+
+       current_cpu = get_cpu();
+       if ((kattr->test.flags & BPF_F_TEST_RUN_ON_CPU) == 0 ||
+           cpu == current_cpu) {
+               __bpf_prog_test_run_raw_tp(&info);
+       } else if (cpu >= nr_cpu_ids || !cpu_online(cpu)) {
+               /* smp_call_function_single() also checks cpu_online()
+                * after csd_lock(). However, since cpu is from user
+                * space, let's do an extra quick check to filter out
+                * invalid value before smp_call_function_single().
+                */
+               err = -ENXIO;
+       } else {
+               err = smp_call_function_single(cpu, __bpf_prog_test_run_raw_tp,
+                                              &info, 1);
+       }
+       put_cpu();
+
+       if (!err &&
+           copy_to_user(&uattr->test.retval, &info.retval, sizeof(u32)))
+               err = -EFAULT;
+
+out:
+       kfree(info.ctx);
+       return err;
+}
+
 static void *bpf_ctx_init(const union bpf_attr *kattr, u32 max_size)
 {
        void __user *data_in = u64_to_user_ptr(kattr->test.ctx_in);
@@ -410,6 +492,9 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
        void *data;
        int ret;
 
+       if (kattr->test.flags || kattr->test.cpu)
+               return -EINVAL;
+
        data = bpf_test_init(kattr, size, NET_SKB_PAD + NET_IP_ALIGN,
                             SKB_DATA_ALIGN(sizeof(struct skb_shared_info)));
        if (IS_ERR(data))
@@ -607,6 +692,9 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
        if (prog->type != BPF_PROG_TYPE_FLOW_DISSECTOR)
                return -EINVAL;
 
+       if (kattr->test.flags || kattr->test.cpu)
+               return -EINVAL;
+
        if (size < ETH_HLEN)
                return -EINVAL;
 
index 838efc6..c907f0d 100644 (file)
@@ -269,7 +269,7 @@ BPF_CALL_4(bpf_sk_storage_get, struct bpf_map *, map, struct sock *, sk,
 {
        struct bpf_local_storage_data *sdata;
 
-       if (flags > BPF_SK_STORAGE_GET_F_CREATE)
+       if (!sk || !sk_fullsock(sk) || flags > BPF_SK_STORAGE_GET_F_CREATE)
                return (unsigned long)NULL;
 
        sdata = sk_storage_lookup(sk, map, true);
@@ -299,6 +299,9 @@ BPF_CALL_4(bpf_sk_storage_get, struct bpf_map *, map, struct sock *, sk,
 
 BPF_CALL_2(bpf_sk_storage_delete, struct bpf_map *, map, struct sock *, sk)
 {
+       if (!sk || !sk_fullsock(sk))
+               return -EINVAL;
+
        if (refcount_inc_not_zero(&sk->sk_refcnt)) {
                int err;
 
@@ -355,7 +358,7 @@ const struct bpf_func_proto bpf_sk_storage_get_proto = {
        .gpl_only       = false,
        .ret_type       = RET_PTR_TO_MAP_VALUE_OR_NULL,
        .arg1_type      = ARG_CONST_MAP_PTR,
-       .arg2_type      = ARG_PTR_TO_SOCKET,
+       .arg2_type      = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
        .arg3_type      = ARG_PTR_TO_MAP_VALUE_OR_NULL,
        .arg4_type      = ARG_ANYTHING,
 };
@@ -375,27 +378,7 @@ const struct bpf_func_proto bpf_sk_storage_delete_proto = {
        .gpl_only       = false,
        .ret_type       = RET_INTEGER,
        .arg1_type      = ARG_CONST_MAP_PTR,
-       .arg2_type      = ARG_PTR_TO_SOCKET,
-};
-
-const struct bpf_func_proto sk_storage_get_btf_proto = {
-       .func           = bpf_sk_storage_get,
-       .gpl_only       = false,
-       .ret_type       = RET_PTR_TO_MAP_VALUE_OR_NULL,
-       .arg1_type      = ARG_CONST_MAP_PTR,
-       .arg2_type      = ARG_PTR_TO_BTF_ID,
-       .arg2_btf_id    = &btf_sock_ids[BTF_SOCK_TYPE_SOCK],
-       .arg3_type      = ARG_PTR_TO_MAP_VALUE_OR_NULL,
-       .arg4_type      = ARG_ANYTHING,
-};
-
-const struct bpf_func_proto sk_storage_delete_btf_proto = {
-       .func           = bpf_sk_storage_delete,
-       .gpl_only       = false,
-       .ret_type       = RET_INTEGER,
-       .arg1_type      = ARG_CONST_MAP_PTR,
-       .arg2_type      = ARG_PTR_TO_BTF_ID,
-       .arg2_btf_id    = &btf_sock_ids[BTF_SOCK_TYPE_SOCK],
+       .arg2_type      = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
 };
 
 struct bpf_sk_storage_diag {
index 706f8db..3fb6ada 100644 (file)
@@ -77,6 +77,9 @@
 #include <net/transp_v6.h>
 #include <linux/btf_ids.h>
 
+static const struct bpf_func_proto *
+bpf_sk_base_func_proto(enum bpf_func_id func_id);
+
 int copy_bpf_fprog_from_user(struct sock_fprog *dst, sockptr_t src, int len)
 {
        if (in_compat_syscall()) {
@@ -2160,13 +2163,233 @@ static int __bpf_redirect(struct sk_buff *skb, struct net_device *dev,
                return __bpf_redirect_no_mac(skb, dev, flags);
 }
 
+#if IS_ENABLED(CONFIG_IPV6)
+static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb)
+{
+       struct dst_entry *dst = skb_dst(skb);
+       struct net_device *dev = dst->dev;
+       u32 hh_len = LL_RESERVED_SPACE(dev);
+       const struct in6_addr *nexthop;
+       struct neighbour *neigh;
+
+       if (dev_xmit_recursion()) {
+               net_crit_ratelimited("bpf: recursion limit reached on datapath, buggy bpf program?\n");
+               goto out_drop;
+       }
+
+       skb->dev = dev;
+       skb->tstamp = 0;
+
+       if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) {
+               struct sk_buff *skb2;
+
+               skb2 = skb_realloc_headroom(skb, hh_len);
+               if (unlikely(!skb2)) {
+                       kfree_skb(skb);
+                       return -ENOMEM;
+               }
+               if (skb->sk)
+                       skb_set_owner_w(skb2, skb->sk);
+               consume_skb(skb);
+               skb = skb2;
+       }
+
+       rcu_read_lock_bh();
+       nexthop = rt6_nexthop(container_of(dst, struct rt6_info, dst),
+                             &ipv6_hdr(skb)->daddr);
+       neigh = ip_neigh_gw6(dev, nexthop);
+       if (likely(!IS_ERR(neigh))) {
+               int ret;
+
+               sock_confirm_neigh(skb, neigh);
+               dev_xmit_recursion_inc();
+               ret = neigh_output(neigh, skb, false);
+               dev_xmit_recursion_dec();
+               rcu_read_unlock_bh();
+               return ret;
+       }
+       rcu_read_unlock_bh();
+       IP6_INC_STATS(dev_net(dst->dev),
+                     ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
+out_drop:
+       kfree_skb(skb);
+       return -ENETDOWN;
+}
+
+static int __bpf_redirect_neigh_v6(struct sk_buff *skb, struct net_device *dev)
+{
+       const struct ipv6hdr *ip6h = ipv6_hdr(skb);
+       struct net *net = dev_net(dev);
+       int err, ret = NET_XMIT_DROP;
+       struct dst_entry *dst;
+       struct flowi6 fl6 = {
+               .flowi6_flags   = FLOWI_FLAG_ANYSRC,
+               .flowi6_mark    = skb->mark,
+               .flowlabel      = ip6_flowinfo(ip6h),
+               .flowi6_oif     = dev->ifindex,
+               .flowi6_proto   = ip6h->nexthdr,
+               .daddr          = ip6h->daddr,
+               .saddr          = ip6h->saddr,
+       };
+
+       dst = ipv6_stub->ipv6_dst_lookup_flow(net, NULL, &fl6, NULL);
+       if (IS_ERR(dst))
+               goto out_drop;
+
+       skb_dst_set(skb, dst);
+
+       err = bpf_out_neigh_v6(net, skb);
+       if (unlikely(net_xmit_eval(err)))
+               dev->stats.tx_errors++;
+       else
+               ret = NET_XMIT_SUCCESS;
+       goto out_xmit;
+out_drop:
+       dev->stats.tx_errors++;
+       kfree_skb(skb);
+out_xmit:
+       return ret;
+}
+#else
+static int __bpf_redirect_neigh_v6(struct sk_buff *skb, struct net_device *dev)
+{
+       kfree_skb(skb);
+       return NET_XMIT_DROP;
+}
+#endif /* CONFIG_IPV6 */
+
+#if IS_ENABLED(CONFIG_INET)
+static int bpf_out_neigh_v4(struct net *net, struct sk_buff *skb)
+{
+       struct dst_entry *dst = skb_dst(skb);
+       struct rtable *rt = container_of(dst, struct rtable, dst);
+       struct net_device *dev = dst->dev;
+       u32 hh_len = LL_RESERVED_SPACE(dev);
+       struct neighbour *neigh;
+       bool is_v6gw = false;
+
+       if (dev_xmit_recursion()) {
+               net_crit_ratelimited("bpf: recursion limit reached on datapath, buggy bpf program?\n");
+               goto out_drop;
+       }
+
+       skb->dev = dev;
+       skb->tstamp = 0;
+
+       if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) {
+               struct sk_buff *skb2;
+
+               skb2 = skb_realloc_headroom(skb, hh_len);
+               if (unlikely(!skb2)) {
+                       kfree_skb(skb);
+                       return -ENOMEM;
+               }
+               if (skb->sk)
+                       skb_set_owner_w(skb2, skb->sk);
+               consume_skb(skb);
+               skb = skb2;
+       }
+
+       rcu_read_lock_bh();
+       neigh = ip_neigh_for_gw(rt, skb, &is_v6gw);
+       if (likely(!IS_ERR(neigh))) {
+               int ret;
+
+               sock_confirm_neigh(skb, neigh);
+               dev_xmit_recursion_inc();
+               ret = neigh_output(neigh, skb, is_v6gw);
+               dev_xmit_recursion_dec();
+               rcu_read_unlock_bh();
+               return ret;
+       }
+       rcu_read_unlock_bh();
+out_drop:
+       kfree_skb(skb);
+       return -ENETDOWN;
+}
+
+static int __bpf_redirect_neigh_v4(struct sk_buff *skb, struct net_device *dev)
+{
+       const struct iphdr *ip4h = ip_hdr(skb);
+       struct net *net = dev_net(dev);
+       int err, ret = NET_XMIT_DROP;
+       struct rtable *rt;
+       struct flowi4 fl4 = {
+               .flowi4_flags   = FLOWI_FLAG_ANYSRC,
+               .flowi4_mark    = skb->mark,
+               .flowi4_tos     = RT_TOS(ip4h->tos),
+               .flowi4_oif     = dev->ifindex,
+               .flowi4_proto   = ip4h->protocol,
+               .daddr          = ip4h->daddr,
+               .saddr          = ip4h->saddr,
+       };
+
+       rt = ip_route_output_flow(net, &fl4, NULL);
+       if (IS_ERR(rt))
+               goto out_drop;
+       if (rt->rt_type != RTN_UNICAST && rt->rt_type != RTN_LOCAL) {
+               ip_rt_put(rt);
+               goto out_drop;
+       }
+
+       skb_dst_set(skb, &rt->dst);
+
+       err = bpf_out_neigh_v4(net, skb);
+       if (unlikely(net_xmit_eval(err)))
+               dev->stats.tx_errors++;
+       else
+               ret = NET_XMIT_SUCCESS;
+       goto out_xmit;
+out_drop:
+       dev->stats.tx_errors++;
+       kfree_skb(skb);
+out_xmit:
+       return ret;
+}
+#else
+static int __bpf_redirect_neigh_v4(struct sk_buff *skb, struct net_device *dev)
+{
+       kfree_skb(skb);
+       return NET_XMIT_DROP;
+}
+#endif /* CONFIG_INET */
+
+static int __bpf_redirect_neigh(struct sk_buff *skb, struct net_device *dev)
+{
+       struct ethhdr *ethh = eth_hdr(skb);
+
+       if (unlikely(skb->mac_header >= skb->network_header))
+               goto out;
+       bpf_push_mac_rcsum(skb);
+       if (is_multicast_ether_addr(ethh->h_dest))
+               goto out;
+
+       skb_pull(skb, sizeof(*ethh));
+       skb_unset_mac_header(skb);
+       skb_reset_network_header(skb);
+
+       if (skb->protocol == htons(ETH_P_IP))
+               return __bpf_redirect_neigh_v4(skb, dev);
+       else if (skb->protocol == htons(ETH_P_IPV6))
+               return __bpf_redirect_neigh_v6(skb, dev);
+out:
+       kfree_skb(skb);
+       return -ENOTSUPP;
+}
+
+/* Internal, non-exposed redirect flags. */
+enum {
+       BPF_F_NEIGH = (1ULL << 1),
+#define BPF_F_REDIRECT_INTERNAL        (BPF_F_NEIGH)
+};
+
 BPF_CALL_3(bpf_clone_redirect, struct sk_buff *, skb, u32, ifindex, u64, flags)
 {
        struct net_device *dev;
        struct sk_buff *clone;
        int ret;
 
-       if (unlikely(flags & ~(BPF_F_INGRESS)))
+       if (unlikely(flags & (~(BPF_F_INGRESS) | BPF_F_REDIRECT_INTERNAL)))
                return -EINVAL;
 
        dev = dev_get_by_index_rcu(dev_net(skb->dev), ifindex);
@@ -2203,23 +2426,11 @@ static const struct bpf_func_proto bpf_clone_redirect_proto = {
 DEFINE_PER_CPU(struct bpf_redirect_info, bpf_redirect_info);
 EXPORT_PER_CPU_SYMBOL_GPL(bpf_redirect_info);
 
-BPF_CALL_2(bpf_redirect, u32, ifindex, u64, flags)
-{
-       struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
-
-       if (unlikely(flags & ~(BPF_F_INGRESS)))
-               return TC_ACT_SHOT;
-
-       ri->flags = flags;
-       ri->tgt_index = ifindex;
-
-       return TC_ACT_REDIRECT;
-}
-
 int skb_do_redirect(struct sk_buff *skb)
 {
        struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
        struct net_device *dev;
+       u32 flags = ri->flags;
 
        dev = dev_get_by_index_rcu(dev_net(skb->dev), ri->tgt_index);
        ri->tgt_index = 0;
@@ -2228,7 +2439,22 @@ int skb_do_redirect(struct sk_buff *skb)
                return -EINVAL;
        }
 
-       return __bpf_redirect(skb, dev, ri->flags);
+       return flags & BPF_F_NEIGH ?
+              __bpf_redirect_neigh(skb, dev) :
+              __bpf_redirect(skb, dev, flags);
+}
+
+BPF_CALL_2(bpf_redirect, u32, ifindex, u64, flags)
+{
+       struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
+
+       if (unlikely(flags & (~(BPF_F_INGRESS) | BPF_F_REDIRECT_INTERNAL)))
+               return TC_ACT_SHOT;
+
+       ri->flags = flags;
+       ri->tgt_index = ifindex;
+
+       return TC_ACT_REDIRECT;
 }
 
 static const struct bpf_func_proto bpf_redirect_proto = {
@@ -2239,6 +2465,27 @@ static const struct bpf_func_proto bpf_redirect_proto = {
        .arg2_type      = ARG_ANYTHING,
 };
 
+BPF_CALL_2(bpf_redirect_neigh, u32, ifindex, u64, flags)
+{
+       struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
+
+       if (unlikely(flags))
+               return TC_ACT_SHOT;
+
+       ri->flags = BPF_F_NEIGH;
+       ri->tgt_index = ifindex;
+
+       return TC_ACT_REDIRECT;
+}
+
+static const struct bpf_func_proto bpf_redirect_neigh_proto = {
+       .func           = bpf_redirect_neigh,
+       .gpl_only       = false,
+       .ret_type       = RET_INTEGER,
+       .arg1_type      = ARG_ANYTHING,
+       .arg2_type      = ARG_ANYTHING,
+};
+
 BPF_CALL_2(bpf_msg_apply_bytes, struct sk_msg *, msg, u32, bytes)
 {
        msg->apply_bytes = bytes;
@@ -2704,6 +2951,23 @@ static const struct bpf_func_proto bpf_get_cgroup_classid_curr_proto = {
        .gpl_only       = false,
        .ret_type       = RET_INTEGER,
 };
+
+BPF_CALL_1(bpf_skb_cgroup_classid, const struct sk_buff *, skb)
+{
+       struct sock *sk = skb_to_full_sk(skb);
+
+       if (!sk || !sk_fullsock(sk))
+               return 0;
+
+       return sock_cgroup_classid(&sk->sk_cgrp_data);
+}
+
+static const struct bpf_func_proto bpf_skb_cgroup_classid_proto = {
+       .func           = bpf_skb_cgroup_classid,
+       .gpl_only       = false,
+       .ret_type       = RET_INTEGER,
+       .arg1_type      = ARG_PTR_TO_CTX,
+};
 #endif
 
 BPF_CALL_1(bpf_get_cgroup_classid, const struct sk_buff *, skb)
@@ -4085,18 +4349,17 @@ static inline u64 __bpf_sk_cgroup_id(struct sock *sk)
 {
        struct cgroup *cgrp;
 
+       sk = sk_to_full_sk(sk);
+       if (!sk || !sk_fullsock(sk))
+               return 0;
+
        cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
        return cgroup_id(cgrp);
 }
 
 BPF_CALL_1(bpf_skb_cgroup_id, const struct sk_buff *, skb)
 {
-       struct sock *sk = skb_to_full_sk(skb);
-
-       if (!sk || !sk_fullsock(sk))
-               return 0;
-
-       return __bpf_sk_cgroup_id(sk);
+       return __bpf_sk_cgroup_id(skb->sk);
 }
 
 static const struct bpf_func_proto bpf_skb_cgroup_id_proto = {
@@ -4112,6 +4375,10 @@ static inline u64 __bpf_sk_ancestor_cgroup_id(struct sock *sk,
        struct cgroup *ancestor;
        struct cgroup *cgrp;
 
+       sk = sk_to_full_sk(sk);
+       if (!sk || !sk_fullsock(sk))
+               return 0;
+
        cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
        ancestor = cgroup_ancestor(cgrp, ancestor_level);
        if (!ancestor)
@@ -4123,12 +4390,7 @@ static inline u64 __bpf_sk_ancestor_cgroup_id(struct sock *sk,
 BPF_CALL_2(bpf_skb_ancestor_cgroup_id, const struct sk_buff *, skb, int,
           ancestor_level)
 {
-       struct sock *sk = skb_to_full_sk(skb);
-
-       if (!sk || !sk_fullsock(sk))
-               return 0;
-
-       return __bpf_sk_ancestor_cgroup_id(sk, ancestor_level);
+       return __bpf_sk_ancestor_cgroup_id(skb->sk, ancestor_level);
 }
 
 static const struct bpf_func_proto bpf_skb_ancestor_cgroup_id_proto = {
@@ -4148,7 +4410,7 @@ static const struct bpf_func_proto bpf_sk_cgroup_id_proto = {
        .func           = bpf_sk_cgroup_id,
        .gpl_only       = false,
        .ret_type       = RET_INTEGER,
-       .arg1_type      = ARG_PTR_TO_SOCKET,
+       .arg1_type      = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
 };
 
 BPF_CALL_2(bpf_sk_ancestor_cgroup_id, struct sock *, sk, int, ancestor_level)
@@ -4160,7 +4422,7 @@ static const struct bpf_func_proto bpf_sk_ancestor_cgroup_id_proto = {
        .func           = bpf_sk_ancestor_cgroup_id,
        .gpl_only       = false,
        .ret_type       = RET_INTEGER,
-       .arg1_type      = ARG_PTR_TO_SOCKET,
+       .arg1_type      = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
        .arg2_type      = ARG_ANYTHING,
 };
 #endif
@@ -4214,7 +4476,7 @@ const struct bpf_func_proto bpf_xdp_output_proto = {
 
 BPF_CALL_1(bpf_get_socket_cookie, struct sk_buff *, skb)
 {
-       return skb->sk ? sock_gen_cookie(skb->sk) : 0;
+       return skb->sk ? __sock_gen_cookie(skb->sk) : 0;
 }
 
 static const struct bpf_func_proto bpf_get_socket_cookie_proto = {
@@ -4226,7 +4488,7 @@ static const struct bpf_func_proto bpf_get_socket_cookie_proto = {
 
 BPF_CALL_1(bpf_get_socket_cookie_sock_addr, struct bpf_sock_addr_kern *, ctx)
 {
-       return sock_gen_cookie(ctx->sk);
+       return __sock_gen_cookie(ctx->sk);
 }
 
 static const struct bpf_func_proto bpf_get_socket_cookie_sock_addr_proto = {
@@ -4238,7 +4500,7 @@ static const struct bpf_func_proto bpf_get_socket_cookie_sock_addr_proto = {
 
 BPF_CALL_1(bpf_get_socket_cookie_sock, struct sock *, ctx)
 {
-       return sock_gen_cookie(ctx);
+       return __sock_gen_cookie(ctx);
 }
 
 static const struct bpf_func_proto bpf_get_socket_cookie_sock_proto = {
@@ -4250,7 +4512,7 @@ static const struct bpf_func_proto bpf_get_socket_cookie_sock_proto = {
 
 BPF_CALL_1(bpf_get_socket_cookie_sock_ops, struct bpf_sock_ops_kern *, ctx)
 {
-       return sock_gen_cookie(ctx->sk);
+       return __sock_gen_cookie(ctx->sk);
 }
 
 static const struct bpf_func_proto bpf_get_socket_cookie_sock_ops_proto = {
@@ -4263,7 +4525,7 @@ static const struct bpf_func_proto bpf_get_socket_cookie_sock_ops_proto = {
 static u64 __bpf_get_netns_cookie(struct sock *sk)
 {
 #ifdef CONFIG_NET_NS
-       return net_gen_cookie(sk ? sk->sk_net.net : &init_net);
+       return __net_gen_cookie(sk ? sk->sk_net.net : &init_net);
 #else
        return 0;
 #endif
@@ -5694,7 +5956,7 @@ static const struct bpf_func_proto bpf_sk_lookup_udp_proto = {
 
 BPF_CALL_1(bpf_sk_release, struct sock *, sk)
 {
-       if (sk_is_refcounted(sk))
+       if (sk && sk_is_refcounted(sk))
                sock_gen_put(sk);
        return 0;
 }
@@ -5703,7 +5965,7 @@ static const struct bpf_func_proto bpf_sk_release_proto = {
        .func           = bpf_sk_release,
        .gpl_only       = false,
        .ret_type       = RET_INTEGER,
-       .arg1_type      = ARG_PTR_TO_SOCK_COMMON,
+       .arg1_type      = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
 };
 
 BPF_CALL_5(bpf_xdp_sk_lookup_udp, struct xdp_buff *, ctx,
@@ -6085,7 +6347,7 @@ BPF_CALL_5(bpf_tcp_check_syncookie, struct sock *, sk, void *, iph, u32, iph_len
        u32 cookie;
        int ret;
 
-       if (unlikely(th_len < sizeof(*th)))
+       if (unlikely(!sk || th_len < sizeof(*th)))
                return -EINVAL;
 
        /* sk_listener() allows TCP_NEW_SYN_RECV, which makes no sense here. */
@@ -6138,7 +6400,7 @@ static const struct bpf_func_proto bpf_tcp_check_syncookie_proto = {
        .gpl_only       = true,
        .pkt_access     = true,
        .ret_type       = RET_INTEGER,
-       .arg1_type      = ARG_PTR_TO_SOCK_COMMON,
+       .arg1_type      = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
        .arg2_type      = ARG_PTR_TO_MEM,
        .arg3_type      = ARG_CONST_SIZE,
        .arg4_type      = ARG_PTR_TO_MEM,
@@ -6152,7 +6414,7 @@ BPF_CALL_5(bpf_tcp_gen_syncookie, struct sock *, sk, void *, iph, u32, iph_len,
        u32 cookie;
        u16 mss;
 
-       if (unlikely(th_len < sizeof(*th) || th_len != th->doff * 4))
+       if (unlikely(!sk || th_len < sizeof(*th) || th_len != th->doff * 4))
                return -EINVAL;
 
        if (sk->sk_protocol != IPPROTO_TCP || sk->sk_state != TCP_LISTEN)
@@ -6207,7 +6469,7 @@ static const struct bpf_func_proto bpf_tcp_gen_syncookie_proto = {
        .gpl_only       = true, /* __cookie_v*_init_sequence() is GPL */
        .pkt_access     = true,
        .ret_type       = RET_INTEGER,
-       .arg1_type      = ARG_PTR_TO_SOCK_COMMON,
+       .arg1_type      = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
        .arg2_type      = ARG_PTR_TO_MEM,
        .arg3_type      = ARG_CONST_SIZE,
        .arg4_type      = ARG_PTR_TO_MEM,
@@ -6216,7 +6478,7 @@ static const struct bpf_func_proto bpf_tcp_gen_syncookie_proto = {
 
 BPF_CALL_3(bpf_sk_assign, struct sk_buff *, skb, struct sock *, sk, u64, flags)
 {
-       if (flags != 0)
+       if (!sk || flags != 0)
                return -EINVAL;
        if (!skb_at_tc_ingress(skb))
                return -EOPNOTSUPP;
@@ -6240,7 +6502,7 @@ static const struct bpf_func_proto bpf_sk_assign_proto = {
        .gpl_only       = false,
        .ret_type       = RET_INTEGER,
        .arg1_type      = ARG_PTR_TO_CTX,
-       .arg2_type      = ARG_PTR_TO_SOCK_COMMON,
+       .arg2_type      = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
        .arg3_type      = ARG_ANYTHING,
 };
 
@@ -6620,7 +6882,7 @@ sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
                        return NULL;
                }
        default:
-               return bpf_base_func_proto(func_id);
+               return bpf_sk_base_func_proto(func_id);
        }
 }
 
@@ -6639,7 +6901,7 @@ sk_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
        case BPF_FUNC_perf_event_output:
                return &bpf_skb_event_output_proto;
        default:
-               return bpf_base_func_proto(func_id);
+               return bpf_sk_base_func_proto(func_id);
        }
 }
 
@@ -6741,6 +7003,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
                return bpf_get_skb_set_tunnel_proto(func_id);
        case BPF_FUNC_redirect:
                return &bpf_redirect_proto;
+       case BPF_FUNC_redirect_neigh:
+               return &bpf_redirect_neigh_proto;
        case BPF_FUNC_get_route_realm:
                return &bpf_get_route_realm_proto;
        case BPF_FUNC_get_hash_recalc:
@@ -6771,6 +7035,10 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
        case BPF_FUNC_skb_get_xfrm_state:
                return &bpf_skb_get_xfrm_state_proto;
 #endif
+#ifdef CONFIG_CGROUP_NET_CLASSID
+       case BPF_FUNC_skb_cgroup_classid:
+               return &bpf_skb_cgroup_classid_proto;
+#endif
 #ifdef CONFIG_SOCK_CGROUP_DATA
        case BPF_FUNC_skb_cgroup_id:
                return &bpf_skb_cgroup_id_proto;
@@ -6800,7 +7068,7 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
                return &bpf_sk_assign_proto;
 #endif
        default:
-               return bpf_base_func_proto(func_id);
+               return bpf_sk_base_func_proto(func_id);
        }
 }
 
@@ -6841,7 +7109,7 @@ xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
                return &bpf_tcp_gen_syncookie_proto;
 #endif
        default:
-               return bpf_base_func_proto(func_id);
+               return bpf_sk_base_func_proto(func_id);
        }
 }
 
@@ -6883,7 +7151,7 @@ sock_ops_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
                return &bpf_tcp_sock_proto;
 #endif /* CONFIG_INET */
        default:
-               return bpf_base_func_proto(func_id);
+               return bpf_sk_base_func_proto(func_id);
        }
 }
 
@@ -6929,7 +7197,7 @@ sk_msg_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
                return &bpf_get_cgroup_classid_curr_proto;
 #endif
        default:
-               return bpf_base_func_proto(func_id);
+               return bpf_sk_base_func_proto(func_id);
        }
 }
 
@@ -6971,7 +7239,7 @@ sk_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
                return &bpf_skc_lookup_tcp_proto;
 #endif
        default:
-               return bpf_base_func_proto(func_id);
+               return bpf_sk_base_func_proto(func_id);
        }
 }
 
@@ -6982,7 +7250,7 @@ flow_dissector_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
        case BPF_FUNC_skb_load_bytes:
                return &bpf_flow_dissector_load_bytes_proto;
        default:
-               return bpf_base_func_proto(func_id);
+               return bpf_sk_base_func_proto(func_id);
        }
 }
 
@@ -7009,7 +7277,7 @@ lwt_out_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
        case BPF_FUNC_skb_under_cgroup:
                return &bpf_skb_under_cgroup_proto;
        default:
-               return bpf_base_func_proto(func_id);
+               return bpf_sk_base_func_proto(func_id);
        }
 }
 
@@ -9746,7 +10014,7 @@ sk_lookup_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
        case BPF_FUNC_sk_release:
                return &bpf_sk_release_proto;
        default:
-               return bpf_base_func_proto(func_id);
+               return bpf_sk_base_func_proto(func_id);
        }
 }
 
@@ -9913,8 +10181,7 @@ const struct bpf_func_proto bpf_skc_to_tcp6_sock_proto = {
        .func                   = bpf_skc_to_tcp6_sock,
        .gpl_only               = false,
        .ret_type               = RET_PTR_TO_BTF_ID_OR_NULL,
-       .arg1_type              = ARG_PTR_TO_BTF_ID,
-       .arg1_btf_id            = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
+       .arg1_type              = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
        .ret_btf_id             = &btf_sock_ids[BTF_SOCK_TYPE_TCP6],
 };
 
@@ -9930,8 +10197,7 @@ const struct bpf_func_proto bpf_skc_to_tcp_sock_proto = {
        .func                   = bpf_skc_to_tcp_sock,
        .gpl_only               = false,
        .ret_type               = RET_PTR_TO_BTF_ID_OR_NULL,
-       .arg1_type              = ARG_PTR_TO_BTF_ID,
-       .arg1_btf_id            = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
+       .arg1_type              = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
        .ret_btf_id             = &btf_sock_ids[BTF_SOCK_TYPE_TCP],
 };
 
@@ -9954,8 +10220,7 @@ const struct bpf_func_proto bpf_skc_to_tcp_timewait_sock_proto = {
        .func                   = bpf_skc_to_tcp_timewait_sock,
        .gpl_only               = false,
        .ret_type               = RET_PTR_TO_BTF_ID_OR_NULL,
-       .arg1_type              = ARG_PTR_TO_BTF_ID,
-       .arg1_btf_id            = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
+       .arg1_type              = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
        .ret_btf_id             = &btf_sock_ids[BTF_SOCK_TYPE_TCP_TW],
 };
 
@@ -9978,8 +10243,7 @@ const struct bpf_func_proto bpf_skc_to_tcp_request_sock_proto = {
        .func                   = bpf_skc_to_tcp_request_sock,
        .gpl_only               = false,
        .ret_type               = RET_PTR_TO_BTF_ID_OR_NULL,
-       .arg1_type              = ARG_PTR_TO_BTF_ID,
-       .arg1_btf_id            = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
+       .arg1_type              = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
        .ret_btf_id             = &btf_sock_ids[BTF_SOCK_TYPE_TCP_REQ],
 };
 
@@ -10000,7 +10264,37 @@ const struct bpf_func_proto bpf_skc_to_udp6_sock_proto = {
        .func                   = bpf_skc_to_udp6_sock,
        .gpl_only               = false,
        .ret_type               = RET_PTR_TO_BTF_ID_OR_NULL,
-       .arg1_type              = ARG_PTR_TO_BTF_ID,
-       .arg1_btf_id            = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
+       .arg1_type              = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
        .ret_btf_id             = &btf_sock_ids[BTF_SOCK_TYPE_UDP6],
 };
+
+static const struct bpf_func_proto *
+bpf_sk_base_func_proto(enum bpf_func_id func_id)
+{
+       const struct bpf_func_proto *func;
+
+       switch (func_id) {
+       case BPF_FUNC_skc_to_tcp6_sock:
+               func = &bpf_skc_to_tcp6_sock_proto;
+               break;
+       case BPF_FUNC_skc_to_tcp_sock:
+               func = &bpf_skc_to_tcp_sock_proto;
+               break;
+       case BPF_FUNC_skc_to_tcp_timewait_sock:
+               func = &bpf_skc_to_tcp_timewait_sock_proto;
+               break;
+       case BPF_FUNC_skc_to_tcp_request_sock:
+               func = &bpf_skc_to_tcp_request_sock_proto;
+               break;
+       case BPF_FUNC_skc_to_udp6_sock:
+               func = &bpf_skc_to_udp6_sock_proto;
+               break;
+       default:
+               return bpf_base_func_proto(func_id);
+       }
+
+       if (!perfmon_capable())
+               return NULL;
+
+       return func;
+}
index 944ab21..dbc66b8 100644 (file)
@@ -19,6 +19,7 @@
 #include <linux/net_namespace.h>
 #include <linux/sched/task.h>
 #include <linux/uidgid.h>
+#include <linux/cookie.h>
 
 #include <net/sock.h>
 #include <net/netlink.h>
@@ -69,16 +70,16 @@ EXPORT_SYMBOL_GPL(pernet_ops_rwsem);
 
 static unsigned int max_gen_ptrs = INITIAL_NET_GEN_PTRS;
 
-static atomic64_t cookie_gen;
+DEFINE_COOKIE(net_cookie);
 
-u64 net_gen_cookie(struct net *net)
+u64 __net_gen_cookie(struct net *net)
 {
        while (1) {
                u64 res = atomic64_read(&net->net_cookie);
 
                if (res)
                        return res;
-               res = atomic64_inc_return(&cookie_gen);
+               res = gen_cookie_next(&net_cookie);
                atomic64_cmpxchg(&net->net_cookie, 0, res);
        }
 }
@@ -1101,7 +1102,10 @@ static int __init net_ns_init(void)
                panic("Could not allocate generic netns");
 
        rcu_assign_pointer(init_net.gen, ng);
-       net_gen_cookie(&init_net);
+
+       preempt_disable();
+       __net_gen_cookie(&init_net);
+       preempt_enable();
 
        down_write(&pernet_ops_rwsem);
        if (setup_net(&init_net, &init_user_ns))
index c13ffbd..c9c45b9 100644 (file)
@@ -11,7 +11,7 @@
 #include <linux/tcp.h>
 #include <linux/workqueue.h>
 #include <linux/nospec.h>
-
+#include <linux/cookie.h>
 #include <linux/inet_diag.h>
 #include <linux/sock_diag.h>
 
@@ -19,16 +19,17 @@ static const struct sock_diag_handler *sock_diag_handlers[AF_MAX];
 static int (*inet_rcv_compat)(struct sk_buff *skb, struct nlmsghdr *nlh);
 static DEFINE_MUTEX(sock_diag_table_mutex);
 static struct workqueue_struct *broadcast_wq;
-static atomic64_t cookie_gen;
 
-u64 sock_gen_cookie(struct sock *sk)
+DEFINE_COOKIE(sock_cookie);
+
+u64 __sock_gen_cookie(struct sock *sk)
 {
        while (1) {
                u64 res = atomic64_read(&sk->sk_cookie);
 
                if (res)
                        return res;
-               res = atomic64_inc_return(&cookie_gen);
+               res = gen_cookie_next(&sock_cookie);
                atomic64_cmpxchg(&sk->sk_cookie, 0, res);
        }
 }
index e1f05e3..e83a80e 100644 (file)
@@ -401,7 +401,7 @@ static void *sock_map_lookup_sys(struct bpf_map *map, void *key)
        if (!sk)
                return ERR_PTR(-ENOENT);
 
-       sock_gen_cookie(sk);
+       __sock_gen_cookie(sk);
        return &sk->sk_cookie;
 }
 
@@ -610,6 +610,9 @@ static int sock_map_update_elem(struct bpf_map *map, void *key,
        struct sock *sk = (struct sock *)value;
        int ret;
 
+       if (unlikely(!sk || !sk_fullsock(sk)))
+               return -EINVAL;
+
        if (!sock_map_sk_is_suitable(sk))
                return -EOPNOTSUPP;
 
@@ -1206,7 +1209,7 @@ static void *sock_hash_lookup_sys(struct bpf_map *map, void *key)
        if (!sk)
                return ERR_PTR(-ENOENT);
 
-       sock_gen_cookie(sk);
+       __sock_gen_cookie(sk);
        return &sk->sk_cookie;
 }
 
index 74a2ef5..618954f 100644 (file)
@@ -28,22 +28,6 @@ static u32 unsupported_ops[] = {
 static const struct btf_type *tcp_sock_type;
 static u32 tcp_sock_id, sock_id;
 
-static struct bpf_func_proto btf_sk_storage_get_proto __read_mostly;
-static struct bpf_func_proto btf_sk_storage_delete_proto __read_mostly;
-
-static void convert_sk_func_proto(struct bpf_func_proto *to, const struct bpf_func_proto *from)
-{
-       int i;
-
-       *to = *from;
-       for (i = 0; i < ARRAY_SIZE(to->arg_type); i++) {
-               if (to->arg_type[i] == ARG_PTR_TO_SOCKET) {
-                       to->arg_type[i] = ARG_PTR_TO_BTF_ID;
-                       to->arg_btf_id[i] = &tcp_sock_id;
-               }
-       }
-}
-
 static int bpf_tcp_ca_init(struct btf *btf)
 {
        s32 type_id;
@@ -59,9 +43,6 @@ static int bpf_tcp_ca_init(struct btf *btf)
        tcp_sock_id = type_id;
        tcp_sock_type = btf_type_by_id(btf, tcp_sock_id);
 
-       convert_sk_func_proto(&btf_sk_storage_get_proto, &bpf_sk_storage_get_proto);
-       convert_sk_func_proto(&btf_sk_storage_delete_proto, &bpf_sk_storage_delete_proto);
-
        return 0;
 }
 
@@ -188,9 +169,9 @@ bpf_tcp_ca_get_func_proto(enum bpf_func_id func_id,
        case BPF_FUNC_tcp_send_ack:
                return &bpf_tcp_send_ack_proto;
        case BPF_FUNC_sk_storage_get:
-               return &btf_sk_storage_get_proto;
+               return &bpf_sk_storage_get_proto;
        case BPF_FUNC_sk_storage_delete:
-               return &btf_sk_storage_delete_proto;
+               return &bpf_sk_storage_delete_proto;
        default:
                return bpf_base_func_proto(func_id);
        }
index 3895697..ba4dfb1 100644 (file)
@@ -703,6 +703,7 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
                        xs->pool = xp_create_and_assign_umem(xs,
                                                             umem_xs->umem);
                        if (!xs->pool) {
+                               err = -ENOMEM;
                                sockfd_put(sock);
                                goto out_unlock;
                        }
index 2d883f6..dc1dd5e 100644 (file)
@@ -96,7 +96,7 @@ struct xsk_queue {
  * seen and read by the consumer.
  *
  * The consumer peeks into the ring to see if the producer has written
- * any new entries. If so, the producer can then read these entries
+ * any new entries. If so, the consumer can then read these entries
  * and when it is done reading them release them back to the producer
  * so that the producer can use these slots to fill in new entries.
  *
index cab9cca..8142d02 100644 (file)
@@ -31,28 +31,30 @@ struct {
 #define PARSE_IP 3
 #define PARSE_IPV6 4
 
-/* protocol dispatch routine.
- * It tail-calls next BPF program depending on eth proto
- * Note, we could have used:
- * bpf_tail_call(skb, &jmp_table, proto);
- * but it would need large prog_array
+/* Protocol dispatch routine. It tail-calls next BPF program depending
+ * on eth proto. Note, we could have used ...
+ *
+ *   bpf_tail_call(skb, &jmp_table, proto);
+ *
+ * ... but it would need large prog_array and cannot be optimised given
+ * the map key is not static.
  */
 static inline void parse_eth_proto(struct __sk_buff *skb, u32 proto)
 {
        switch (proto) {
        case ETH_P_8021Q:
        case ETH_P_8021AD:
-               bpf_tail_call(skb, &jmp_table, PARSE_VLAN);
+               bpf_tail_call_static(skb, &jmp_table, PARSE_VLAN);
                break;
        case ETH_P_MPLS_UC:
        case ETH_P_MPLS_MC:
-               bpf_tail_call(skb, &jmp_table, PARSE_MPLS);
+               bpf_tail_call_static(skb, &jmp_table, PARSE_MPLS);
                break;
        case ETH_P_IP:
-               bpf_tail_call(skb, &jmp_table, PARSE_IP);
+               bpf_tail_call_static(skb, &jmp_table, PARSE_IP);
                break;
        case ETH_P_IPV6:
-               bpf_tail_call(skb, &jmp_table, PARSE_IPV6);
+               bpf_tail_call_static(skb, &jmp_table, PARSE_IPV6);
                break;
        }
 }
index 0838817..7d86fdd 100755 (executable)
@@ -433,6 +433,7 @@ class PrinterHelpers(Printer):
             'struct sk_msg_md',
             'struct xdp_md',
             'struct path',
+            'struct btf_ptr',
     ]
     known_types = {
             '...',
@@ -474,6 +475,7 @@ class PrinterHelpers(Printer):
             'struct udp6_sock',
             'struct task_struct',
             'struct path',
+            'struct btf_ptr',
     }
     mapped_types = {
             'u8': '__u8',
index a228125..4f556cf 100644 (file)
@@ -414,6 +414,9 @@ enum {
 
 /* Enable memory-mapping BPF map */
        BPF_F_MMAPABLE          = (1U << 10),
+
+/* Share perf_event among processes */
+       BPF_F_PRESERVE_ELEMS    = (1U << 11),
 };
 
 /* Flags for BPF_PROG_QUERY. */
@@ -424,6 +427,11 @@ enum {
  */
 #define BPF_F_QUERY_EFFECTIVE  (1U << 0)
 
+/* Flags for BPF_PROG_TEST_RUN */
+
+/* If set, run the test on the cpu specified by bpf_attr.test.cpu */
+#define BPF_F_TEST_RUN_ON_CPU  (1U << 0)
+
 /* type for BPF_ENABLE_STATS */
 enum bpf_stats_type {
        /* enabled run_time_ns and run_cnt */
@@ -566,6 +574,8 @@ union bpf_attr {
                                                 */
                __aligned_u64   ctx_in;
                __aligned_u64   ctx_out;
+               __u32           flags;
+               __u32           cpu;
        } test;
 
        struct { /* anonymous struct used by BPF_*_GET_*_ID */
@@ -632,8 +642,13 @@ union bpf_attr {
                };
                __u32           attach_type;    /* attach type */
                __u32           flags;          /* extra flags */
-               __aligned_u64   iter_info;      /* extra bpf_iter_link_info */
-               __u32           iter_info_len;  /* iter_info length */
+               union {
+                       __u32           target_btf_id;  /* btf_id of target to attach to */
+                       struct {
+                               __aligned_u64   iter_info;      /* extra bpf_iter_link_info */
+                               __u32           iter_info_len;  /* iter_info length */
+                       };
+               };
        } link_create;
 
        struct { /* struct used by BPF_LINK_UPDATE command */
@@ -2512,7 +2527,7 @@ union bpf_attr {
  *             result is from *reuse*\ **->socks**\ [] using the hash of the
  *             tuple.
  *
- * long bpf_sk_release(struct bpf_sock *sock)
+ * long bpf_sk_release(void *sock)
  *     Description
  *             Release the reference held by *sock*. *sock* must be a
  *             non-**NULL** pointer that was returned from
@@ -2692,7 +2707,7 @@ union bpf_attr {
  *             result is from *reuse*\ **->socks**\ [] using the hash of the
  *             tuple.
  *
- * long bpf_tcp_check_syncookie(struct bpf_sock *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len)
+ * long bpf_tcp_check_syncookie(void *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len)
  *     Description
  *             Check whether *iph* and *th* contain a valid SYN cookie ACK for
  *             the listening socket in *sk*.
@@ -2861,6 +2876,7 @@ union bpf_attr {
  *             0 on success.
  *
  *             **-ENOENT** if the bpf-local-storage cannot be found.
+ *             **-EINVAL** if sk is not a fullsock (e.g. a request_sock).
  *
  * long bpf_send_signal(u32 sig)
  *     Description
@@ -2877,7 +2893,7 @@ union bpf_attr {
  *
  *             **-EAGAIN** if bpf program can try again.
  *
- * s64 bpf_tcp_gen_syncookie(struct bpf_sock *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len)
+ * s64 bpf_tcp_gen_syncookie(void *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len)
  *     Description
  *             Try to issue a SYN cookie for the packet with corresponding
  *             IP/TCP headers, *iph* and *th*, on the listening socket in *sk*.
@@ -3106,7 +3122,7 @@ union bpf_attr {
  *     Return
  *             The id is returned or 0 in case the id could not be retrieved.
  *
- * long bpf_sk_assign(struct sk_buff *skb, struct bpf_sock *sk, u64 flags)
+ * long bpf_sk_assign(struct sk_buff *skb, void *sk, u64 flags)
  *     Description
  *             Helper is overloaded depending on BPF program type. This
  *             description applies to **BPF_PROG_TYPE_SCHED_CLS** and
@@ -3234,11 +3250,11 @@ union bpf_attr {
  *
  *             **-EOVERFLOW** if an overflow happened: The same object will be tried again.
  *
- * u64 bpf_sk_cgroup_id(struct bpf_sock *sk)
+ * u64 bpf_sk_cgroup_id(void *sk)
  *     Description
  *             Return the cgroup v2 id of the socket *sk*.
  *
- *             *sk* must be a non-**NULL** pointer to a full socket, e.g. one
+ *             *sk* must be a non-**NULL** pointer to a socket, e.g. one
  *             returned from **bpf_sk_lookup_xxx**\ (),
  *             **bpf_sk_fullsock**\ (), etc. The format of returned id is
  *             same as in **bpf_skb_cgroup_id**\ ().
@@ -3248,7 +3264,7 @@ union bpf_attr {
  *     Return
  *             The id is returned or 0 in case the id could not be retrieved.
  *
- * u64 bpf_sk_ancestor_cgroup_id(struct bpf_sock *sk, int ancestor_level)
+ * u64 bpf_sk_ancestor_cgroup_id(void *sk, int ancestor_level)
  *     Description
  *             Return id of cgroup v2 that is ancestor of cgroup associated
  *             with the *sk* at the *ancestor_level*.  The root cgroup is at
@@ -3586,6 +3602,72 @@ union bpf_attr {
  *             the data in *dst*. This is a wrapper of **copy_from_user**\ ().
  *     Return
  *             0 on success, or a negative error in case of failure.
+ *
+ * long bpf_snprintf_btf(char *str, u32 str_size, struct btf_ptr *ptr, u32 btf_ptr_size, u64 flags)
+ *     Description
+ *             Use BTF to store a string representation of *ptr*->ptr in *str*,
+ *             using *ptr*->type_id.  This value should specify the type
+ *             that *ptr*->ptr points to. LLVM __builtin_btf_type_id(type, 1)
+ *             can be used to look up vmlinux BTF type ids. Traversing the
+ *             data structure using BTF, the type information and values are
+ *             stored in the first *str_size* - 1 bytes of *str*.  Safe copy of
+ *             the pointer data is carried out to avoid kernel crashes during
+ *             operation.  Smaller types can use string space on the stack;
+ *             larger programs can use map data to store the string
+ *             representation.
+ *
+ *             The string can be subsequently shared with userspace via
+ *             bpf_perf_event_output() or ring buffer interfaces.
+ *             bpf_trace_printk() is to be avoided as it places too small
+ *             a limit on string size to be useful.
+ *
+ *             *flags* is a combination of
+ *
+ *             **BTF_F_COMPACT**
+ *                     no formatting around type information
+ *             **BTF_F_NONAME**
+ *                     no struct/union member names/types
+ *             **BTF_F_PTR_RAW**
+ *                     show raw (unobfuscated) pointer values;
+ *                     equivalent to printk specifier %px.
+ *             **BTF_F_ZERO**
+ *                     show zero-valued struct/union members; they
+ *                     are not displayed by default
+ *
+ *     Return
+ *             The number of bytes that were written (or would have been
+ *             written if output had to be truncated due to string size),
+ *             or a negative error in cases of failure.
+ *
+ * long bpf_seq_printf_btf(struct seq_file *m, struct btf_ptr *ptr, u32 ptr_size, u64 flags)
+ *     Description
+ *             Use BTF to write to seq_write a string representation of
+ *             *ptr*->ptr, using *ptr*->type_id as per bpf_snprintf_btf().
+ *             *flags* are identical to those used for bpf_snprintf_btf.
+ *     Return
+ *             0 on success or a negative error in case of failure.
+ *
+ * u64 bpf_skb_cgroup_classid(struct sk_buff *skb)
+ *     Description
+ *             See **bpf_get_cgroup_classid**\ () for the main description.
+ *             This helper differs from **bpf_get_cgroup_classid**\ () in that
+ *             the cgroup v1 net_cls class is retrieved only from the *skb*'s
+ *             associated socket instead of the current process.
+ *     Return
+ *             The id is returned or 0 in case the id could not be retrieved.
+ *
+ * long bpf_redirect_neigh(u32 ifindex, u64 flags)
+ *     Description
+ *             Redirect the packet to another net device of index *ifindex*
+ *             and fill in L2 addresses from neighboring subsystem. This helper
+ *             is somewhat similar to **bpf_redirect**\ (), except that it
+ *             fills in e.g. MAC addresses based on the L3 information from
+ *             the packet. This helper is supported for IPv4 and IPv6 protocols.
+ *             The *flags* argument is reserved and must be 0. The helper is
+ *             currently only supported for tc BPF program types.
+ *     Return
+ *             The helper returns **TC_ACT_REDIRECT** on success or
+ *             **TC_ACT_SHOT** on error.
  */
 #define __BPF_FUNC_MAPPER(FN)          \
        FN(unspec),                     \
@@ -3737,6 +3819,10 @@ union bpf_attr {
        FN(inode_storage_delete),       \
        FN(d_path),                     \
        FN(copy_from_user),             \
+       FN(snprintf_btf),               \
+       FN(seq_printf_btf),             \
+       FN(skb_cgroup_classid),         \
+       FN(redirect_neigh),             \
        /* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
@@ -4845,4 +4931,34 @@ struct bpf_sk_lookup {
        __u32 local_port;       /* Host byte order */
 };
 
+/*
+ * struct btf_ptr is used for typed pointer representation; the
+ * type id is used to render the pointer data as the appropriate type
+ * via the bpf_snprintf_btf() helper described above.  A flags field -
+ * potentially to specify additional details about the BTF pointer
+ * (rather than its mode of display) - is included for future use.
+ * Display flags - BTF_F_* - are passed to bpf_snprintf_btf separately.
+ */
+struct btf_ptr {
+       void *ptr;
+       __u32 type_id;
+       __u32 flags;            /* BTF ptr flags; unused at present. */
+};
+
+/*
+ * Flags to control bpf_snprintf_btf() behaviour.
+ *     - BTF_F_COMPACT: no formatting around type information
+ *     - BTF_F_NONAME: no struct/union member names/types
+ *     - BTF_F_PTR_RAW: show raw (unobfuscated) pointer values;
+ *       equivalent to %px.
+ *     - BTF_F_ZERO: show zero-valued struct/union members; they
+ *       are not displayed by default
+ */
+enum {
+       BTF_F_COMPACT   =       (1ULL << 0),
+       BTF_F_NONAME    =       (1ULL << 1),
+       BTF_F_PTR_RAW   =       (1ULL << 2),
+       BTF_F_ZERO      =       (1ULL << 3),
+};
+
 #endif /* _UAPI__LINUX_BPF_H__ */
index f432496..5f9abed 100644 (file)
@@ -98,19 +98,18 @@ PC_FILE             = libbpf.pc
 ifdef EXTRA_CFLAGS
   CFLAGS := $(EXTRA_CFLAGS)
 else
-  CFLAGS := -g -Wall
+  CFLAGS := -g -O2
 endif
 
 # Append required CFLAGS
 override CFLAGS += $(EXTRA_WARNINGS) -Wno-switch-enum
 override CFLAGS += -Werror -Wall
-override CFLAGS += -fPIC
 override CFLAGS += $(INCLUDES)
 override CFLAGS += -fvisibility=hidden
 override CFLAGS += -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64
 
 # flags specific for shared library
-SHLIB_FLAGS := -DSHARED
+SHLIB_FLAGS := -DSHARED -fPIC
 
 ifeq ($(VERBOSE),1)
   Q =
index 2baa130..d27e341 100644 (file)
@@ -586,19 +586,31 @@ int bpf_link_create(int prog_fd, int target_fd,
                    enum bpf_attach_type attach_type,
                    const struct bpf_link_create_opts *opts)
 {
+       __u32 target_btf_id, iter_info_len;
        union bpf_attr attr;
 
        if (!OPTS_VALID(opts, bpf_link_create_opts))
                return -EINVAL;
 
+       iter_info_len = OPTS_GET(opts, iter_info_len, 0);
+       target_btf_id = OPTS_GET(opts, target_btf_id, 0);
+
+       if (iter_info_len && target_btf_id)
+               return -EINVAL;
+
        memset(&attr, 0, sizeof(attr));
        attr.link_create.prog_fd = prog_fd;
        attr.link_create.target_fd = target_fd;
        attr.link_create.attach_type = attach_type;
        attr.link_create.flags = OPTS_GET(opts, flags, 0);
-       attr.link_create.iter_info =
-               ptr_to_u64(OPTS_GET(opts, iter_info, (void *)0));
-       attr.link_create.iter_info_len = OPTS_GET(opts, iter_info_len, 0);
+
+       if (iter_info_len) {
+               attr.link_create.iter_info =
+                       ptr_to_u64(OPTS_GET(opts, iter_info, (void *)0));
+               attr.link_create.iter_info_len = iter_info_len;
+       } else if (target_btf_id) {
+               attr.link_create.target_btf_id = target_btf_id;
+       }
 
        return sys_bpf(BPF_LINK_CREATE, &attr, sizeof(attr));
 }
@@ -712,6 +724,37 @@ int bpf_prog_test_run_xattr(struct bpf_prog_test_run_attr *test_attr)
        return ret;
 }
 
+int bpf_prog_test_run_opts(int prog_fd, struct bpf_test_run_opts *opts)
+{
+       union bpf_attr attr;
+       int ret;
+
+       if (!OPTS_VALID(opts, bpf_test_run_opts))
+               return -EINVAL;
+
+       memset(&attr, 0, sizeof(attr));
+       attr.test.prog_fd = prog_fd;
+       attr.test.cpu = OPTS_GET(opts, cpu, 0);
+       attr.test.flags = OPTS_GET(opts, flags, 0);
+       attr.test.repeat = OPTS_GET(opts, repeat, 0);
+       attr.test.duration = OPTS_GET(opts, duration, 0);
+       attr.test.ctx_size_in = OPTS_GET(opts, ctx_size_in, 0);
+       attr.test.ctx_size_out = OPTS_GET(opts, ctx_size_out, 0);
+       attr.test.data_size_in = OPTS_GET(opts, data_size_in, 0);
+       attr.test.data_size_out = OPTS_GET(opts, data_size_out, 0);
+       attr.test.ctx_in = ptr_to_u64(OPTS_GET(opts, ctx_in, NULL));
+       attr.test.ctx_out = ptr_to_u64(OPTS_GET(opts, ctx_out, NULL));
+       attr.test.data_in = ptr_to_u64(OPTS_GET(opts, data_in, NULL));
+       attr.test.data_out = ptr_to_u64(OPTS_GET(opts, data_out, NULL));
+
+       ret = sys_bpf(BPF_PROG_TEST_RUN, &attr, sizeof(attr));
+       OPTS_SET(opts, data_size_out, attr.test.data_size_out);
+       OPTS_SET(opts, ctx_size_out, attr.test.ctx_size_out);
+       OPTS_SET(opts, duration, attr.test.duration);
+       OPTS_SET(opts, retval, attr.test.retval);
+       return ret;
+}
+
 static int bpf_obj_get_next_id(__u32 start_id, __u32 *next_id, int cmd)
 {
        union bpf_attr attr;
@@ -815,7 +858,7 @@ int bpf_raw_tracepoint_open(const char *name, int prog_fd)
        return sys_bpf(BPF_RAW_TRACEPOINT_OPEN, &attr, sizeof(attr));
 }
 
-int bpf_load_btf(void *btf, __u32 btf_size, char *log_buf, __u32 log_buf_size,
+int bpf_load_btf(const void *btf, __u32 btf_size, char *log_buf, __u32 log_buf_size,
                 bool do_log)
 {
        union bpf_attr attr = {};
index 8c1ac4b..875dde2 100644 (file)
@@ -174,8 +174,9 @@ struct bpf_link_create_opts {
        __u32 flags;
        union bpf_iter_link_info *iter_info;
        __u32 iter_info_len;
+       __u32 target_btf_id;
 };
-#define bpf_link_create_opts__last_field iter_info_len
+#define bpf_link_create_opts__last_field target_btf_id
 
 LIBBPF_API int bpf_link_create(int prog_fd, int target_fd,
                               enum bpf_attach_type attach_type,
@@ -234,7 +235,7 @@ LIBBPF_API int bpf_prog_query(int target_fd, enum bpf_attach_type type,
                              __u32 query_flags, __u32 *attach_flags,
                              __u32 *prog_ids, __u32 *prog_cnt);
 LIBBPF_API int bpf_raw_tracepoint_open(const char *name, int prog_fd);
-LIBBPF_API int bpf_load_btf(void *btf, __u32 btf_size, char *log_buf,
+LIBBPF_API int bpf_load_btf(const void *btf, __u32 btf_size, char *log_buf,
                            __u32 log_buf_size, bool do_log);
 LIBBPF_API int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf,
                                 __u32 *buf_len, __u32 *prog_id, __u32 *fd_type,
@@ -251,6 +252,32 @@ struct bpf_prog_bind_opts {
 
 LIBBPF_API int bpf_prog_bind_map(int prog_fd, int map_fd,
                                 const struct bpf_prog_bind_opts *opts);
+
+struct bpf_test_run_opts {
+       size_t sz; /* size of this struct for forward/backward compatibility */
+       const void *data_in; /* optional */
+       void *data_out;      /* optional */
+       __u32 data_size_in;
+       __u32 data_size_out; /* in: max length of data_out
+                             * out: length of data_out
+                             */
+       const void *ctx_in; /* optional */
+       void *ctx_out;      /* optional */
+       __u32 ctx_size_in;
+       __u32 ctx_size_out; /* in: max length of ctx_out
+                            * out: length of ctx_out
+                            */
+       __u32 retval;        /* out: return code of the BPF program */
+       int repeat;
+       __u32 duration;      /* out: average per repetition in ns */
+       __u32 flags;
+       __u32 cpu;
+};
+#define bpf_test_run_opts__last_field cpu
+
+LIBBPF_API int bpf_prog_test_run_opts(int prog_fd,
+                                     struct bpf_test_run_opts *opts);
+
 #ifdef __cplusplus
 } /* extern "C" */
 #endif
index 1106777..2bdb7d6 100644 (file)
        })
 #endif
 
+/*
+ * Helper macro to throw a compilation error if __bpf_unreachable() gets
+ * built into the resulting code. This works given BPF back end does not
+ * implement __builtin_trap(). This is useful to assert that certain paths
+ * of the program code are never used and hence eliminated by the compiler.
+ *
+ * For example, consider a switch statement that covers known cases used by
+ * the program. __bpf_unreachable() can then reside in the default case. If
+ * the program gets extended such that a case is not covered in the switch
+ * statement, then it will throw a build error due to the default case not
+ * being compiled out.
+ */
+#ifndef __bpf_unreachable
+# define __bpf_unreachable()   __builtin_trap()
+#endif
+
+/*
+ * Helper function to perform a tail call with a constant/immediate map slot.
+ */
+static __always_inline void
+bpf_tail_call_static(void *ctx, const void *map, const __u32 slot)
+{
+       if (!__builtin_constant_p(slot))
+               __bpf_unreachable();
+
+       /*
+        * Provide a hard guarantee that LLVM won't optimize setting r2 (map
+        * pointer) and r3 (constant map index) from _different paths_ ending
+        * up at the _same_ call insn as otherwise we won't be able to use the
+        * jmpq/nopl retpoline-free patching by the x86-64 JIT in the kernel
+        * given they mismatch. See also d2e4c1e6c294 ("bpf: Constant map key
+        * tracking for prog array pokes") for details on verifier tracking.
+        *
+        * Note on clobber list: we need to stay in-line with BPF calling
+        * convention, so even if we don't end up using r0, r4, r5, we need
+        * to mark them as clobber so that LLVM doesn't end up using them
+        * before / after the call.
+        */
+       asm volatile("r1 = %[ctx]\n\t"
+                    "r2 = %[map]\n\t"
+                    "r3 = %[slot]\n\t"
+                    "call 12"
+                    :: [ctx]"r"(ctx), [map]"r"(map), [slot]"i"(slot)
+                    : "r0", "r1", "r2", "r3", "r4", "r5");
+}
+
 /*
  * Helper structure used by eBPF C program
  * to describe BPF map attributes to libbpf loader
index a3d259e..231b072 100644 (file)
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
 /* Copyright (c) 2018 Facebook */
 
+#include <byteswap.h>
 #include <endian.h>
 #include <stdio.h>
 #include <stdlib.h>
 static struct btf_type btf_void;
 
 struct btf {
-       union {
-               struct btf_header *hdr;
-               void *data;
-       };
-       struct btf_type **types;
-       const char *strings;
-       void *nohdr_data;
+       /* raw BTF data in native endianness */
+       void *raw_data;
+       /* raw BTF data in non-native endianness */
+       void *raw_data_swapped;
+       __u32 raw_size;
+       /* whether target endianness differs from the native one */
+       bool swapped_endian;
+
+       /*
+        * When BTF is loaded from an ELF or raw memory it is stored
+        * in a contiguous memory block. The hdr, type_data, and, strs_data
+        * point inside that memory region to their respective parts of BTF
+        * representation:
+        *
+        * +--------------------------------+
+        * |  Header  |  Types  |  Strings  |
+        * +--------------------------------+
+        * ^          ^         ^
+        * |          |         |
+        * hdr        |         |
+        * types_data-+         |
+        * strs_data------------+
+        *
+        * If BTF data is later modified, e.g., due to types added or
+        * removed, BTF deduplication performed, etc, this contiguous
+        * representation is broken up into three independently allocated
+        * memory regions to be able to modify them independently.
+        * raw_data is nulled out at that point, but can be later allocated
+        * and cached again if user calls btf__get_raw_data(), at which point
+        * raw_data will contain a contiguous copy of header, types, and
+        * strings:
+        *
+        * +----------+  +---------+  +-----------+
+        * |  Header  |  |  Types  |  |  Strings  |
+        * +----------+  +---------+  +-----------+
+        * ^             ^            ^
+        * |             |            |
+        * hdr           |            |
+        * types_data----+            |
+        * strs_data------------------+
+        *
+        *               +----------+---------+-----------+
+        *               |  Header  |  Types  |  Strings  |
+        * raw_data----->+----------+---------+-----------+
+        */
+       struct btf_header *hdr;
+
+       void *types_data;
+       size_t types_data_cap; /* used size stored in hdr->type_len */
+
+       /* type ID to `struct btf_type *` lookup index */
+       __u32 *type_offs;
+       size_t type_offs_cap;
        __u32 nr_types;
-       __u32 types_size;
-       __u32 data_size;
+
+       void *strs_data;
+       size_t strs_data_cap; /* used size stored in hdr->str_len */
+
+       /* lookup index for each unique string in strings section */
+       struct hashmap *strs_hash;
+       /* whether strings are already deduplicated */
+       bool strs_deduped;
+       /* BTF object FD, if loaded into kernel */
        int fd;
+
+       /* Pointer size (in bytes) for a target architecture of this BTF */
        int ptr_sz;
 };
 
@@ -46,60 +102,114 @@ static inline __u64 ptr_to_u64(const void *ptr)
        return (__u64) (unsigned long) ptr;
 }
 
-static int btf_add_type(struct btf *btf, struct btf_type *t)
+/* Ensure given dynamically allocated memory region pointed to by *data* with
+ * capacity of *cap_cnt* elements each taking *elem_sz* bytes has enough
+ * memory to accommodate *add_cnt* new elements, assuming *cur_cnt* elements
+ * are already used. At most *max_cnt* elements can be ever allocated.
+ * If necessary, memory is reallocated and all existing data is copied over,
+ * new pointer to the memory region is stored at *data, new memory region
+ * capacity (in number of elements) is stored in *cap.
+ * On success, memory pointer to the beginning of unused memory is returned.
+ * On error, NULL is returned.
+ */
+void *btf_add_mem(void **data, size_t *cap_cnt, size_t elem_sz,
+                 size_t cur_cnt, size_t max_cnt, size_t add_cnt)
 {
-       if (btf->types_size - btf->nr_types < 2) {
-               struct btf_type **new_types;
-               __u32 expand_by, new_size;
+       size_t new_cnt;
+       void *new_data;
 
-               if (btf->types_size == BTF_MAX_NR_TYPES)
-                       return -E2BIG;
+       if (cur_cnt + add_cnt <= *cap_cnt)
+               return *data + cur_cnt * elem_sz;
 
-               expand_by = max(btf->types_size >> 2, 16U);
-               new_size = min(BTF_MAX_NR_TYPES, btf->types_size + expand_by);
+       /* requested more than the set limit */
+       if (cur_cnt + add_cnt > max_cnt)
+               return NULL;
 
-               new_types = libbpf_reallocarray(btf->types, new_size, sizeof(*new_types));
-               if (!new_types)
-                       return -ENOMEM;
+       new_cnt = *cap_cnt;
+       new_cnt += new_cnt / 4;           /* expand by 25% */
+       if (new_cnt < 16)                 /* but at least 16 elements */
+               new_cnt = 16;
+       if (new_cnt > max_cnt)            /* but not exceeding a set limit */
+               new_cnt = max_cnt;
+       if (new_cnt < cur_cnt + add_cnt)  /* also ensure we have enough memory */
+               new_cnt = cur_cnt + add_cnt;
+
+       new_data = libbpf_reallocarray(*data, new_cnt, elem_sz);
+       if (!new_data)
+               return NULL;
 
-               if (btf->nr_types == 0)
-                       new_types[0] = &btf_void;
+       /* zero out newly allocated portion of memory */
+       memset(new_data + (*cap_cnt) * elem_sz, 0, (new_cnt - *cap_cnt) * elem_sz);
 
-               btf->types = new_types;
-               btf->types_size = new_size;
-       }
+       *data = new_data;
+       *cap_cnt = new_cnt;
+       return new_data + cur_cnt * elem_sz;
+}
+
+/* Ensure given dynamically allocated memory region has enough allocated space
+ * to accommodate *need_cnt* elements of size *elem_sz* bytes each
+ */
+int btf_ensure_mem(void **data, size_t *cap_cnt, size_t elem_sz, size_t need_cnt)
+{
+       void *p;
+
+       if (need_cnt <= *cap_cnt)
+               return 0;
+
+       p = btf_add_mem(data, cap_cnt, elem_sz, *cap_cnt, SIZE_MAX, need_cnt - *cap_cnt);
+       if (!p)
+               return -ENOMEM;
+
+       return 0;
+}
+
+static int btf_add_type_idx_entry(struct btf *btf, __u32 type_off)
+{
+       __u32 *p;
 
-       btf->types[++(btf->nr_types)] = t;
+       p = btf_add_mem((void **)&btf->type_offs, &btf->type_offs_cap, sizeof(__u32),
+                       btf->nr_types + 1, BTF_MAX_NR_TYPES, 1);
+       if (!p)
+               return -ENOMEM;
 
+       *p = type_off;
        return 0;
 }
 
+static void btf_bswap_hdr(struct btf_header *h)
+{
+       h->magic = bswap_16(h->magic);
+       h->hdr_len = bswap_32(h->hdr_len);
+       h->type_off = bswap_32(h->type_off);
+       h->type_len = bswap_32(h->type_len);
+       h->str_off = bswap_32(h->str_off);
+       h->str_len = bswap_32(h->str_len);
+}
+
 static int btf_parse_hdr(struct btf *btf)
 {
-       const struct btf_header *hdr = btf->hdr;
+       struct btf_header *hdr = btf->hdr;
        __u32 meta_left;
 
-       if (btf->data_size < sizeof(struct btf_header)) {
+       if (btf->raw_size < sizeof(struct btf_header)) {
                pr_debug("BTF header not found\n");
                return -EINVAL;
        }
 
-       if (hdr->magic != BTF_MAGIC) {
+       if (hdr->magic == bswap_16(BTF_MAGIC)) {
+               btf->swapped_endian = true;
+               if (bswap_32(hdr->hdr_len) != sizeof(struct btf_header)) {
+                       pr_warn("Can't load BTF with non-native endianness due to unsupported header length %u\n",
+                               bswap_32(hdr->hdr_len));
+                       return -ENOTSUP;
+               }
+               btf_bswap_hdr(hdr);
+       } else if (hdr->magic != BTF_MAGIC) {
                pr_debug("Invalid BTF magic:%x\n", hdr->magic);
                return -EINVAL;
        }
 
-       if (hdr->version != BTF_VERSION) {
-               pr_debug("Unsupported BTF version:%u\n", hdr->version);
-               return -ENOTSUP;
-       }
-
-       if (hdr->flags) {
-               pr_debug("Unsupported BTF flags:%x\n", hdr->flags);
-               return -ENOTSUP;
-       }
-
-       meta_left = btf->data_size - sizeof(*hdr);
+       meta_left = btf->raw_size - sizeof(*hdr);
        if (!meta_left) {
                pr_debug("BTF has no data\n");
                return -EINVAL;
@@ -125,15 +235,13 @@ static int btf_parse_hdr(struct btf *btf)
                return -EINVAL;
        }
 
-       btf->nohdr_data = btf->hdr + 1;
-
        return 0;
 }
 
 static int btf_parse_str_sec(struct btf *btf)
 {
        const struct btf_header *hdr = btf->hdr;
-       const char *start = btf->nohdr_data + hdr->str_off;
+       const char *start = btf->strs_data;
        const char *end = start + btf->hdr->str_len;
 
        if (!hdr->str_len || hdr->str_len - 1 > BTF_MAX_STR_OFFSET ||
@@ -142,14 +250,12 @@ static int btf_parse_str_sec(struct btf *btf)
                return -EINVAL;
        }
 
-       btf->strings = start;
-
        return 0;
 }
 
-static int btf_type_size(struct btf_type *t)
+static int btf_type_size(const struct btf_type *t)
 {
-       int base_size = sizeof(struct btf_type);
+       const int base_size = sizeof(struct btf_type);
        __u16 vlen = btf_vlen(t);
 
        switch (btf_kind(t)) {
@@ -182,25 +288,120 @@ static int btf_type_size(struct btf_type *t)
        }
 }
 
+static void btf_bswap_type_base(struct btf_type *t)
+{
+       t->name_off = bswap_32(t->name_off);
+       t->info = bswap_32(t->info);
+       t->type = bswap_32(t->type);
+}
+
+static int btf_bswap_type_rest(struct btf_type *t)
+{
+       struct btf_var_secinfo *v;
+       struct btf_member *m;
+       struct btf_array *a;
+       struct btf_param *p;
+       struct btf_enum *e;
+       __u16 vlen = btf_vlen(t);
+       int i;
+
+       switch (btf_kind(t)) {
+       case BTF_KIND_FWD:
+       case BTF_KIND_CONST:
+       case BTF_KIND_VOLATILE:
+       case BTF_KIND_RESTRICT:
+       case BTF_KIND_PTR:
+       case BTF_KIND_TYPEDEF:
+       case BTF_KIND_FUNC:
+               return 0;
+       case BTF_KIND_INT:
+               *(__u32 *)(t + 1) = bswap_32(*(__u32 *)(t + 1));
+               return 0;
+       case BTF_KIND_ENUM:
+               for (i = 0, e = btf_enum(t); i < vlen; i++, e++) {
+                       e->name_off = bswap_32(e->name_off);
+                       e->val = bswap_32(e->val);
+               }
+               return 0;
+       case BTF_KIND_ARRAY:
+               a = btf_array(t);
+               a->type = bswap_32(a->type);
+               a->index_type = bswap_32(a->index_type);
+               a->nelems = bswap_32(a->nelems);
+               return 0;
+       case BTF_KIND_STRUCT:
+       case BTF_KIND_UNION:
+               for (i = 0, m = btf_members(t); i < vlen; i++, m++) {
+                       m->name_off = bswap_32(m->name_off);
+                       m->type = bswap_32(m->type);
+                       m->offset = bswap_32(m->offset);
+               }
+               return 0;
+       case BTF_KIND_FUNC_PROTO:
+               for (i = 0, p = btf_params(t); i < vlen; i++, p++) {
+                       p->name_off = bswap_32(p->name_off);
+                       p->type = bswap_32(p->type);
+               }
+               return 0;
+       case BTF_KIND_VAR:
+               btf_var(t)->linkage = bswap_32(btf_var(t)->linkage);
+               return 0;
+       case BTF_KIND_DATASEC:
+               for (i = 0, v = btf_var_secinfos(t); i < vlen; i++, v++) {
+                       v->type = bswap_32(v->type);
+                       v->offset = bswap_32(v->offset);
+                       v->size = bswap_32(v->size);
+               }
+               return 0;
+       default:
+               pr_debug("Unsupported BTF_KIND:%u\n", btf_kind(t));
+               return -EINVAL;
+       }
+}
+
 static int btf_parse_type_sec(struct btf *btf)
 {
        struct btf_header *hdr = btf->hdr;
-       void *nohdr_data = btf->nohdr_data;
-       void *next_type = nohdr_data + hdr->type_off;
-       void *end_type = nohdr_data + hdr->str_off;
+       void *next_type = btf->types_data;
+       void *end_type = next_type + hdr->type_len;
+       int err, i = 0, type_size;
+
+       /* VOID (type_id == 0) is specially handled by btf__type_by_id(),
+        * so ensure we can never properly use its offset from index by
+        * setting it to a large value
+        */
+       err = btf_add_type_idx_entry(btf, UINT_MAX);
+       if (err)
+               return err;
 
-       while (next_type < end_type) {
-               struct btf_type *t = next_type;
-               int type_size;
-               int err;
+       while (next_type + sizeof(struct btf_type) <= end_type) {
+               i++;
 
-               type_size = btf_type_size(t);
+               if (btf->swapped_endian)
+                       btf_bswap_type_base(next_type);
+
+               type_size = btf_type_size(next_type);
                if (type_size < 0)
                        return type_size;
-               next_type += type_size;
-               err = btf_add_type(btf, t);
+               if (next_type + type_size > end_type) {
+                       pr_warn("BTF type [%d] is malformed\n", i);
+                       return -EINVAL;
+               }
+
+               if (btf->swapped_endian && btf_bswap_type_rest(next_type))
+                       return -EINVAL;
+
+               err = btf_add_type_idx_entry(btf, next_type - btf->types_data);
                if (err)
                        return err;
+
+               next_type += type_size;
+               btf->nr_types++;
+       }
+
+       if (next_type != end_type) {
+               pr_warn("BTF types data is malformed\n");
+               return -EINVAL;
        }
 
        return 0;
@@ -211,12 +412,20 @@ __u32 btf__get_nr_types(const struct btf *btf)
        return btf->nr_types;
 }
 
+/* internal helper returning non-const pointer to a type */
+static struct btf_type *btf_type_by_id(struct btf *btf, __u32 type_id)
+{
+       if (type_id == 0)
+               return &btf_void;
+
+       return btf->types_data + btf->type_offs[type_id];
+}
+
 const struct btf_type *btf__type_by_id(const struct btf *btf, __u32 type_id)
 {
        if (type_id > btf->nr_types)
                return NULL;
-
-       return btf->types[type_id];
+       return btf_type_by_id((struct btf *)btf, type_id);
 }
 
 static int determine_ptr_size(const struct btf *btf)
@@ -283,6 +492,38 @@ int btf__set_pointer_size(struct btf *btf, size_t ptr_sz)
        return 0;
 }
 
+static bool is_host_big_endian(void)
+{
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+       return false;
+#elif __BYTE_ORDER == __BIG_ENDIAN
+       return true;
+#else
+# error "Unrecognized __BYTE_ORDER"
+#endif
+}
+
+enum btf_endianness btf__endianness(const struct btf *btf)
+{
+       if (is_host_big_endian())
+               return btf->swapped_endian ? BTF_LITTLE_ENDIAN : BTF_BIG_ENDIAN;
+       else
+               return btf->swapped_endian ? BTF_BIG_ENDIAN : BTF_LITTLE_ENDIAN;
+}
+
+int btf__set_endianness(struct btf *btf, enum btf_endianness endian)
+{
+       if (endian != BTF_LITTLE_ENDIAN && endian != BTF_BIG_ENDIAN)
+               return -EINVAL;
+
+       btf->swapped_endian = is_host_big_endian() != (endian == BTF_BIG_ENDIAN);
+       if (!btf->swapped_endian) {
+               free(btf->raw_data_swapped);
+               btf->raw_data_swapped = NULL;
+       }
+       return 0;
+}
+
 static bool btf_type_is_void(const struct btf_type *t)
 {
        return t == &btf_void || btf_is_fwd(t);
@@ -414,7 +655,7 @@ __s32 btf__find_by_name(const struct btf *btf, const char *type_name)
                return 0;
 
        for (i = 1; i <= btf->nr_types; i++) {
-               const struct btf_type *t = btf->types[i];
+               const struct btf_type *t = btf__type_by_id(btf, i);
                const char *name = btf__name_by_offset(btf, t->name_off);
 
                if (name && !strcmp(type_name, name))
@@ -433,7 +674,7 @@ __s32 btf__find_by_name_kind(const struct btf *btf, const char *type_name,
                return 0;
 
        for (i = 1; i <= btf->nr_types; i++) {
-               const struct btf_type *t = btf->types[i];
+               const struct btf_type *t = btf__type_by_id(btf, i);
                const char *name;
 
                if (btf_kind(t) != kind)
@@ -446,6 +687,11 @@ __s32 btf__find_by_name_kind(const struct btf *btf, const char *type_name,
        return -ENOENT;
 }
 
+static bool btf_is_modifiable(const struct btf *btf)
+{
+       return (void *)btf->hdr != btf->raw_data;
+}
+
 void btf__free(struct btf *btf)
 {
        if (IS_ERR_OR_NULL(btf))
@@ -454,11 +700,55 @@ void btf__free(struct btf *btf)
        if (btf->fd >= 0)
                close(btf->fd);
 
-       free(btf->data);
-       free(btf->types);
+       if (btf_is_modifiable(btf)) {
+               /* if BTF was modified after loading, it will have a split
+                * in-memory representation for header, types, and strings
+                * sections, so we need to free all of them individually. It
+                * might still have a cached contiguous raw data present,
+                * which will be unconditionally freed below.
+                */
+               free(btf->hdr);
+               free(btf->types_data);
+               free(btf->strs_data);
+       }
+       free(btf->raw_data);
+       free(btf->raw_data_swapped);
+       free(btf->type_offs);
        free(btf);
 }
 
+struct btf *btf__new_empty(void)
+{
+       struct btf *btf;
+
+       btf = calloc(1, sizeof(*btf));
+       if (!btf)
+               return ERR_PTR(-ENOMEM);
+
+       btf->fd = -1;
+       btf->ptr_sz = sizeof(void *);
+       btf->swapped_endian = false;
+
+       /* +1 for empty string at offset 0 */
+       btf->raw_size = sizeof(struct btf_header) + 1;
+       btf->raw_data = calloc(1, btf->raw_size);
+       if (!btf->raw_data) {
+               free(btf);
+               return ERR_PTR(-ENOMEM);
+       }
+
+       btf->hdr = btf->raw_data;
+       btf->hdr->hdr_len = sizeof(struct btf_header);
+       btf->hdr->magic = BTF_MAGIC;
+       btf->hdr->version = BTF_VERSION;
+
+       btf->types_data = btf->raw_data + btf->hdr->hdr_len;
+       btf->strs_data = btf->raw_data + btf->hdr->hdr_len;
+       btf->hdr->str_len = 1; /* empty string at offset 0 */
+
+       return btf;
+}
+
 struct btf *btf__new(const void *data, __u32 size)
 {
        struct btf *btf;
@@ -468,26 +758,28 @@ struct btf *btf__new(const void *data, __u32 size)
        if (!btf)
                return ERR_PTR(-ENOMEM);
 
-       btf->fd = -1;
-
-       btf->data = malloc(size);
-       if (!btf->data) {
+       btf->raw_data = malloc(size);
+       if (!btf->raw_data) {
                err = -ENOMEM;
                goto done;
        }
+       memcpy(btf->raw_data, data, size);
+       btf->raw_size = size;
 
-       memcpy(btf->data, data, size);
-       btf->data_size = size;
-
+       btf->hdr = btf->raw_data;
        err = btf_parse_hdr(btf);
        if (err)
                goto done;
 
+       btf->strs_data = btf->raw_data + btf->hdr->hdr_len + btf->hdr->str_off;
+       btf->types_data = btf->raw_data + btf->hdr->hdr_len + btf->hdr->type_off;
+
        err = btf_parse_str_sec(btf);
+       err = err ?: btf_parse_type_sec(btf);
        if (err)
                goto done;
 
-       err = btf_parse_type_sec(btf);
+       btf->fd = -1;
 
 done:
        if (err) {
@@ -498,17 +790,6 @@ done:
        return btf;
 }
 
-static bool btf_check_endianness(const GElf_Ehdr *ehdr)
-{
-#if __BYTE_ORDER == __LITTLE_ENDIAN
-       return ehdr->e_ident[EI_DATA] == ELFDATA2LSB;
-#elif __BYTE_ORDER == __BIG_ENDIAN
-       return ehdr->e_ident[EI_DATA] == ELFDATA2MSB;
-#else
-# error "Unrecognized __BYTE_ORDER__"
-#endif
-}
-
 struct btf *btf__parse_elf(const char *path, struct btf_ext **btf_ext)
 {
        Elf_Data *btf_data = NULL, *btf_ext_data = NULL;
@@ -541,10 +822,6 @@ struct btf *btf__parse_elf(const char *path, struct btf_ext **btf_ext)
                pr_warn("failed to get EHDR from %s\n", path);
                goto done;
        }
-       if (!btf_check_endianness(&ehdr)) {
-               pr_warn("non-native ELF endianness is not supported\n");
-               goto done;
-       }
        if (!elf_rawdata(elf_getscn(elf, ehdr.e_shstrndx), NULL)) {
                pr_warn("failed to get e_shstrndx from %s\n", path);
                goto done;
@@ -656,7 +933,7 @@ struct btf *btf__parse_raw(const char *path)
                err = -EIO;
                goto err_out;
        }
-       if (magic != BTF_MAGIC) {
+       if (magic != BTF_MAGIC && magic != bswap_16(BTF_MAGIC)) {
                /* definitely not a raw BTF */
                err = -EPROTO;
                goto err_out;
@@ -789,7 +1066,7 @@ int btf__finalize_data(struct bpf_object *obj, struct btf *btf)
        __u32 i;
 
        for (i = 1; i <= btf->nr_types; i++) {
-               struct btf_type *t = btf->types[i];
+               struct btf_type *t = btf_type_by_id(btf, i);
 
                /* Loader needs to fix up some of the things compiler
                 * couldn't get its hands on while emitting BTF. This
@@ -806,202 +1083,1245 @@ int btf__finalize_data(struct bpf_object *obj, struct btf *btf)
        return err;
 }
 
+static void *btf_get_raw_data(const struct btf *btf, __u32 *size, bool swap_endian);
+
 int btf__load(struct btf *btf)
 {
-       __u32 log_buf_size = 0;
+       __u32 log_buf_size = 0, raw_size;
        char *log_buf = NULL;
+       void *raw_data;
        int err = 0;
 
-       if (btf->fd >= 0)
-               return -EEXIST;
+       if (btf->fd >= 0)
+               return -EEXIST;
+
+retry_load:
+       if (log_buf_size) {
+               log_buf = malloc(log_buf_size);
+               if (!log_buf)
+                       return -ENOMEM;
+
+               *log_buf = 0;
+       }
+
+       raw_data = btf_get_raw_data(btf, &raw_size, false);
+       if (!raw_data) {
+               err = -ENOMEM;
+               goto done;
+       }
+       /* cache native raw data representation */
+       btf->raw_size = raw_size;
+       btf->raw_data = raw_data;
+
+       btf->fd = bpf_load_btf(raw_data, raw_size, log_buf, log_buf_size, false);
+       if (btf->fd < 0) {
+               if (!log_buf || errno == ENOSPC) {
+                       log_buf_size = max((__u32)BPF_LOG_BUF_SIZE,
+                                          log_buf_size << 1);
+                       free(log_buf);
+                       goto retry_load;
+               }
+
+               err = -errno;
+               pr_warn("Error loading BTF: %s(%d)\n", strerror(errno), errno);
+               if (*log_buf)
+                       pr_warn("%s\n", log_buf);
+               goto done;
+       }
+
+done:
+       free(log_buf);
+       return err;
+}
+
+int btf__fd(const struct btf *btf)
+{
+       return btf->fd;
+}
+
+void btf__set_fd(struct btf *btf, int fd)
+{
+       btf->fd = fd;
+}
+
+static void *btf_get_raw_data(const struct btf *btf, __u32 *size, bool swap_endian)
+{
+       struct btf_header *hdr = btf->hdr;
+       struct btf_type *t;
+       void *data, *p;
+       __u32 data_sz;
+       int i;
+
+       data = swap_endian ? btf->raw_data_swapped : btf->raw_data;
+       if (data) {
+               *size = btf->raw_size;
+               return data;
+       }
+
+       data_sz = hdr->hdr_len + hdr->type_len + hdr->str_len;
+       data = calloc(1, data_sz);
+       if (!data)
+               return NULL;
+       p = data;
+
+       memcpy(p, hdr, hdr->hdr_len);
+       if (swap_endian)
+               btf_bswap_hdr(p);
+       p += hdr->hdr_len;
+
+       memcpy(p, btf->types_data, hdr->type_len);
+       if (swap_endian) {
+               for (i = 1; i <= btf->nr_types; i++) {
+                       t = p  + btf->type_offs[i];
+                       /* btf_bswap_type_rest() relies on native t->info, so
+                        * we swap base type info after we swapped all the
+                        * additional information
+                        */
+                       if (btf_bswap_type_rest(t))
+                               goto err_out;
+                       btf_bswap_type_base(t);
+               }
+       }
+       p += hdr->type_len;
+
+       memcpy(p, btf->strs_data, hdr->str_len);
+       p += hdr->str_len;
+
+       *size = data_sz;
+       return data;
+err_out:
+       free(data);
+       return NULL;
+}
+
+const void *btf__get_raw_data(const struct btf *btf_ro, __u32 *size)
+{
+       struct btf *btf = (struct btf *)btf_ro;
+       __u32 data_sz;
+       void *data;
+
+       data = btf_get_raw_data(btf, &data_sz, btf->swapped_endian);
+       if (!data)
+               return NULL;
+
+       btf->raw_size = data_sz;
+       if (btf->swapped_endian)
+               btf->raw_data_swapped = data;
+       else
+               btf->raw_data = data;
+       *size = data_sz;
+       return data;
+}
+
+const char *btf__str_by_offset(const struct btf *btf, __u32 offset)
+{
+       if (offset < btf->hdr->str_len)
+               return btf->strs_data + offset;
+       else
+               return NULL;
+}
+
+const char *btf__name_by_offset(const struct btf *btf, __u32 offset)
+{
+       return btf__str_by_offset(btf, offset);
+}
+
+int btf__get_from_id(__u32 id, struct btf **btf)
+{
+       struct bpf_btf_info btf_info = { 0 };
+       __u32 len = sizeof(btf_info);
+       __u32 last_size;
+       int btf_fd;
+       void *ptr;
+       int err;
+
+       err = 0;
+       *btf = NULL;
+       btf_fd = bpf_btf_get_fd_by_id(id);
+       if (btf_fd < 0)
+               return 0;
+
+       /* we won't know btf_size until we call bpf_obj_get_info_by_fd(). so
+        * let's start with a sane default - 4KiB here - and resize it only if
+        * bpf_obj_get_info_by_fd() needs a bigger buffer.
+        */
+       btf_info.btf_size = 4096;
+       last_size = btf_info.btf_size;
+       ptr = malloc(last_size);
+       if (!ptr) {
+               err = -ENOMEM;
+               goto exit_free;
+       }
+
+       memset(ptr, 0, last_size);
+       btf_info.btf = ptr_to_u64(ptr);
+       err = bpf_obj_get_info_by_fd(btf_fd, &btf_info, &len);
+
+       if (!err && btf_info.btf_size > last_size) {
+               void *temp_ptr;
+
+               last_size = btf_info.btf_size;
+               temp_ptr = realloc(ptr, last_size);
+               if (!temp_ptr) {
+                       err = -ENOMEM;
+                       goto exit_free;
+               }
+               ptr = temp_ptr;
+               memset(ptr, 0, last_size);
+               btf_info.btf = ptr_to_u64(ptr);
+               err = bpf_obj_get_info_by_fd(btf_fd, &btf_info, &len);
+       }
+
+       if (err || btf_info.btf_size > last_size) {
+               err = errno;
+               goto exit_free;
+       }
+
+       *btf = btf__new((__u8 *)(long)btf_info.btf, btf_info.btf_size);
+       if (IS_ERR(*btf)) {
+               err = PTR_ERR(*btf);
+               *btf = NULL;
+       }
+
+exit_free:
+       close(btf_fd);
+       free(ptr);
+
+       return err;
+}
+
+int btf__get_map_kv_tids(const struct btf *btf, const char *map_name,
+                        __u32 expected_key_size, __u32 expected_value_size,
+                        __u32 *key_type_id, __u32 *value_type_id)
+{
+       const struct btf_type *container_type;
+       const struct btf_member *key, *value;
+       const size_t max_name = 256;
+       char container_name[max_name];
+       __s64 key_size, value_size;
+       __s32 container_id;
+
+       if (snprintf(container_name, max_name, "____btf_map_%s", map_name) ==
+           max_name) {
+               pr_warn("map:%s length of '____btf_map_%s' is too long\n",
+                       map_name, map_name);
+               return -EINVAL;
+       }
+
+       container_id = btf__find_by_name(btf, container_name);
+       if (container_id < 0) {
+               pr_debug("map:%s container_name:%s cannot be found in BTF. Missing BPF_ANNOTATE_KV_PAIR?\n",
+                        map_name, container_name);
+               return container_id;
+       }
+
+       container_type = btf__type_by_id(btf, container_id);
+       if (!container_type) {
+               pr_warn("map:%s cannot find BTF type for container_id:%u\n",
+                       map_name, container_id);
+               return -EINVAL;
+       }
+
+       if (!btf_is_struct(container_type) || btf_vlen(container_type) < 2) {
+               pr_warn("map:%s container_name:%s is an invalid container struct\n",
+                       map_name, container_name);
+               return -EINVAL;
+       }
+
+       key = btf_members(container_type);
+       value = key + 1;
+
+       key_size = btf__resolve_size(btf, key->type);
+       if (key_size < 0) {
+               pr_warn("map:%s invalid BTF key_type_size\n", map_name);
+               return key_size;
+       }
+
+       if (expected_key_size != key_size) {
+               pr_warn("map:%s btf_key_type_size:%u != map_def_key_size:%u\n",
+                       map_name, (__u32)key_size, expected_key_size);
+               return -EINVAL;
+       }
+
+       value_size = btf__resolve_size(btf, value->type);
+       if (value_size < 0) {
+               pr_warn("map:%s invalid BTF value_type_size\n", map_name);
+               return value_size;
+       }
+
+       if (expected_value_size != value_size) {
+               pr_warn("map:%s btf_value_type_size:%u != map_def_value_size:%u\n",
+                       map_name, (__u32)value_size, expected_value_size);
+               return -EINVAL;
+       }
+
+       *key_type_id = key->type;
+       *value_type_id = value->type;
+
+       return 0;
+}
+
+static size_t strs_hash_fn(const void *key, void *ctx)
+{
+       struct btf *btf = ctx;
+       const char *str = btf->strs_data + (long)key;
+
+       return str_hash(str);
+}
+
+static bool strs_hash_equal_fn(const void *key1, const void *key2, void *ctx)
+{
+       struct btf *btf = ctx;
+       const char *str1 = btf->strs_data + (long)key1;
+       const char *str2 = btf->strs_data + (long)key2;
+
+       return strcmp(str1, str2) == 0;
+}
+
+static void btf_invalidate_raw_data(struct btf *btf)
+{
+       if (btf->raw_data) {
+               free(btf->raw_data);
+               btf->raw_data = NULL;
+       }
+       if (btf->raw_data_swapped) {
+               free(btf->raw_data_swapped);
+               btf->raw_data_swapped = NULL;
+       }
+}
+
+/* Ensure BTF is ready to be modified (by splitting into three memory
+ * regions for header, types, and strings). Also invalidate cached
+ * raw_data, if any.
+ */
+static int btf_ensure_modifiable(struct btf *btf)
+{
+       void *hdr, *types, *strs, *strs_end, *s;
+       struct hashmap *hash = NULL;
+       long off;
+       int err;
+
+       if (btf_is_modifiable(btf)) {
+               /* any BTF modification invalidates raw_data */
+               btf_invalidate_raw_data(btf);
+               return 0;
+       }
+
+       /* split raw data into three memory regions */
+       hdr = malloc(btf->hdr->hdr_len);
+       types = malloc(btf->hdr->type_len);
+       strs = malloc(btf->hdr->str_len);
+       if (!hdr || !types || !strs)
+               goto err_out;
+
+       memcpy(hdr, btf->hdr, btf->hdr->hdr_len);
+       memcpy(types, btf->types_data, btf->hdr->type_len);
+       memcpy(strs, btf->strs_data, btf->hdr->str_len);
+
+       /* build lookup index for all strings */
+       hash = hashmap__new(strs_hash_fn, strs_hash_equal_fn, btf);
+       if (IS_ERR(hash)) {
+               err = PTR_ERR(hash);
+               hash = NULL;
+               goto err_out;
+       }
+
+       strs_end = strs + btf->hdr->str_len;
+       for (off = 0, s = strs; s < strs_end; off += strlen(s) + 1, s = strs + off) {
+               /* hashmap__add() returns EEXIST if string with the same
+                * content already is in the hash map
+                */
+               err = hashmap__add(hash, (void *)off, (void *)off);
+               if (err == -EEXIST)
+                       continue; /* duplicate */
+               if (err)
+                       goto err_out;
+       }
+
+       /* only when everything was successful, update internal state */
+       btf->hdr = hdr;
+       btf->types_data = types;
+       btf->types_data_cap = btf->hdr->type_len;
+       btf->strs_data = strs;
+       btf->strs_data_cap = btf->hdr->str_len;
+       btf->strs_hash = hash;
+       /* if BTF was created from scratch, all strings are guaranteed to be
+        * unique and deduplicated
+        */
+       btf->strs_deduped = btf->hdr->str_len <= 1;
+
+       /* invalidate raw_data representation */
+       btf_invalidate_raw_data(btf);
+
+       return 0;
+
+err_out:
+       hashmap__free(hash);
+       free(hdr);
+       free(types);
+       free(strs);
+       return -ENOMEM;
+}
+
+static void *btf_add_str_mem(struct btf *btf, size_t add_sz)
+{
+       return btf_add_mem(&btf->strs_data, &btf->strs_data_cap, 1,
+                          btf->hdr->str_len, BTF_MAX_STR_OFFSET, add_sz);
+}
+
+/* Find an offset in BTF string section that corresponds to a given string *s*.
+ * Returns:
+ *   - >0 offset into string section, if string is found;
+ *   - -ENOENT, if string is not in the string section;
+ *   - <0, on any other error.
+ */
+int btf__find_str(struct btf *btf, const char *s)
+{
+       long old_off, new_off, len;
+       void *p;
+
+       /* BTF needs to be in a modifiable state to build string lookup index */
+       if (btf_ensure_modifiable(btf))
+               return -ENOMEM;
+
+       /* see btf__add_str() for why we do this */
+       len = strlen(s) + 1;
+       p = btf_add_str_mem(btf, len);
+       if (!p)
+               return -ENOMEM;
+
+       new_off = btf->hdr->str_len;
+       memcpy(p, s, len);
+
+       if (hashmap__find(btf->strs_hash, (void *)new_off, (void **)&old_off))
+               return old_off;
+
+       return -ENOENT;
+}
+
+/* Add a string s to the BTF string section.
+ * Returns:
+ *   - > 0 offset into string section, on success;
+ *   - < 0, on error.
+ */
+int btf__add_str(struct btf *btf, const char *s)
+{
+       long old_off, new_off, len;
+       void *p;
+       int err;
+
+       if (btf_ensure_modifiable(btf))
+               return -ENOMEM;
+
+       /* Hashmap keys are always offsets within btf->strs_data, so to even
+        * look up some string from the "outside", we need to first append it
+        * at the end, so that it can be addressed with an offset. Luckily,
+        * until btf->hdr->str_len is incremented, that string is just a piece
+        * of garbage for the rest of BTF code, so no harm, no foul. On the
+        * other hand, if the string is unique, it's already appended and
+        * ready to be used, only a simple btf->hdr->str_len increment away.
+        */
+       len = strlen(s) + 1;
+       p = btf_add_str_mem(btf, len);
+       if (!p)
+               return -ENOMEM;
+
+       new_off = btf->hdr->str_len;
+       memcpy(p, s, len);
+
+       /* Now attempt to add the string, but only if the string with the same
+        * contents doesn't exist already (HASHMAP_ADD strategy). If such
+        * string exists, we'll get its offset in old_off (that's old_key).
+        */
+       err = hashmap__insert(btf->strs_hash, (void *)new_off, (void *)new_off,
+                             HASHMAP_ADD, (const void **)&old_off, NULL);
+       if (err == -EEXIST)
+               return old_off; /* duplicated string, return existing offset */
+       if (err)
+               return err;
+
+       btf->hdr->str_len += len; /* new unique string, adjust data length */
+       return new_off;
+}
+
+static void *btf_add_type_mem(struct btf *btf, size_t add_sz)
+{
+       return btf_add_mem(&btf->types_data, &btf->types_data_cap, 1,
+                          btf->hdr->type_len, UINT_MAX, add_sz);
+}
+
+static __u32 btf_type_info(int kind, int vlen, int kflag)
+{
+       return (kflag << 31) | (kind << 24) | vlen;
+}
+
+static void btf_type_inc_vlen(struct btf_type *t)
+{
+       t->info = btf_type_info(btf_kind(t), btf_vlen(t) + 1, btf_kflag(t));
+}
+
+/*
+ * Append new BTF_KIND_INT type with:
+ *   - *name* - non-empty, non-NULL type name;
+ *   - *sz* - power-of-2 (1, 2, 4, ..) size of the type, in bytes;
+ *   - encoding is a combination of BTF_INT_SIGNED, BTF_INT_CHAR, BTF_INT_BOOL.
+ * Returns:
+ *   - >0, type ID of newly added BTF type;
+ *   - <0, on error.
+ */
+int btf__add_int(struct btf *btf, const char *name, size_t byte_sz, int encoding)
+{
+       struct btf_type *t;
+       int sz, err, name_off;
+
+       /* non-empty name */
+       if (!name || !name[0])
+               return -EINVAL;
+       /* byte_sz must be power of 2 */
+       if (!byte_sz || (byte_sz & (byte_sz - 1)) || byte_sz > 16)
+               return -EINVAL;
+       if (encoding & ~(BTF_INT_SIGNED | BTF_INT_CHAR | BTF_INT_BOOL))
+               return -EINVAL;
+
+       /* deconstruct BTF, if necessary, and invalidate raw_data */
+       if (btf_ensure_modifiable(btf))
+               return -ENOMEM;
+
+       sz = sizeof(struct btf_type) + sizeof(int);
+       t = btf_add_type_mem(btf, sz);
+       if (!t)
+               return -ENOMEM;
+
+       /* if something goes wrong later, we might end up with an extra string,
+        * but that shouldn't be a problem, because BTF can't be constructed
+        * completely anyway and will most probably be just discarded
+        */
+       name_off = btf__add_str(btf, name);
+       if (name_off < 0)
+               return name_off;
+
+       t->name_off = name_off;
+       t->info = btf_type_info(BTF_KIND_INT, 0, 0);
+       t->size = byte_sz;
+       /* set INT info, we don't allow setting legacy bit offset/size */
+       *(__u32 *)(t + 1) = (encoding << 24) | (byte_sz * 8);
+
+       err = btf_add_type_idx_entry(btf, btf->hdr->type_len);
+       if (err)
+               return err;
+
+       btf->hdr->type_len += sz;
+       btf->hdr->str_off += sz;
+       btf->nr_types++;
+       return btf->nr_types;
+}
+
+/* it's completely legal to append BTF types with type IDs pointing forward to
+ * types that haven't been appended yet, so we only make sure that id looks
+ * sane; we can't guarantee that ID will always be valid
+ */
+static int validate_type_id(int id)
+{
+       if (id < 0 || id > BTF_MAX_NR_TYPES)
+               return -EINVAL;
+       return 0;
+}
+
+/* generic append function for PTR, TYPEDEF, CONST/VOLATILE/RESTRICT */
+static int btf_add_ref_kind(struct btf *btf, int kind, const char *name, int ref_type_id)
+{
+       struct btf_type *t;
+       int sz, name_off = 0, err;
+
+       if (validate_type_id(ref_type_id))
+               return -EINVAL;
+
+       if (btf_ensure_modifiable(btf))
+               return -ENOMEM;
+
+       sz = sizeof(struct btf_type);
+       t = btf_add_type_mem(btf, sz);
+       if (!t)
+               return -ENOMEM;
+
+       if (name && name[0]) {
+               name_off = btf__add_str(btf, name);
+               if (name_off < 0)
+                       return name_off;
+       }
+
+       t->name_off = name_off;
+       t->info = btf_type_info(kind, 0, 0);
+       t->type = ref_type_id;
+
+       err = btf_add_type_idx_entry(btf, btf->hdr->type_len);
+       if (err)
+               return err;
+
+       btf->hdr->type_len += sz;
+       btf->hdr->str_off += sz;
+       btf->nr_types++;
+       return btf->nr_types;
+}
+
+/*
+ * Append new BTF_KIND_PTR type with:
+ *   - *ref_type_id* - referenced type ID, it might not exist yet;
+ * Returns:
+ *   - >0, type ID of newly added BTF type;
+ *   - <0, on error.
+ */
+int btf__add_ptr(struct btf *btf, int ref_type_id)
+{
+       return btf_add_ref_kind(btf, BTF_KIND_PTR, NULL, ref_type_id);
+}
+
+/*
+ * Append new BTF_KIND_ARRAY type with:
+ *   - *index_type_id* - type ID of the type describing array index;
+ *   - *elem_type_id* - type ID of the type describing array element;
+ *   - *nr_elems* - the size of the array;
+ * Returns:
+ *   - >0, type ID of newly added BTF type;
+ *   - <0, on error.
+ */
+int btf__add_array(struct btf *btf, int index_type_id, int elem_type_id, __u32 nr_elems)
+{
+       struct btf_type *t;
+       struct btf_array *a;
+       int sz, err;
+
+       if (validate_type_id(index_type_id) || validate_type_id(elem_type_id))
+               return -EINVAL;
+
+       if (btf_ensure_modifiable(btf))
+               return -ENOMEM;
+
+       sz = sizeof(struct btf_type) + sizeof(struct btf_array);
+       t = btf_add_type_mem(btf, sz);
+       if (!t)
+               return -ENOMEM;
+
+       t->name_off = 0;
+       t->info = btf_type_info(BTF_KIND_ARRAY, 0, 0);
+       t->size = 0;
+
+       a = btf_array(t);
+       a->type = elem_type_id;
+       a->index_type = index_type_id;
+       a->nelems = nr_elems;
+
+       err = btf_add_type_idx_entry(btf, btf->hdr->type_len);
+       if (err)
+               return err;
+
+       btf->hdr->type_len += sz;
+       btf->hdr->str_off += sz;
+       btf->nr_types++;
+       return btf->nr_types;
+}
+
+/* generic STRUCT/UNION append function */
+static int btf_add_composite(struct btf *btf, int kind, const char *name, __u32 bytes_sz)
+{
+       struct btf_type *t;
+       int sz, err, name_off = 0;
+
+       if (btf_ensure_modifiable(btf))
+               return -ENOMEM;
+
+       sz = sizeof(struct btf_type);
+       t = btf_add_type_mem(btf, sz);
+       if (!t)
+               return -ENOMEM;
+
+       if (name && name[0]) {
+               name_off = btf__add_str(btf, name);
+               if (name_off < 0)
+                       return name_off;
+       }
+
+       /* start out with vlen=0 and no kflag; this will be adjusted when
+        * adding each member
+        */
+       t->name_off = name_off;
+       t->info = btf_type_info(kind, 0, 0);
+       t->size = bytes_sz;
+
+       err = btf_add_type_idx_entry(btf, btf->hdr->type_len);
+       if (err)
+               return err;
+
+       btf->hdr->type_len += sz;
+       btf->hdr->str_off += sz;
+       btf->nr_types++;
+       return btf->nr_types;
+}
+
+/*
+ * Append new BTF_KIND_STRUCT type with:
+ *   - *name* - name of the struct, can be NULL or empty for anonymous structs;
+ *   - *byte_sz* - size of the struct, in bytes;
+ *
+ * Struct initially has no fields in it. Fields can be added by
+ * btf__add_field() right after btf__add_struct() succeeds.
+ *
+ * Returns:
+ *   - >0, type ID of newly added BTF type;
+ *   - <0, on error.
+ */
+int btf__add_struct(struct btf *btf, const char *name, __u32 byte_sz)
+{
+       return btf_add_composite(btf, BTF_KIND_STRUCT, name, byte_sz);
+}
+
+/*
+ * Append new BTF_KIND_UNION type with:
+ *   - *name* - name of the union, can be NULL or empty for anonymous union;
+ *   - *byte_sz* - size of the union, in bytes;
+ *
+ * Union initially has no fields in it. Fields can be added by
+ * btf__add_field() right after btf__add_union() succeeds. All fields
+ * should have *bit_offset* of 0.
+ *
+ * Returns:
+ *   - >0, type ID of newly added BTF type;
+ *   - <0, on error.
+ */
+int btf__add_union(struct btf *btf, const char *name, __u32 byte_sz)
+{
+       return btf_add_composite(btf, BTF_KIND_UNION, name, byte_sz);
+}
+
+/*
+ * Append new field for the current STRUCT/UNION type with:
+ *   - *name* - name of the field, can be NULL or empty for anonymous field;
+ *   - *type_id* - type ID for the type describing field type;
+ *   - *bit_offset* - bit offset of the start of the field within struct/union;
+ *   - *bit_size* - bit size of a bitfield, 0 for non-bitfield fields;
+ * Returns:
+ *   -  0, on success;
+ *   - <0, on error.
+ */
+int btf__add_field(struct btf *btf, const char *name, int type_id,
+                  __u32 bit_offset, __u32 bit_size)
+{
+       struct btf_type *t;
+       struct btf_member *m;
+       bool is_bitfield;
+       int sz, name_off = 0;
+
+       /* last type should be union/struct */
+       if (btf->nr_types == 0)
+               return -EINVAL;
+       t = btf_type_by_id(btf, btf->nr_types);
+       if (!btf_is_composite(t))
+               return -EINVAL;
+
+       if (validate_type_id(type_id))
+               return -EINVAL;
+       /* best-effort bit field offset/size enforcement */
+       is_bitfield = bit_size || (bit_offset % 8 != 0);
+       if (is_bitfield && (bit_size == 0 || bit_size > 255 || bit_offset > 0xffffff))
+               return -EINVAL;
+
+       /* only offset 0 is allowed for unions */
+       if (btf_is_union(t) && bit_offset)
+               return -EINVAL;
+
+       /* decompose and invalidate raw data */
+       if (btf_ensure_modifiable(btf))
+               return -ENOMEM;
+
+       sz = sizeof(struct btf_member);
+       m = btf_add_type_mem(btf, sz);
+       if (!m)
+               return -ENOMEM;
+
+       if (name && name[0]) {
+               name_off = btf__add_str(btf, name);
+               if (name_off < 0)
+                       return name_off;
+       }
+
+       m->name_off = name_off;
+       m->type = type_id;
+       m->offset = bit_offset | (bit_size << 24);
+
+       /* btf_add_type_mem can invalidate t pointer */
+       t = btf_type_by_id(btf, btf->nr_types);
+       /* update parent type's vlen and kflag */
+       t->info = btf_type_info(btf_kind(t), btf_vlen(t) + 1, is_bitfield || btf_kflag(t));
+
+       btf->hdr->type_len += sz;
+       btf->hdr->str_off += sz;
+       return 0;
+}
+
+/*
+ * Append new BTF_KIND_ENUM type with:
+ *   - *name* - name of the enum, can be NULL or empty for anonymous enums;
+ *   - *byte_sz* - size of the enum, in bytes.
+ *
+ * Enum initially has no enum values in it (and corresponds to enum forward
+ * declaration). Enumerator values can be added by btf__add_enum_value()
+ * immediately after btf__add_enum() succeeds.
+ *
+ * Returns:
+ *   - >0, type ID of newly added BTF type;
+ *   - <0, on error.
+ */
+int btf__add_enum(struct btf *btf, const char *name, __u32 byte_sz)
+{
+       struct btf_type *t;
+       int sz, err, name_off = 0;
+
+       /* byte_sz must be power of 2 */
+       if (!byte_sz || (byte_sz & (byte_sz - 1)) || byte_sz > 8)
+               return -EINVAL;
+
+       if (btf_ensure_modifiable(btf))
+               return -ENOMEM;
+
+       sz = sizeof(struct btf_type);
+       t = btf_add_type_mem(btf, sz);
+       if (!t)
+               return -ENOMEM;
+
+       if (name && name[0]) {
+               name_off = btf__add_str(btf, name);
+               if (name_off < 0)
+                       return name_off;
+       }
+
+       /* start out with vlen=0; it will be adjusted when adding enum values */
+       t->name_off = name_off;
+       t->info = btf_type_info(BTF_KIND_ENUM, 0, 0);
+       t->size = byte_sz;
+
+       err = btf_add_type_idx_entry(btf, btf->hdr->type_len);
+       if (err)
+               return err;
+
+       btf->hdr->type_len += sz;
+       btf->hdr->str_off += sz;
+       btf->nr_types++;
+       return btf->nr_types;
+}
+
+/*
+ * Append new enum value for the current ENUM type with:
+ *   - *name* - name of the enumerator value, can't be NULL or empty;
+ *   - *value* - integer value corresponding to enum value *name*;
+ * Returns:
+ *   -  0, on success;
+ *   - <0, on error.
+ */
+int btf__add_enum_value(struct btf *btf, const char *name, __s64 value)
+{
+       struct btf_type *t;
+       struct btf_enum *v;
+       int sz, name_off;
+
+       /* last type should be BTF_KIND_ENUM */
+       if (btf->nr_types == 0)
+               return -EINVAL;
+       t = btf_type_by_id(btf, btf->nr_types);
+       if (!btf_is_enum(t))
+               return -EINVAL;
+
+       /* non-empty name */
+       if (!name || !name[0])
+               return -EINVAL;
+       if (value < INT_MIN || value > UINT_MAX)
+               return -E2BIG;
+
+       /* decompose and invalidate raw data */
+       if (btf_ensure_modifiable(btf))
+               return -ENOMEM;
+
+       sz = sizeof(struct btf_enum);
+       v = btf_add_type_mem(btf, sz);
+       if (!v)
+               return -ENOMEM;
 
-retry_load:
-       if (log_buf_size) {
-               log_buf = malloc(log_buf_size);
-               if (!log_buf)
-                       return -ENOMEM;
+       name_off = btf__add_str(btf, name);
+       if (name_off < 0)
+               return name_off;
 
-               *log_buf = 0;
-       }
+       v->name_off = name_off;
+       v->val = value;
 
-       btf->fd = bpf_load_btf(btf->data, btf->data_size,
-                              log_buf, log_buf_size, false);
-       if (btf->fd < 0) {
-               if (!log_buf || errno == ENOSPC) {
-                       log_buf_size = max((__u32)BPF_LOG_BUF_SIZE,
-                                          log_buf_size << 1);
-                       free(log_buf);
-                       goto retry_load;
-               }
+       /* update parent type's vlen */
+       t = btf_type_by_id(btf, btf->nr_types);
+       btf_type_inc_vlen(t);
 
-               err = -errno;
-               pr_warn("Error loading BTF: %s(%d)\n", strerror(errno), errno);
-               if (*log_buf)
-                       pr_warn("%s\n", log_buf);
-               goto done;
-       }
+       btf->hdr->type_len += sz;
+       btf->hdr->str_off += sz;
+       return 0;
+}
 
-done:
-       free(log_buf);
-       return err;
+/*
+ * Append new BTF_KIND_FWD type with:
+ *   - *name*, non-empty/non-NULL name;
+ *   - *fwd_kind*, kind of forward declaration, one of BTF_FWD_STRUCT,
+ *     BTF_FWD_UNION, or BTF_FWD_ENUM;
+ * Returns:
+ *   - >0, type ID of newly added BTF type;
+ *   - <0, on error.
+ */
+int btf__add_fwd(struct btf *btf, const char *name, enum btf_fwd_kind fwd_kind)
+{
+       if (!name || !name[0])
+               return -EINVAL;
+
+       switch (fwd_kind) {
+       case BTF_FWD_STRUCT:
+       case BTF_FWD_UNION: {
+               struct btf_type *t;
+               int id;
+
+               id = btf_add_ref_kind(btf, BTF_KIND_FWD, name, 0);
+               if (id <= 0)
+                       return id;
+               t = btf_type_by_id(btf, id);
+               t->info = btf_type_info(BTF_KIND_FWD, 0, fwd_kind == BTF_FWD_UNION);
+               return id;
+       }
+       case BTF_FWD_ENUM:
+               /* enum forward in BTF currently is just an enum with no enum
+                * values; we also assume a standard 4-byte size for it
+                */
+               return btf__add_enum(btf, name, sizeof(int));
+       default:
+               return -EINVAL;
+       }
 }
 
-int btf__fd(const struct btf *btf)
+/*
+ * Append new BTF_KIND_TYPEDEF type with:
+ *   - *name*, non-empty/non-NULL name;
+ *   - *ref_type_id* - referenced type ID, it might not exist yet;
+ * Returns:
+ *   - >0, type ID of newly added BTF type;
+ *   - <0, on error.
+ */
+int btf__add_typedef(struct btf *btf, const char *name, int ref_type_id)
 {
-       return btf->fd;
+       if (!name || !name[0])
+               return -EINVAL;
+
+       return btf_add_ref_kind(btf, BTF_KIND_TYPEDEF, name, ref_type_id);
 }
 
-void btf__set_fd(struct btf *btf, int fd)
+/*
+ * Append new BTF_KIND_VOLATILE type with:
+ *   - *ref_type_id* - referenced type ID, it might not exist yet;
+ * Returns:
+ *   - >0, type ID of newly added BTF type;
+ *   - <0, on error.
+ */
+int btf__add_volatile(struct btf *btf, int ref_type_id)
 {
-       btf->fd = fd;
+       return btf_add_ref_kind(btf, BTF_KIND_VOLATILE, NULL, ref_type_id);
 }
 
-const void *btf__get_raw_data(const struct btf *btf, __u32 *size)
+/*
+ * Append new BTF_KIND_CONST type with:
+ *   - *ref_type_id* - referenced type ID, it might not exist yet;
+ * Returns:
+ *   - >0, type ID of newly added BTF type;
+ *   - <0, on error.
+ */
+int btf__add_const(struct btf *btf, int ref_type_id)
 {
-       *size = btf->data_size;
-       return btf->data;
+       return btf_add_ref_kind(btf, BTF_KIND_CONST, NULL, ref_type_id);
 }
 
-const char *btf__name_by_offset(const struct btf *btf, __u32 offset)
+/*
+ * Append new BTF_KIND_RESTRICT type with:
+ *   - *ref_type_id* - referenced type ID, it might not exist yet;
+ * Returns:
+ *   - >0, type ID of newly added BTF type;
+ *   - <0, on error.
+ */
+int btf__add_restrict(struct btf *btf, int ref_type_id)
 {
-       if (offset < btf->hdr->str_len)
-               return &btf->strings[offset];
-       else
-               return NULL;
+       return btf_add_ref_kind(btf, BTF_KIND_RESTRICT, NULL, ref_type_id);
 }
 
-int btf__get_from_id(__u32 id, struct btf **btf)
+/*
+ * Append new BTF_KIND_FUNC type with:
+ *   - *name*, non-empty/non-NULL name;
+ *   - *proto_type_id* - FUNC_PROTO's type ID, it might not exist yet;
+ * Returns:
+ *   - >0, type ID of newly added BTF type;
+ *   - <0, on error.
+ */
+int btf__add_func(struct btf *btf, const char *name,
+                 enum btf_func_linkage linkage, int proto_type_id)
 {
-       struct bpf_btf_info btf_info = { 0 };
-       __u32 len = sizeof(btf_info);
-       __u32 last_size;
-       int btf_fd;
-       void *ptr;
-       int err;
+       int id;
 
-       err = 0;
-       *btf = NULL;
-       btf_fd = bpf_btf_get_fd_by_id(id);
-       if (btf_fd < 0)
-               return 0;
+       if (!name || !name[0])
+               return -EINVAL;
+       if (linkage != BTF_FUNC_STATIC && linkage != BTF_FUNC_GLOBAL &&
+           linkage != BTF_FUNC_EXTERN)
+               return -EINVAL;
 
-       /* we won't know btf_size until we call bpf_obj_get_info_by_fd(). so
-        * let's start with a sane default - 4KiB here - and resize it only if
-        * bpf_obj_get_info_by_fd() needs a bigger buffer.
-        */
-       btf_info.btf_size = 4096;
-       last_size = btf_info.btf_size;
-       ptr = malloc(last_size);
-       if (!ptr) {
-               err = -ENOMEM;
-               goto exit_free;
+       id = btf_add_ref_kind(btf, BTF_KIND_FUNC, name, proto_type_id);
+       if (id > 0) {
+               struct btf_type *t = btf_type_by_id(btf, id);
+
+               t->info = btf_type_info(BTF_KIND_FUNC, linkage, 0);
        }
+       return id;
+}
 
-       memset(ptr, 0, last_size);
-       btf_info.btf = ptr_to_u64(ptr);
-       err = bpf_obj_get_info_by_fd(btf_fd, &btf_info, &len);
+/*
+ * Append new BTF_KIND_FUNC_PROTO with:
+ *   - *ret_type_id* - type ID for return result of a function.
+ *
+ * Function prototype initially has no arguments, but they can be added by
+ * btf__add_func_param() one by one, immediately after
+ * btf__add_func_proto() succeeded.
+ *
+ * Returns:
+ *   - >0, type ID of newly added BTF type;
+ *   - <0, on error.
+ */
+int btf__add_func_proto(struct btf *btf, int ret_type_id)
+{
+       struct btf_type *t;
+       int sz, err;
 
-       if (!err && btf_info.btf_size > last_size) {
-               void *temp_ptr;
+       if (validate_type_id(ret_type_id))
+               return -EINVAL;
 
-               last_size = btf_info.btf_size;
-               temp_ptr = realloc(ptr, last_size);
-               if (!temp_ptr) {
-                       err = -ENOMEM;
-                       goto exit_free;
-               }
-               ptr = temp_ptr;
-               memset(ptr, 0, last_size);
-               btf_info.btf = ptr_to_u64(ptr);
-               err = bpf_obj_get_info_by_fd(btf_fd, &btf_info, &len);
-       }
+       if (btf_ensure_modifiable(btf))
+               return -ENOMEM;
 
-       if (err || btf_info.btf_size > last_size) {
-               err = errno;
-               goto exit_free;
-       }
+       sz = sizeof(struct btf_type);
+       t = btf_add_type_mem(btf, sz);
+       if (!t)
+               return -ENOMEM;
 
-       *btf = btf__new((__u8 *)(long)btf_info.btf, btf_info.btf_size);
-       if (IS_ERR(*btf)) {
-               err = PTR_ERR(*btf);
-               *btf = NULL;
-       }
+       /* start out with vlen=0; this will be adjusted when adding function
+        * parameters, if necessary
+        */
+       t->name_off = 0;
+       t->info = btf_type_info(BTF_KIND_FUNC_PROTO, 0, 0);
+       t->type = ret_type_id;
 
-exit_free:
-       close(btf_fd);
-       free(ptr);
+       err = btf_add_type_idx_entry(btf, btf->hdr->type_len);
+       if (err)
+               return err;
 
-       return err;
+       btf->hdr->type_len += sz;
+       btf->hdr->str_off += sz;
+       btf->nr_types++;
+       return btf->nr_types;
 }
 
-int btf__get_map_kv_tids(const struct btf *btf, const char *map_name,
-                        __u32 expected_key_size, __u32 expected_value_size,
-                        __u32 *key_type_id, __u32 *value_type_id)
+/*
+ * Append new function parameter for current FUNC_PROTO type with:
+ *   - *name* - parameter name, can be NULL or empty;
+ *   - *type_id* - type ID describing the type of the parameter.
+ * Returns:
+ *   -  0, on success;
+ *   - <0, on error.
+ */
+int btf__add_func_param(struct btf *btf, const char *name, int type_id)
 {
-       const struct btf_type *container_type;
-       const struct btf_member *key, *value;
-       const size_t max_name = 256;
-       char container_name[max_name];
-       __s64 key_size, value_size;
-       __s32 container_id;
+       struct btf_type *t;
+       struct btf_param *p;
+       int sz, name_off = 0;
 
-       if (snprintf(container_name, max_name, "____btf_map_%s", map_name) ==
-           max_name) {
-               pr_warn("map:%s length of '____btf_map_%s' is too long\n",
-                       map_name, map_name);
+       if (validate_type_id(type_id))
                return -EINVAL;
-       }
-
-       container_id = btf__find_by_name(btf, container_name);
-       if (container_id < 0) {
-               pr_debug("map:%s container_name:%s cannot be found in BTF. Missing BPF_ANNOTATE_KV_PAIR?\n",
-                        map_name, container_name);
-               return container_id;
-       }
 
-       container_type = btf__type_by_id(btf, container_id);
-       if (!container_type) {
-               pr_warn("map:%s cannot find BTF type for container_id:%u\n",
-                       map_name, container_id);
+       /* last type should be BTF_KIND_FUNC_PROTO */
+       if (btf->nr_types == 0)
+               return -EINVAL;
+       t = btf_type_by_id(btf, btf->nr_types);
+       if (!btf_is_func_proto(t))
                return -EINVAL;
+
+       /* decompose and invalidate raw data */
+       if (btf_ensure_modifiable(btf))
+               return -ENOMEM;
+
+       sz = sizeof(struct btf_param);
+       p = btf_add_type_mem(btf, sz);
+       if (!p)
+               return -ENOMEM;
+
+       if (name && name[0]) {
+               name_off = btf__add_str(btf, name);
+               if (name_off < 0)
+                       return name_off;
        }
 
-       if (!btf_is_struct(container_type) || btf_vlen(container_type) < 2) {
-               pr_warn("map:%s container_name:%s is an invalid container struct\n",
-                       map_name, container_name);
+       p->name_off = name_off;
+       p->type = type_id;
+
+       /* update parent type's vlen */
+       t = btf_type_by_id(btf, btf->nr_types);
+       btf_type_inc_vlen(t);
+
+       btf->hdr->type_len += sz;
+       btf->hdr->str_off += sz;
+       return 0;
+}
+
+/*
+ * Append new BTF_KIND_VAR type with:
+ *   - *name* - non-empty/non-NULL name;
+ *   - *linkage* - variable linkage, one of BTF_VAR_STATIC,
+ *     BTF_VAR_GLOBAL_ALLOCATED, or BTF_VAR_GLOBAL_EXTERN;
+ *   - *type_id* - type ID of the type describing the type of the variable.
+ * Returns:
+ *   - >0, type ID of newly added BTF type;
+ *   - <0, on error.
+ */
+int btf__add_var(struct btf *btf, const char *name, int linkage, int type_id)
+{
+       struct btf_type *t;
+       struct btf_var *v;
+       int sz, err, name_off;
+
+       /* non-empty name */
+       if (!name || !name[0])
+               return -EINVAL;
+       if (linkage != BTF_VAR_STATIC && linkage != BTF_VAR_GLOBAL_ALLOCATED &&
+           linkage != BTF_VAR_GLOBAL_EXTERN)
+               return -EINVAL;
+       if (validate_type_id(type_id))
                return -EINVAL;
-       }
 
-       key = btf_members(container_type);
-       value = key + 1;
+       /* deconstruct BTF, if necessary, and invalidate raw_data */
+       if (btf_ensure_modifiable(btf))
+               return -ENOMEM;
 
-       key_size = btf__resolve_size(btf, key->type);
-       if (key_size < 0) {
-               pr_warn("map:%s invalid BTF key_type_size\n", map_name);
-               return key_size;
-       }
+       sz = sizeof(struct btf_type) + sizeof(struct btf_var);
+       t = btf_add_type_mem(btf, sz);
+       if (!t)
+               return -ENOMEM;
 
-       if (expected_key_size != key_size) {
-               pr_warn("map:%s btf_key_type_size:%u != map_def_key_size:%u\n",
-                       map_name, (__u32)key_size, expected_key_size);
+       name_off = btf__add_str(btf, name);
+       if (name_off < 0)
+               return name_off;
+
+       t->name_off = name_off;
+       t->info = btf_type_info(BTF_KIND_VAR, 0, 0);
+       t->type = type_id;
+
+       v = btf_var(t);
+       v->linkage = linkage;
+
+       err = btf_add_type_idx_entry(btf, btf->hdr->type_len);
+       if (err)
+               return err;
+
+       btf->hdr->type_len += sz;
+       btf->hdr->str_off += sz;
+       btf->nr_types++;
+       return btf->nr_types;
+}
+
+/*
+ * Append new BTF_KIND_DATASEC type with:
+ *   - *name* - non-empty/non-NULL name;
+ *   - *byte_sz* - data section size, in bytes.
+ *
+ * Data section is initially empty. Variables info can be added with
+ * btf__add_datasec_var_info() calls, after btf__add_datasec() succeeds.
+ *
+ * Returns:
+ *   - >0, type ID of newly added BTF type;
+ *   - <0, on error.
+ */
+int btf__add_datasec(struct btf *btf, const char *name, __u32 byte_sz)
+{
+       struct btf_type *t;
+       int sz, err, name_off;
+
+       /* non-empty name */
+       if (!name || !name[0])
                return -EINVAL;
-       }
 
-       value_size = btf__resolve_size(btf, value->type);
-       if (value_size < 0) {
-               pr_warn("map:%s invalid BTF value_type_size\n", map_name);
-               return value_size;
-       }
+       if (btf_ensure_modifiable(btf))
+               return -ENOMEM;
 
-       if (expected_value_size != value_size) {
-               pr_warn("map:%s btf_value_type_size:%u != map_def_value_size:%u\n",
-                       map_name, (__u32)value_size, expected_value_size);
+       sz = sizeof(struct btf_type);
+       t = btf_add_type_mem(btf, sz);
+       if (!t)
+               return -ENOMEM;
+
+       name_off = btf__add_str(btf, name);
+       if (name_off < 0)
+               return name_off;
+
+       /* start with vlen=0, which will be updated as var_secinfos are added */
+       t->name_off = name_off;
+       t->info = btf_type_info(BTF_KIND_DATASEC, 0, 0);
+       t->size = byte_sz;
+
+       err = btf_add_type_idx_entry(btf, btf->hdr->type_len);
+       if (err)
+               return err;
+
+       btf->hdr->type_len += sz;
+       btf->hdr->str_off += sz;
+       btf->nr_types++;
+       return btf->nr_types;
+}
+
+/*
+ * Append new data section variable information entry for current DATASEC type:
+ *   - *var_type_id* - type ID, describing type of the variable;
+ *   - *offset* - variable offset within data section, in bytes;
+ *   - *byte_sz* - variable size, in bytes.
+ *
+ * Returns:
+ *   -  0, on success;
+ *   - <0, on error.
+ */
+int btf__add_datasec_var_info(struct btf *btf, int var_type_id, __u32 offset, __u32 byte_sz)
+{
+       struct btf_type *t;
+       struct btf_var_secinfo *v;
+       int sz;
+
+       /* last type should be BTF_KIND_DATASEC */
+       if (btf->nr_types == 0)
+               return -EINVAL;
+       t = btf_type_by_id(btf, btf->nr_types);
+       if (!btf_is_datasec(t))
                return -EINVAL;
-       }
 
-       *key_type_id = key->type;
-       *value_type_id = value->type;
+       if (validate_type_id(var_type_id))
+               return -EINVAL;
+
+       /* decompose and invalidate raw data */
+       if (btf_ensure_modifiable(btf))
+               return -ENOMEM;
+
+       sz = sizeof(struct btf_var_secinfo);
+       v = btf_add_type_mem(btf, sz);
+       if (!v)
+               return -ENOMEM;
+
+       v->type = var_type_id;
+       v->offset = offset;
+       v->size = byte_sz;
 
+       /* update parent type's vlen */
+       t = btf_type_by_id(btf, btf->nr_types);
+       btf_type_inc_vlen(t);
+
+       btf->hdr->type_len += sz;
+       btf->hdr->str_off += sz;
        return 0;
 }
 
@@ -1151,7 +2471,10 @@ static int btf_ext_parse_hdr(__u8 *data, __u32 data_size)
                return -EINVAL;
        }
 
-       if (hdr->magic != BTF_MAGIC) {
+       if (hdr->magic == bswap_16(BTF_MAGIC)) {
+               pr_warn("BTF.ext in non-native endianness is not supported\n");
+               return -ENOTSUP;
+       } else if (hdr->magic != BTF_MAGIC) {
                pr_debug("Invalid BTF.ext magic:%x\n", hdr->magic);
                return -EINVAL;
        }
@@ -1471,6 +2794,9 @@ int btf__dedup(struct btf *btf, struct btf_ext *btf_ext,
                return -EINVAL;
        }
 
+       if (btf_ensure_modifiable(btf))
+               return -ENOMEM;
+
        err = btf_dedup_strings(d);
        if (err < 0) {
                pr_debug("btf_dedup_strings failed:%d\n", err);
@@ -1655,7 +2981,7 @@ static struct btf_dedup *btf_dedup_new(struct btf *btf, struct btf_ext *btf_ext,
        /* special BTF "void" type is made canonical immediately */
        d->map[0] = 0;
        for (i = 1; i <= btf->nr_types; i++) {
-               struct btf_type *t = d->btf->types[i];
+               struct btf_type *t = btf_type_by_id(d->btf, i);
 
                /* VAR and DATASEC are never deduped and are self-canonical */
                if (btf_is_var(t) || btf_is_datasec(t))
@@ -1694,7 +3020,7 @@ static int btf_for_each_str_off(struct btf_dedup *d, str_off_fn_t fn, void *ctx)
        struct btf_type *t;
 
        for (i = 1; i <= d->btf->nr_types; i++) {
-               t = d->btf->types[i];
+               t = btf_type_by_id(d->btf, i);
                r = fn(&t->name_off, ctx);
                if (r)
                        return r;
@@ -1848,8 +3174,7 @@ static int btf_str_remap_offset(__u32 *str_off_ptr, void *ctx)
  */
 static int btf_dedup_strings(struct btf_dedup *d)
 {
-       const struct btf_header *hdr = d->btf->hdr;
-       char *start = (char *)d->btf->nohdr_data + hdr->str_off;
+       char *start = d->btf->strs_data;
        char *end = start + d->btf->hdr->str_len;
        char *p = start, *tmp_strs = NULL;
        struct btf_str_ptrs strs = {
@@ -1861,6 +3186,9 @@ static int btf_dedup_strings(struct btf_dedup *d)
        int i, j, err = 0, grp_idx;
        bool grp_used;
 
+       if (d->btf->strs_deduped)
+               return 0;
+
        /* build index of all strings */
        while (p < end) {
                if (strs.cnt + 1 > strs.cap) {
@@ -1953,6 +3281,7 @@ static int btf_dedup_strings(struct btf_dedup *d)
                goto done;
 
        d->btf->hdr->str_len = end - start;
+       d->btf->strs_deduped = true;
 
 done:
        free(tmp_strs);
@@ -2229,7 +3558,7 @@ static bool btf_compat_fnproto(struct btf_type *t1, struct btf_type *t2)
  */
 static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id)
 {
-       struct btf_type *t = d->btf->types[type_id];
+       struct btf_type *t = btf_type_by_id(d->btf, type_id);
        struct hashmap_entry *hash_entry;
        struct btf_type *cand;
        /* if we don't find equivalent type, then we are canonical */
@@ -2256,7 +3585,7 @@ static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id)
                h = btf_hash_int(t);
                for_each_dedup_cand(d, hash_entry, h) {
                        cand_id = (__u32)(long)hash_entry->value;
-                       cand = d->btf->types[cand_id];
+                       cand = btf_type_by_id(d->btf, cand_id);
                        if (btf_equal_int(t, cand)) {
                                new_id = cand_id;
                                break;
@@ -2268,7 +3597,7 @@ static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id)
                h = btf_hash_enum(t);
                for_each_dedup_cand(d, hash_entry, h) {
                        cand_id = (__u32)(long)hash_entry->value;
-                       cand = d->btf->types[cand_id];
+                       cand = btf_type_by_id(d->btf, cand_id);
                        if (btf_equal_enum(t, cand)) {
                                new_id = cand_id;
                                break;
@@ -2291,7 +3620,7 @@ static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id)
                h = btf_hash_common(t);
                for_each_dedup_cand(d, hash_entry, h) {
                        cand_id = (__u32)(long)hash_entry->value;
-                       cand = d->btf->types[cand_id];
+                       cand = btf_type_by_id(d->btf, cand_id);
                        if (btf_equal_common(t, cand)) {
                                new_id = cand_id;
                                break;
@@ -2350,13 +3679,13 @@ static uint32_t resolve_fwd_id(struct btf_dedup *d, uint32_t type_id)
 {
        __u32 orig_type_id = type_id;
 
-       if (!btf_is_fwd(d->btf->types[type_id]))
+       if (!btf_is_fwd(btf__type_by_id(d->btf, type_id)))
                return type_id;
 
        while (is_type_mapped(d, type_id) && d->map[type_id] != type_id)
                type_id = d->map[type_id];
 
-       if (!btf_is_fwd(d->btf->types[type_id]))
+       if (!btf_is_fwd(btf__type_by_id(d->btf, type_id)))
                return type_id;
 
        return orig_type_id;
@@ -2484,8 +3813,8 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id,
        if (btf_dedup_hypot_map_add(d, canon_id, cand_id))
                return -ENOMEM;
 
-       cand_type = d->btf->types[cand_id];
-       canon_type = d->btf->types[canon_id];
+       cand_type = btf_type_by_id(d->btf, cand_id);
+       canon_type = btf_type_by_id(d->btf, canon_id);
        cand_kind = btf_kind(cand_type);
        canon_kind = btf_kind(canon_type);
 
@@ -2636,8 +3965,8 @@ static void btf_dedup_merge_hypot_map(struct btf_dedup *d)
                targ_type_id = d->hypot_map[cand_type_id];
                t_id = resolve_type_id(d, targ_type_id);
                c_id = resolve_type_id(d, cand_type_id);
-               t_kind = btf_kind(d->btf->types[t_id]);
-               c_kind = btf_kind(d->btf->types[c_id]);
+               t_kind = btf_kind(btf__type_by_id(d->btf, t_id));
+               c_kind = btf_kind(btf__type_by_id(d->btf, c_id));
                /*
                 * Resolve FWD into STRUCT/UNION.
                 * It's ok to resolve FWD into STRUCT/UNION that's not yet
@@ -2705,7 +4034,7 @@ static int btf_dedup_struct_type(struct btf_dedup *d, __u32 type_id)
        if (d->map[type_id] <= BTF_MAX_NR_TYPES)
                return 0;
 
-       t = d->btf->types[type_id];
+       t = btf_type_by_id(d->btf, type_id);
        kind = btf_kind(t);
 
        if (kind != BTF_KIND_STRUCT && kind != BTF_KIND_UNION)
@@ -2726,7 +4055,7 @@ static int btf_dedup_struct_type(struct btf_dedup *d, __u32 type_id)
                 * creating a loop (FWD -> STRUCT and STRUCT -> FWD), because
                 * FWD and compatible STRUCT/UNION are considered equivalent.
                 */
-               cand_type = d->btf->types[cand_id];
+               cand_type = btf_type_by_id(d->btf, cand_id);
                if (!btf_shallow_equal_struct(t, cand_type))
                        continue;
 
@@ -2798,7 +4127,7 @@ static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id)
        if (d->map[type_id] <= BTF_MAX_NR_TYPES)
                return resolve_type_id(d, type_id);
 
-       t = d->btf->types[type_id];
+       t = btf_type_by_id(d->btf, type_id);
        d->map[type_id] = BTF_IN_PROGRESS_ID;
 
        switch (btf_kind(t)) {
@@ -2816,7 +4145,7 @@ static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id)
                h = btf_hash_common(t);
                for_each_dedup_cand(d, hash_entry, h) {
                        cand_id = (__u32)(long)hash_entry->value;
-                       cand = d->btf->types[cand_id];
+                       cand = btf_type_by_id(d->btf, cand_id);
                        if (btf_equal_common(t, cand)) {
                                new_id = cand_id;
                                break;
@@ -2840,7 +4169,7 @@ static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id)
                h = btf_hash_array(t);
                for_each_dedup_cand(d, hash_entry, h) {
                        cand_id = (__u32)(long)hash_entry->value;
-                       cand = d->btf->types[cand_id];
+                       cand = btf_type_by_id(d->btf, cand_id);
                        if (btf_equal_array(t, cand)) {
                                new_id = cand_id;
                                break;
@@ -2872,7 +4201,7 @@ static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id)
                h = btf_hash_fnproto(t);
                for_each_dedup_cand(d, hash_entry, h) {
                        cand_id = (__u32)(long)hash_entry->value;
-                       cand = d->btf->types[cand_id];
+                       cand = btf_type_by_id(d->btf, cand_id);
                        if (btf_equal_fnproto(t, cand)) {
                                new_id = cand_id;
                                break;
@@ -2920,9 +4249,9 @@ static int btf_dedup_ref_types(struct btf_dedup *d)
  */
 static int btf_dedup_compact_types(struct btf_dedup *d)
 {
-       struct btf_type **new_types;
+       __u32 *new_offs;
        __u32 next_type_id = 1;
-       char *types_start, *p;
+       void *p;
        int i, len;
 
        /* we are going to reuse hypot_map to store compaction remapping */
@@ -2930,41 +4259,34 @@ static int btf_dedup_compact_types(struct btf_dedup *d)
        for (i = 1; i <= d->btf->nr_types; i++)
                d->hypot_map[i] = BTF_UNPROCESSED_ID;
 
-       types_start = d->btf->nohdr_data + d->btf->hdr->type_off;
-       p = types_start;
+       p = d->btf->types_data;
 
        for (i = 1; i <= d->btf->nr_types; i++) {
                if (d->map[i] != i)
                        continue;
 
-               len = btf_type_size(d->btf->types[i]);
+               len = btf_type_size(btf__type_by_id(d->btf, i));
                if (len < 0)
                        return len;
 
-               memmove(p, d->btf->types[i], len);
+               memmove(p, btf__type_by_id(d->btf, i), len);
                d->hypot_map[i] = next_type_id;
-               d->btf->types[next_type_id] = (struct btf_type *)p;
+               d->btf->type_offs[next_type_id] = p - d->btf->types_data;
                p += len;
                next_type_id++;
        }
 
        /* shrink struct btf's internal types index and update btf_header */
        d->btf->nr_types = next_type_id - 1;
-       d->btf->types_size = d->btf->nr_types;
-       d->btf->hdr->type_len = p - types_start;
-       new_types = libbpf_reallocarray(d->btf->types, (1 + d->btf->nr_types),
-                                       sizeof(struct btf_type *));
-       if (!new_types)
+       d->btf->type_offs_cap = d->btf->nr_types + 1;
+       d->btf->hdr->type_len = p - d->btf->types_data;
+       new_offs = libbpf_reallocarray(d->btf->type_offs, d->btf->type_offs_cap,
+                                      sizeof(*new_offs));
+       if (!new_offs)
                return -ENOMEM;
-       d->btf->types = new_types;
-
-       /* make sure string section follows type information without gaps */
-       d->btf->hdr->str_off = p - (char *)d->btf->nohdr_data;
-       memmove(p, d->btf->strings, d->btf->hdr->str_len);
-       d->btf->strings = p;
-       p += d->btf->hdr->str_len;
-
-       d->btf->data_size = p - (char *)d->btf->data;
+       d->btf->type_offs = new_offs;
+       d->btf->hdr->str_off = d->btf->hdr->type_len;
+       d->btf->raw_size = d->btf->hdr->hdr_len + d->btf->hdr->type_len + d->btf->hdr->str_len;
        return 0;
 }
 
@@ -2997,7 +4319,7 @@ static int btf_dedup_remap_type_id(struct btf_dedup *d, __u32 type_id)
  */
 static int btf_dedup_remap_type(struct btf_dedup *d, __u32 type_id)
 {
-       struct btf_type *t = d->btf->types[type_id];
+       struct btf_type *t = btf_type_by_id(d->btf, type_id);
        int i, r;
 
        switch (btf_kind(t)) {
index 2a55320..5724724 100644 (file)
@@ -5,6 +5,7 @@
 #define __LIBBPF_BTF_H
 
 #include <stdarg.h>
+#include <stdbool.h>
 #include <linux/btf.h>
 #include <linux/types.h>
 
@@ -24,8 +25,14 @@ struct btf_type;
 
 struct bpf_object;
 
+enum btf_endianness {
+       BTF_LITTLE_ENDIAN = 0,
+       BTF_BIG_ENDIAN = 1,
+};
+
 LIBBPF_API void btf__free(struct btf *btf);
 LIBBPF_API struct btf *btf__new(const void *data, __u32 size);
+LIBBPF_API struct btf *btf__new_empty(void);
 LIBBPF_API struct btf *btf__parse(const char *path, struct btf_ext **btf_ext);
 LIBBPF_API struct btf *btf__parse_elf(const char *path, struct btf_ext **btf_ext);
 LIBBPF_API struct btf *btf__parse_raw(const char *path);
@@ -40,6 +47,8 @@ LIBBPF_API const struct btf_type *btf__type_by_id(const struct btf *btf,
                                                  __u32 id);
 LIBBPF_API size_t btf__pointer_size(const struct btf *btf);
 LIBBPF_API int btf__set_pointer_size(struct btf *btf, size_t ptr_sz);
+LIBBPF_API enum btf_endianness btf__endianness(const struct btf *btf);
+LIBBPF_API int btf__set_endianness(struct btf *btf, enum btf_endianness endian);
 LIBBPF_API __s64 btf__resolve_size(const struct btf *btf, __u32 type_id);
 LIBBPF_API int btf__resolve_type(const struct btf *btf, __u32 type_id);
 LIBBPF_API int btf__align_of(const struct btf *btf, __u32 id);
@@ -47,6 +56,7 @@ LIBBPF_API int btf__fd(const struct btf *btf);
 LIBBPF_API void btf__set_fd(struct btf *btf, int fd);
 LIBBPF_API const void *btf__get_raw_data(const struct btf *btf, __u32 *size);
 LIBBPF_API const char *btf__name_by_offset(const struct btf *btf, __u32 offset);
+LIBBPF_API const char *btf__str_by_offset(const struct btf *btf, __u32 offset);
 LIBBPF_API int btf__get_from_id(__u32 id, struct btf **btf);
 LIBBPF_API int btf__get_map_kv_tids(const struct btf *btf, const char *map_name,
                                    __u32 expected_key_size,
@@ -72,6 +82,47 @@ LIBBPF_API __u32 btf_ext__line_info_rec_size(const struct btf_ext *btf_ext);
 
 LIBBPF_API struct btf *libbpf_find_kernel_btf(void);
 
+LIBBPF_API int btf__find_str(struct btf *btf, const char *s);
+LIBBPF_API int btf__add_str(struct btf *btf, const char *s);
+
+LIBBPF_API int btf__add_int(struct btf *btf, const char *name, size_t byte_sz, int encoding);
+LIBBPF_API int btf__add_ptr(struct btf *btf, int ref_type_id);
+LIBBPF_API int btf__add_array(struct btf *btf,
+                             int index_type_id, int elem_type_id, __u32 nr_elems);
+/* struct/union construction APIs */
+LIBBPF_API int btf__add_struct(struct btf *btf, const char *name, __u32 sz);
+LIBBPF_API int btf__add_union(struct btf *btf, const char *name, __u32 sz);
+LIBBPF_API int btf__add_field(struct btf *btf, const char *name, int field_type_id,
+                             __u32 bit_offset, __u32 bit_size);
+
+/* enum construction APIs */
+LIBBPF_API int btf__add_enum(struct btf *btf, const char *name, __u32 bytes_sz);
+LIBBPF_API int btf__add_enum_value(struct btf *btf, const char *name, __s64 value);
+
+enum btf_fwd_kind {
+       BTF_FWD_STRUCT = 0,
+       BTF_FWD_UNION = 1,
+       BTF_FWD_ENUM = 2,
+};
+
+LIBBPF_API int btf__add_fwd(struct btf *btf, const char *name, enum btf_fwd_kind fwd_kind);
+LIBBPF_API int btf__add_typedef(struct btf *btf, const char *name, int ref_type_id);
+LIBBPF_API int btf__add_volatile(struct btf *btf, int ref_type_id);
+LIBBPF_API int btf__add_const(struct btf *btf, int ref_type_id);
+LIBBPF_API int btf__add_restrict(struct btf *btf, int ref_type_id);
+
+/* func and func_proto construction APIs */
+LIBBPF_API int btf__add_func(struct btf *btf, const char *name,
+                            enum btf_func_linkage linkage, int proto_type_id);
+LIBBPF_API int btf__add_func_proto(struct btf *btf, int ret_type_id);
+LIBBPF_API int btf__add_func_param(struct btf *btf, const char *name, int type_id);
+
+/* var & datasec construction APIs */
+LIBBPF_API int btf__add_var(struct btf *btf, const char *name, int linkage, int type_id);
+LIBBPF_API int btf__add_datasec(struct btf *btf, const char *name, __u32 byte_sz);
+LIBBPF_API int btf__add_datasec_var_info(struct btf *btf, int var_type_id,
+                                        __u32 offset, __u32 byte_sz);
+
 struct btf_dedup_opts {
        unsigned int dedup_table_size;
        bool dont_resolve_fwds;
index 6c079b3..2f9d685 100644 (file)
@@ -60,11 +60,14 @@ struct btf_dump {
        struct btf_dump_opts opts;
        int ptr_sz;
        bool strip_mods;
+       int last_id;
 
        /* per-type auxiliary state */
        struct btf_dump_type_aux_state *type_states;
+       size_t type_states_cap;
        /* per-type optional cached unique name, must be freed, if present */
        const char **cached_names;
+       size_t cached_names_cap;
 
        /* topo-sorted list of dependent type definitions */
        __u32 *emit_queue;
@@ -90,14 +93,7 @@ struct btf_dump {
 
 static size_t str_hash_fn(const void *key, void *ctx)
 {
-       const char *s = key;
-       size_t h = 0;
-
-       while (*s) {
-               h = h * 31 + *s;
-               s++;
-       }
-       return h;
+       return str_hash(key);
 }
 
 static bool str_equal_fn(const void *a, const void *b, void *ctx)
@@ -120,6 +116,7 @@ static void btf_dump_printf(const struct btf_dump *d, const char *fmt, ...)
 }
 
 static int btf_dump_mark_referenced(struct btf_dump *d);
+static int btf_dump_resize(struct btf_dump *d);
 
 struct btf_dump *btf_dump__new(const struct btf *btf,
                               const struct btf_ext *btf_ext,
@@ -151,25 +148,8 @@ struct btf_dump *btf_dump__new(const struct btf *btf,
                d->ident_names = NULL;
                goto err;
        }
-       d->type_states = calloc(1 + btf__get_nr_types(d->btf),
-                               sizeof(d->type_states[0]));
-       if (!d->type_states) {
-               err = -ENOMEM;
-               goto err;
-       }
-       d->cached_names = calloc(1 + btf__get_nr_types(d->btf),
-                                sizeof(d->cached_names[0]));
-       if (!d->cached_names) {
-               err = -ENOMEM;
-               goto err;
-       }
 
-       /* VOID is special */
-       d->type_states[0].order_state = ORDERED;
-       d->type_states[0].emit_state = EMITTED;
-
-       /* eagerly determine referenced types for anon enums */
-       err = btf_dump_mark_referenced(d);
+       err = btf_dump_resize(d);
        if (err)
                goto err;
 
@@ -179,9 +159,38 @@ err:
        return ERR_PTR(err);
 }
 
+static int btf_dump_resize(struct btf_dump *d)
+{
+       int err, last_id = btf__get_nr_types(d->btf);
+
+       if (last_id <= d->last_id)
+               return 0;
+
+       if (btf_ensure_mem((void **)&d->type_states, &d->type_states_cap,
+                          sizeof(*d->type_states), last_id + 1))
+               return -ENOMEM;
+       if (btf_ensure_mem((void **)&d->cached_names, &d->cached_names_cap,
+                          sizeof(*d->cached_names), last_id + 1))
+               return -ENOMEM;
+
+       if (d->last_id == 0) {
+               /* VOID is special */
+               d->type_states[0].order_state = ORDERED;
+               d->type_states[0].emit_state = EMITTED;
+       }
+
+       /* eagerly determine referenced types for anon enums */
+       err = btf_dump_mark_referenced(d);
+       if (err)
+               return err;
+
+       d->last_id = last_id;
+       return 0;
+}
+
 void btf_dump__free(struct btf_dump *d)
 {
-       int i, cnt;
+       int i;
 
        if (IS_ERR_OR_NULL(d))
                return;
@@ -189,7 +198,7 @@ void btf_dump__free(struct btf_dump *d)
        free(d->type_states);
        if (d->cached_names) {
                /* any set cached name is owned by us and should be freed */
-               for (i = 0, cnt = btf__get_nr_types(d->btf); i <= cnt; i++) {
+               for (i = 0; i <= d->last_id; i++) {
                        if (d->cached_names[i])
                                free((void *)d->cached_names[i]);
                }
@@ -229,6 +238,10 @@ int btf_dump__dump_type(struct btf_dump *d, __u32 id)
        if (id > btf__get_nr_types(d->btf))
                return -EINVAL;
 
+       err = btf_dump_resize(d);
+       if (err)
+               return err;
+
        d->emit_queue_cnt = 0;
        err = btf_dump_order_type(d, id, false);
        if (err < 0)
@@ -258,7 +271,7 @@ static int btf_dump_mark_referenced(struct btf_dump *d)
        const struct btf_type *t;
        __u16 vlen;
 
-       for (i = 1; i <= n; i++) {
+       for (i = d->last_id + 1; i <= n; i++) {
                t = btf__type_by_id(d->btf, i);
                vlen = btf_vlen(t);
 
@@ -313,6 +326,7 @@ static int btf_dump_mark_referenced(struct btf_dump *d)
        }
        return 0;
 }
+
 static int btf_dump_add_emit_queue_id(struct btf_dump *d, __u32 id)
 {
        __u32 *new_queue;
@@ -1056,11 +1070,15 @@ int btf_dump__emit_type_decl(struct btf_dump *d, __u32 id,
                             const struct btf_dump_emit_type_decl_opts *opts)
 {
        const char *fname;
-       int lvl;
+       int lvl, err;
 
        if (!OPTS_VALID(opts, btf_dump_emit_type_decl_opts))
                return -EINVAL;
 
+       err = btf_dump_resize(d);
+       if (err)
+               return -EINVAL;
+
        fname = OPTS_GET(opts, field_name, "");
        lvl = OPTS_GET(opts, indent_level, 0);
        d->strip_mods = OPTS_GET(opts, strip_mods, false);
index e0af36b..d9b385f 100644 (file)
@@ -25,6 +25,18 @@ static inline size_t hash_bits(size_t h, int bits)
 #endif
 }
 
+/* generic C-string hashing function */
+static inline size_t str_hash(const char *s)
+{
+       size_t h = 0;
+
+       while (*s) {
+               h = h * 31 + *s;
+               s++;
+       }
+       return h;
+}
+
 typedef size_t (*hashmap_hash_fn)(const void *key, void *ctx);
 typedef bool (*hashmap_equal_fn)(const void *key1, const void *key2, void *ctx);
 
index 32dc444..a4f55f8 100644 (file)
@@ -9390,9 +9390,11 @@ static struct bpf_link *attach_iter(const struct bpf_sec_def *sec,
 }
 
 static struct bpf_link *
-bpf_program__attach_fd(struct bpf_program *prog, int target_fd,
+bpf_program__attach_fd(struct bpf_program *prog, int target_fd, int btf_id,
                       const char *target_name)
 {
+       DECLARE_LIBBPF_OPTS(bpf_link_create_opts, opts,
+                           .target_btf_id = btf_id);
        enum bpf_attach_type attach_type;
        char errmsg[STRERR_BUFSIZE];
        struct bpf_link *link;
@@ -9410,7 +9412,7 @@ bpf_program__attach_fd(struct bpf_program *prog, int target_fd,
        link->detach = &bpf_link__detach_fd;
 
        attach_type = bpf_program__get_expected_attach_type(prog);
-       link_fd = bpf_link_create(prog_fd, target_fd, attach_type, NULL);
+       link_fd = bpf_link_create(prog_fd, target_fd, attach_type, &opts);
        if (link_fd < 0) {
                link_fd = -errno;
                free(link);
@@ -9426,19 +9428,51 @@ bpf_program__attach_fd(struct bpf_program *prog, int target_fd,
 struct bpf_link *
 bpf_program__attach_cgroup(struct bpf_program *prog, int cgroup_fd)
 {
-       return bpf_program__attach_fd(prog, cgroup_fd, "cgroup");
+       return bpf_program__attach_fd(prog, cgroup_fd, 0, "cgroup");
 }
 
 struct bpf_link *
 bpf_program__attach_netns(struct bpf_program *prog, int netns_fd)
 {
-       return bpf_program__attach_fd(prog, netns_fd, "netns");
+       return bpf_program__attach_fd(prog, netns_fd, 0, "netns");
 }
 
 struct bpf_link *bpf_program__attach_xdp(struct bpf_program *prog, int ifindex)
 {
        /* target_fd/target_ifindex use the same field in LINK_CREATE */
-       return bpf_program__attach_fd(prog, ifindex, "xdp");
+       return bpf_program__attach_fd(prog, ifindex, 0, "xdp");
+}
+
+struct bpf_link *bpf_program__attach_freplace(struct bpf_program *prog,
+                                             int target_fd,
+                                             const char *attach_func_name)
+{
+       int btf_id;
+
+       if (!!target_fd != !!attach_func_name) {
+               pr_warn("prog '%s': supply none or both of target_fd and attach_func_name\n",
+                       prog->name);
+               return ERR_PTR(-EINVAL);
+       }
+
+       if (prog->type != BPF_PROG_TYPE_EXT) {
+               pr_warn("prog '%s': only BPF_PROG_TYPE_EXT can attach as freplace",
+                       prog->name);
+               return ERR_PTR(-EINVAL);
+       }
+
+       if (target_fd) {
+               btf_id = libbpf_find_prog_btf_id(attach_func_name, target_fd);
+               if (btf_id < 0)
+                       return ERR_PTR(btf_id);
+
+               return bpf_program__attach_fd(prog, target_fd, btf_id, "freplace");
+       } else {
+               /* no target, so use raw_tracepoint_open for compatibility
+                * with old kernels
+                */
+               return bpf_program__attach_trace(prog);
+       }
 }
 
 struct bpf_link *
index a750f67..6909ee8 100644 (file)
@@ -261,6 +261,9 @@ LIBBPF_API struct bpf_link *
 bpf_program__attach_netns(struct bpf_program *prog, int netns_fd);
 LIBBPF_API struct bpf_link *
 bpf_program__attach_xdp(struct bpf_program *prog, int ifindex);
+LIBBPF_API struct bpf_link *
+bpf_program__attach_freplace(struct bpf_program *prog,
+                            int target_fd, const char *attach_func_name);
 
 struct bpf_map;
 
index 5f054da..4ebfadf 100644 (file)
@@ -303,7 +303,34 @@ LIBBPF_0.1.0 {
 LIBBPF_0.2.0 {
        global:
                bpf_prog_bind_map;
+               bpf_prog_test_run_opts;
+               bpf_program__attach_freplace;
                bpf_program__section_name;
+               btf__add_array;
+               btf__add_const;
+               btf__add_enum;
+               btf__add_enum_value;
+               btf__add_datasec;
+               btf__add_datasec_var_info;
+               btf__add_field;
+               btf__add_func;
+               btf__add_func_param;
+               btf__add_func_proto;
+               btf__add_fwd;
+               btf__add_int;
+               btf__add_ptr;
+               btf__add_restrict;
+               btf__add_str;
+               btf__add_struct;
+               btf__add_typedef;
+               btf__add_union;
+               btf__add_var;
+               btf__add_volatile;
+               btf__endianness;
+               btf__find_str;
+               btf__new_empty;
+               btf__set_endianness;
+               btf__str_by_offset;
                perf_buffer__buffer_cnt;
                perf_buffer__buffer_fd;
                perf_buffer__epoll_fd;
index 4d1c366..d99bc84 100644 (file)
@@ -105,6 +105,10 @@ static inline void *libbpf_reallocarray(void *ptr, size_t nmemb, size_t size)
        return realloc(ptr, total);
 }
 
+void *btf_add_mem(void **data, size_t *cap_cnt, size_t elem_sz,
+                 size_t cur_cnt, size_t max_cnt, size_t add_cnt);
+int btf_ensure_mem(void **data, size_t *cap_cnt, size_t elem_sz, size_t need_cnt);
+
 static inline bool libbpf_validate_opts(const char *opts,
                                        size_t opts_sz, size_t user_sz,
                                        const char *type_name)
@@ -136,6 +140,11 @@ static inline bool libbpf_validate_opts(const char *opts,
        ((opts) && opts->sz >= offsetofend(typeof(*(opts)), field))
 #define OPTS_GET(opts, field, fallback_value) \
        (OPTS_HAS(opts, field) ? (opts)->field : fallback_value)
+#define OPTS_SET(opts, field, value)           \
+       do {                                    \
+               if (OPTS_HAS(opts, field))      \
+                       (opts)->field = value;  \
+       } while (0)
 
 int parse_cpu_mask_str(const char *s, bool **mask, int *mask_sz);
 int parse_cpu_mask_file(const char *fcpu, bool **mask, int *mask_sz);
index e8fed55..3ab1200 100644 (file)
@@ -13,7 +13,6 @@ test_verifier_log
 feature
 test_sock
 test_sock_addr
-test_sock_fields
 urandom_read
 test_sockmap
 test_lirc_mode2_user
index 59a5fa5..bdbeafe 100644 (file)
@@ -35,7 +35,7 @@ TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test
        test_verifier_log test_dev_cgroup test_tcpbpf_user \
        test_sock test_sockmap get_cgroup_id_user test_socket_cookie \
        test_cgroup_storage \
-       test_netcnt test_tcpnotify_user test_sock_fields test_sysctl \
+       test_netcnt test_tcpnotify_user test_sysctl \
        test_progs-no_alu32 \
        test_current_pid_tgid_new_ns
 
index 1a42768..332ed2f 100644 (file)
@@ -311,7 +311,6 @@ extern const struct bench bench_rename_kretprobe;
 extern const struct bench bench_rename_rawtp;
 extern const struct bench bench_rename_fentry;
 extern const struct bench bench_rename_fexit;
-extern const struct bench bench_rename_fmodret;
 extern const struct bench bench_trig_base;
 extern const struct bench bench_trig_tp;
 extern const struct bench bench_trig_rawtp;
@@ -333,7 +332,6 @@ static const struct bench *benchs[] = {
        &bench_rename_rawtp,
        &bench_rename_fentry,
        &bench_rename_fexit,
-       &bench_rename_fmodret,
        &bench_trig_base,
        &bench_trig_tp,
        &bench_trig_rawtp,
@@ -464,4 +462,3 @@ int main(int argc, char **argv)
 
        return 0;
 }
-
index e74cff4..a967674 100644 (file)
@@ -106,12 +106,6 @@ static void setup_fexit()
        attach_bpf(ctx.skel->progs.prog5);
 }
 
-static void setup_fmodret()
-{
-       setup_ctx();
-       attach_bpf(ctx.skel->progs.prog6);
-}
-
 static void *consumer(void *input)
 {
        return NULL;
@@ -182,14 +176,3 @@ const struct bench bench_rename_fexit = {
        .report_progress = hits_drops_report_progress,
        .report_final = hits_drops_report_final,
 };
-
-const struct bench bench_rename_fmodret = {
-       .name = "rename-fmodret",
-       .validate = validate,
-       .setup = setup_fmodret,
-       .producer_thread = producer,
-       .consumer_thread = consumer,
-       .measure = measure,
-       .report_progress = hits_drops_report_progress,
-       .report_final = hits_drops_report_final,
-};
index 5bf2fe9..2915664 100644 (file)
@@ -16,6 +16,7 @@ BPF_PROG(name, args)
 
 struct sock_common {
        unsigned char   skc_state;
+       __u16           skc_num;
 } __attribute__((preserve_access_index));
 
 enum sk_pacing {
@@ -45,6 +46,10 @@ struct inet_connection_sock {
        __u64                     icsk_ca_priv[104 / sizeof(__u64)];
 } __attribute__((preserve_access_index));
 
+struct request_sock {
+       struct sock_common              __req_common;
+} __attribute__((preserve_access_index));
+
 struct tcp_sock {
        struct inet_connection_sock     inet_conn;
 
@@ -115,14 +120,6 @@ enum tcp_ca_event {
        CA_EVENT_ECN_IS_CE = 5,
 };
 
-enum tcp_ca_state {
-       TCP_CA_Open = 0,
-       TCP_CA_Disorder = 1,
-       TCP_CA_CWR = 2,
-       TCP_CA_Recovery = 3,
-       TCP_CA_Loss = 4
-};
-
 struct ack_sample {
        __u32 pkts_acked;
        __s32 rtt_us;
index fe1a83b..448885b 100644 (file)
@@ -7,6 +7,7 @@
 #include "bpf_iter_task.skel.h"
 #include "bpf_iter_task_stack.skel.h"
 #include "bpf_iter_task_file.skel.h"
+#include "bpf_iter_task_btf.skel.h"
 #include "bpf_iter_tcp4.skel.h"
 #include "bpf_iter_tcp6.skel.h"
 #include "bpf_iter_udp4.skel.h"
@@ -167,6 +168,83 @@ done:
        bpf_iter_task_file__destroy(skel);
 }
 
+#define TASKBUFSZ              32768
+
+static char taskbuf[TASKBUFSZ];
+
+static int do_btf_read(struct bpf_iter_task_btf *skel)
+{
+       struct bpf_program *prog = skel->progs.dump_task_struct;
+       struct bpf_iter_task_btf__bss *bss = skel->bss;
+       int iter_fd = -1, len = 0, bufleft = TASKBUFSZ;
+       struct bpf_link *link;
+       char *buf = taskbuf;
+       int ret = 0;
+
+       link = bpf_program__attach_iter(prog, NULL);
+       if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+               return ret;
+
+       iter_fd = bpf_iter_create(bpf_link__fd(link));
+       if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n"))
+               goto free_link;
+
+       do {
+               len = read(iter_fd, buf, bufleft);
+               if (len > 0) {
+                       buf += len;
+                       bufleft -= len;
+               }
+       } while (len > 0);
+
+       if (bss->skip) {
+               printf("%s:SKIP:no __builtin_btf_type_id\n", __func__);
+               ret = 1;
+               test__skip();
+               goto free_link;
+       }
+
+       if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno)))
+               goto free_link;
+
+       CHECK(strstr(taskbuf, "(struct task_struct)") == NULL,
+             "check for btf representation of task_struct in iter data",
+             "struct task_struct not found");
+free_link:
+       if (iter_fd > 0)
+               close(iter_fd);
+       bpf_link__destroy(link);
+       return ret;
+}
+
+static void test_task_btf(void)
+{
+       struct bpf_iter_task_btf__bss *bss;
+       struct bpf_iter_task_btf *skel;
+       int ret;
+
+       skel = bpf_iter_task_btf__open_and_load();
+       if (CHECK(!skel, "bpf_iter_task_btf__open_and_load",
+                 "skeleton open_and_load failed\n"))
+               return;
+
+       bss = skel->bss;
+
+       ret = do_btf_read(skel);
+       if (ret)
+               goto cleanup;
+
+       if (CHECK(bss->tasks == 0, "check if iterated over tasks",
+                 "no task iteration, did BPF program run?\n"))
+               goto cleanup;
+
+       CHECK(bss->seq_err != 0, "check for unexpected err",
+             "bpf_seq_printf_btf returned %ld", bss->seq_err);
+
+cleanup:
+       bpf_iter_task_btf__destroy(skel);
+}
+
 static void test_tcp4(void)
 {
        struct bpf_iter_tcp4 *skel;
@@ -352,7 +430,7 @@ static void test_overflow(bool test_e2big_overflow, bool ret1)
        struct bpf_map_info map_info = {};
        struct bpf_iter_test_kern4 *skel;
        struct bpf_link *link;
-       __u32 page_size;
+       __u32 iter_size;
        char *buf;
 
        skel = bpf_iter_test_kern4__open();
@@ -374,19 +452,19 @@ static void test_overflow(bool test_e2big_overflow, bool ret1)
                  "map_creation failed: %s\n", strerror(errno)))
                goto free_map1;
 
-       /* bpf_seq_printf kernel buffer is one page, so one map
+       /* bpf_seq_printf kernel buffer is 8 pages, so one map
         * bpf_seq_write will mostly fill it, and the other map
         * will partially fill and then trigger overflow and need
         * bpf_seq_read restart.
         */
-       page_size = sysconf(_SC_PAGE_SIZE);
+       iter_size = sysconf(_SC_PAGE_SIZE) << 3;
 
        if (test_e2big_overflow) {
-               skel->rodata->print_len = (page_size + 8) / 8;
-               expected_read_len = 2 * (page_size + 8);
+               skel->rodata->print_len = (iter_size + 8) / 8;
+               expected_read_len = 2 * (iter_size + 8);
        } else if (!ret1) {
-               skel->rodata->print_len = (page_size - 8) / 8;
-               expected_read_len = 2 * (page_size - 8);
+               skel->rodata->print_len = (iter_size - 8) / 8;
+               expected_read_len = 2 * (iter_size - 8);
        } else {
                skel->rodata->print_len = 1;
                expected_read_len = 2 * 8;
@@ -957,6 +1035,8 @@ void test_bpf_iter(void)
                test_task_stack();
        if (test__start_subtest("task_file"))
                test_task_file();
+       if (test__start_subtest("task_btf"))
+               test_task_btf();
        if (test__start_subtest("tcp4"))
                test_tcp4();
        if (test__start_subtest("tcp6"))
index 39fb81d..c60091e 100644 (file)
@@ -129,6 +129,109 @@ done:
        return err;
 }
 
+static char *dump_buf;
+static size_t dump_buf_sz;
+static FILE *dump_buf_file;
+
+void test_btf_dump_incremental(void)
+{
+       struct btf *btf = NULL;
+       struct btf_dump *d = NULL;
+       struct btf_dump_opts opts;
+       int id, err, i;
+
+       dump_buf_file = open_memstream(&dump_buf, &dump_buf_sz);
+       if (!ASSERT_OK_PTR(dump_buf_file, "dump_memstream"))
+               return;
+       btf = btf__new_empty();
+       if (!ASSERT_OK_PTR(btf, "new_empty"))
+               goto err_out;
+       opts.ctx = dump_buf_file;
+       d = btf_dump__new(btf, NULL, &opts, btf_dump_printf);
+       if (!ASSERT_OK(libbpf_get_error(d), "btf_dump__new"))
+               goto err_out;
+
+       /* First, generate BTF corresponding to the following C code:
+        *
+        * enum { VAL = 1 };
+        *
+        * struct s { int x; };
+        *
+        */
+       id = btf__add_enum(btf, NULL, 4);
+       ASSERT_EQ(id, 1, "enum_id");
+       err = btf__add_enum_value(btf, "VAL", 1);
+       ASSERT_OK(err, "enum_val_ok");
+
+       id = btf__add_int(btf, "int", 4, BTF_INT_SIGNED);
+       ASSERT_EQ(id, 2, "int_id");
+
+       id = btf__add_struct(btf, "s", 4);
+       ASSERT_EQ(id, 3, "struct_id");
+       err = btf__add_field(btf, "x", 2, 0, 0);
+       ASSERT_OK(err, "field_ok");
+
+       for (i = 1; i <= btf__get_nr_types(btf); i++) {
+               err = btf_dump__dump_type(d, i);
+               ASSERT_OK(err, "dump_type_ok");
+       }
+
+       fflush(dump_buf_file);
+       dump_buf[dump_buf_sz] = 0; /* some libc implementations don't do this */
+       ASSERT_STREQ(dump_buf,
+"enum {\n"
+"      VAL = 1,\n"
+"};\n"
+"\n"
+"struct s {\n"
+"      int x;\n"
+"};\n\n", "c_dump1");
+
+       /* Now, after dumping original BTF, append another struct that embeds
+        * anonymous enum. It also has a name conflict with the first struct:
+        *
+        * struct s___2 {
+        *     enum { VAL___2 = 1 } x;
+        *     struct s s;
+        * };
+        *
+        * This will test that btf_dump'er maintains internal state properly.
+        * Note that VAL___2 enum value. It's because we've already emitted
+        * that enum as a global anonymous enum, so btf_dump will ensure that
+        * enum values don't conflict;
+        *
+        */
+       fseek(dump_buf_file, 0, SEEK_SET);
+
+       id = btf__add_struct(btf, "s", 4);
+       ASSERT_EQ(id, 4, "struct_id");
+       err = btf__add_field(btf, "x", 1, 0, 0);
+       ASSERT_OK(err, "field_ok");
+       err = btf__add_field(btf, "s", 3, 32, 0);
+       ASSERT_OK(err, "field_ok");
+
+       for (i = 1; i <= btf__get_nr_types(btf); i++) {
+               err = btf_dump__dump_type(d, i);
+               ASSERT_OK(err, "dump_type_ok");
+       }
+
+       fflush(dump_buf_file);
+       dump_buf[dump_buf_sz] = 0; /* some libc implementations don't do this */
+       ASSERT_STREQ(dump_buf,
+"struct s___2 {\n"
+"      enum {\n"
+"              VAL___2 = 1,\n"
+"      } x;\n"
+"      struct s s;\n"
+"};\n\n" , "c_dump1");
+
+err_out:
+       fclose(dump_buf_file);
+       free(dump_buf);
+       btf_dump__free(d);
+       btf__free(btf);
+}
+
 void test_btf_dump() {
        int i;
 
@@ -140,4 +243,6 @@ void test_btf_dump() {
 
                test_btf_dump_case(i, &btf_dump_test_cases[i]);
        }
+       if (test__start_subtest("btf_dump: incremental"))
+               test_btf_dump_incremental();
 }
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_endian.c b/tools/testing/selftests/bpf/prog_tests/btf_endian.c
new file mode 100644 (file)
index 0000000..8c52d72
--- /dev/null
@@ -0,0 +1,101 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#define _GNU_SOURCE
+#include <string.h>
+#include <byteswap.h>
+#include <test_progs.h>
+#include <bpf/btf.h>
+
+static int duration = 0;
+
+/* Verify libbpf's BTF endianness conversion: load native-endian BTF,
+ * flip it to the opposite endianness, re-parse the swapped raw bytes,
+ * round-trip back to native, and confirm types added after a swap are
+ * still readable as if stored natively.
+ */
+void test_btf_endian() {
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+       enum btf_endianness endian = BTF_LITTLE_ENDIAN;
+#elif __BYTE_ORDER == __BIG_ENDIAN
+       enum btf_endianness endian = BTF_BIG_ENDIAN;
+#else
+#error "Unrecognized __BYTE_ORDER"
+#endif
+       /* BTF_LITTLE_ENDIAN is 0 and BTF_BIG_ENDIAN is 1, so 1 - endian
+        * yields the opposite endianness
+        */
+       enum btf_endianness swap_endian = 1 - endian;
+       struct btf *btf = NULL, *swap_btf = NULL;
+       const void *raw_data, *swap_raw_data;
+       const struct btf_type *t;
+       const struct btf_header *hdr;
+       __u32 raw_sz, swap_raw_sz;
+       int var_id;
+
+       /* Load BTF in native endianness */
+       btf = btf__parse_elf("btf_dump_test_case_syntax.o", NULL);
+       if (!ASSERT_OK_PTR(btf, "parse_native_btf"))
+               goto err_out;
+
+       ASSERT_EQ(btf__endianness(btf), endian, "endian");
+       btf__set_endianness(btf, swap_endian);
+       ASSERT_EQ(btf__endianness(btf), swap_endian, "endian");
+
+       /* Get raw BTF data in non-native endianness... */
+       raw_data = btf__get_raw_data(btf, &raw_sz);
+       if (!ASSERT_OK_PTR(raw_data, "raw_data_inverted"))
+               goto err_out;
+
+       /* ...and open it as a new BTF instance */
+       swap_btf = btf__new(raw_data, raw_sz);
+       if (!ASSERT_OK_PTR(swap_btf, "parse_swap_btf"))
+               goto err_out;
+
+       ASSERT_EQ(btf__endianness(swap_btf), swap_endian, "endian");
+       ASSERT_EQ(btf__get_nr_types(swap_btf), btf__get_nr_types(btf), "nr_types");
+
+       swap_raw_data = btf__get_raw_data(swap_btf, &swap_raw_sz);
+       if (!ASSERT_OK_PTR(swap_raw_data, "swap_raw_data"))
+               goto err_out;
+
+       /* both raw data should be identical (with non-native endianness) */
+       ASSERT_OK(memcmp(raw_data, swap_raw_data, raw_sz), "mem_identical");
+
+       /* make sure that at least BTF header data is really swapped */
+       hdr = swap_raw_data;
+       ASSERT_EQ(bswap_16(hdr->magic), BTF_MAGIC, "btf_magic_swapped");
+       ASSERT_EQ(raw_sz, swap_raw_sz, "raw_sizes");
+
+       /* swap it back to native endianness */
+       btf__set_endianness(swap_btf, endian);
+       swap_raw_data = btf__get_raw_data(swap_btf, &swap_raw_sz);
+       if (!ASSERT_OK_PTR(swap_raw_data, "swap_raw_data"))
+               goto err_out;
+
+       /* now header should have native BTF_MAGIC */
+       hdr = swap_raw_data;
+       ASSERT_EQ(hdr->magic, BTF_MAGIC, "btf_magic_native");
+       ASSERT_EQ(raw_sz, swap_raw_sz, "raw_sizes");
+
+       /* now modify original BTF */
+       var_id = btf__add_var(btf, "some_var", BTF_VAR_GLOBAL_ALLOCATED, 1);
+       CHECK(var_id <= 0, "var_id", "failed %d\n", var_id);
+
+       btf__free(swap_btf);
+       swap_btf = NULL;
+
+       /* flip modified BTF and serialize it in non-native endianness */
+       btf__set_endianness(btf, swap_endian);
+       raw_data = btf__get_raw_data(btf, &raw_sz);
+       if (!ASSERT_OK_PTR(raw_data, "raw_data_inverted"))
+               goto err_out;
+
+       /* and re-open swapped raw data again */
+       swap_btf = btf__new(raw_data, raw_sz);
+       if (!ASSERT_OK_PTR(swap_btf, "parse_swap_btf"))
+               goto err_out;
+
+       ASSERT_EQ(btf__endianness(swap_btf), swap_endian, "endian");
+       ASSERT_EQ(btf__get_nr_types(swap_btf), btf__get_nr_types(btf), "nr_types");
+
+       /* the type should appear as if it was stored in native endianness */
+       t = btf__type_by_id(swap_btf, var_id);
+       ASSERT_STREQ(btf__str_by_offset(swap_btf, t->name_off), "some_var", "var_name");
+       ASSERT_EQ(btf_var(t)->linkage, BTF_VAR_GLOBAL_ALLOCATED, "var_linkage");
+       ASSERT_EQ(t->type, 1, "var_type");
+
+err_out:
+       /* btf__free(NULL) is a no-op, so both frees are safe on any path */
+       btf__free(btf);
+       btf__free(swap_btf);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_skc_cls_ingress.c b/tools/testing/selftests/bpf/prog_tests/btf_skc_cls_ingress.c
new file mode 100644 (file)
index 0000000..86ccf37
--- /dev/null
@@ -0,0 +1,234 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+
+#define _GNU_SOURCE
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <sched.h>
+#include <linux/compiler.h>
+#include <bpf/libbpf.h>
+
+#include "network_helpers.h"
+#include "test_progs.h"
+#include "test_btf_skc_cls_ingress.skel.h"
+
+static struct test_btf_skc_cls_ingress *skel;
+struct sockaddr_in6 srv_sa6;
+static __u32 duration;
+
+#define PROG_PIN_FILE "/sys/fs/bpf/btf_skc_cls_ingress"
+
+/* Write @value into procfs sysctl file @sysctl.
+ * Returns 0 on success; -1 (after logging via CHECK) if the file cannot
+ * be opened or the write is short/failed.
+ */
+static int write_sysctl(const char *sysctl, const char *value)
+{
+       int fd, err, len;
+
+       fd = open(sysctl, O_WRONLY);
+       if (CHECK(fd == -1, "open sysctl", "open(%s): %s (%d)\n",
+                 sysctl, strerror(errno), errno))
+               return -1;
+
+       len = strlen(value);
+       err = write(fd, value, len);
+       close(fd);
+       if (CHECK(err != len, "write sysctl",
+                 "write(%s, %s, %d): err:%d %s (%d)\n",
+                 sysctl, value, len, err, strerror(errno), errno))
+               return -1;
+
+       return 0;
+}
+
+/* Move the test into a fresh network namespace, bring up loopback,
+ * attach the pinned cls prog (PROG_PIN_FILE) at lo's clsact ingress,
+ * and enable the TCP options the BPF prog's syncookie path relies on.
+ * Returns 0 on success, -1 on any setup failure.
+ */
+static int prepare_netns(void)
+{
+       if (CHECK(unshare(CLONE_NEWNET), "create netns",
+                 "unshare(CLONE_NEWNET): %s (%d)",
+                 strerror(errno), errno))
+               return -1;
+
+       if (CHECK(system("ip link set dev lo up"),
+                 "ip link set dev lo up", "failed\n"))
+               return -1;
+
+       if (CHECK(system("tc qdisc add dev lo clsact"),
+                 "tc qdisc add dev lo clsact", "failed\n"))
+               return -1;
+
+       if (CHECK(system("tc filter add dev lo ingress bpf direct-action object-pinned " PROG_PIN_FILE),
+                 "install tc cls-prog at ingress", "failed\n"))
+               return -1;
+
+       /* Ensure 20 bytes options (i.e. in total 40 bytes tcp header) for the
+        * bpf_tcp_gen_syncookie() helper.
+        */
+       if (write_sysctl("/proc/sys/net/ipv4/tcp_window_scaling", "1") ||
+           write_sysctl("/proc/sys/net/ipv4/tcp_timestamps", "1") ||
+           write_sysctl("/proc/sys/net/ipv4/tcp_sack", "1"))
+               return -1;
+
+       return 0;
+}
+
+/* Clear all globals shared with the BPF prog so each subtest starts
+ * from a clean slate.
+ */
+static void reset_test(void)
+{
+       memset(&skel->bss->srv_sa6, 0, sizeof(skel->bss->srv_sa6));
+       skel->bss->listen_tp_sport = 0;
+       skel->bss->req_sk_sport = 0;
+       skel->bss->recv_cookie = 0;
+       skel->bss->gen_cookie = 0;
+       skel->bss->linum = 0;
+}
+
+/* If the BPF prog recorded an error, print the source line it flagged
+ * (linum stays 0 when no error occurred).
+ */
+static void print_err_line(void)
+{
+       if (skel->bss->linum)
+               printf("bpf prog error at line %u\n", skel->bss->linum);
+}
+
+/* Normal (non-syncookie) connection: with tcp_syncookies=1 a direct
+ * connect should go through the regular 3WHS, so the BPF prog must see
+ * both the listener and the request socket, and no cookies at all.
+ */
+static void test_conn(void)
+{
+       int listen_fd = -1, cli_fd = -1, err;
+       socklen_t addrlen = sizeof(srv_sa6);
+       int srv_port;
+
+       if (write_sysctl("/proc/sys/net/ipv4/tcp_syncookies", "1"))
+               return;
+
+       listen_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0, 0);
+       if (CHECK_FAIL(listen_fd == -1))
+               return;
+
+       err = getsockname(listen_fd, (struct sockaddr *)&srv_sa6, &addrlen);
+       if (CHECK(err, "getsockname(listen_fd)", "err:%d errno:%d\n", err,
+                 errno))
+               goto done;
+       /* publish the server address so the BPF prog can match its packets */
+       memcpy(&skel->bss->srv_sa6, &srv_sa6, sizeof(srv_sa6));
+       srv_port = ntohs(srv_sa6.sin6_port);
+
+       cli_fd = connect_to_fd(listen_fd, 0);
+       if (CHECK_FAIL(cli_fd == -1))
+               goto done;
+
+       if (CHECK(skel->bss->listen_tp_sport != srv_port ||
+                 skel->bss->req_sk_sport != srv_port,
+                 "Unexpected sk src port",
+                 "listen_tp_sport:%u req_sk_sport:%u expected:%u\n",
+                 skel->bss->listen_tp_sport, skel->bss->req_sk_sport,
+                 srv_port))
+               goto done;
+
+       /* no syncookie should be generated or received in this mode */
+       if (CHECK(skel->bss->gen_cookie || skel->bss->recv_cookie,
+                 "Unexpected syncookie states",
+                 "gen_cookie:%u recv_cookie:%u\n",
+                 skel->bss->gen_cookie, skel->bss->recv_cookie))
+               goto done;
+
+       CHECK(skel->bss->linum, "bpf prog detected error", "at line %u\n",
+             skel->bss->linum);
+
+done:
+       if (listen_fd != -1)
+               close(listen_fd);
+       if (cli_fd != -1)
+               close(cli_fd);
+}
+
+/* Syncookie connection: with tcp_syncookies=2 every SYN is answered
+ * with a cookie and no request socket is created, so the BPF prog must
+ * see the listener but not a req_sk, and the generated cookie must be
+ * echoed back by the client's ACK.
+ */
+static void test_syncookie(void)
+{
+       int listen_fd = -1, cli_fd = -1, err;
+       socklen_t addrlen = sizeof(srv_sa6);
+       int srv_port;
+
+       /* Enforce syncookie mode */
+       if (write_sysctl("/proc/sys/net/ipv4/tcp_syncookies", "2"))
+               return;
+
+       listen_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0, 0);
+       if (CHECK_FAIL(listen_fd == -1))
+               return;
+
+       err = getsockname(listen_fd, (struct sockaddr *)&srv_sa6, &addrlen);
+       if (CHECK(err, "getsockname(listen_fd)", "err:%d errno:%d\n", err,
+                 errno))
+               goto done;
+       /* publish the server address so the BPF prog can match its packets */
+       memcpy(&skel->bss->srv_sa6, &srv_sa6, sizeof(srv_sa6));
+       srv_port = ntohs(srv_sa6.sin6_port);
+
+       cli_fd = connect_to_fd(listen_fd, 0);
+       if (CHECK_FAIL(cli_fd == -1))
+               goto done;
+
+       if (CHECK(skel->bss->listen_tp_sport != srv_port,
+                 "Unexpected tp src port",
+                 "listen_tp_sport:%u expected:%u\n",
+                 skel->bss->listen_tp_sport, srv_port))
+               goto done;
+
+       /* syncookie mode never instantiates a request socket */
+       if (CHECK(skel->bss->req_sk_sport,
+                 "Unexpected req_sk src port",
+                 "req_sk_sport:%u expected:0\n",
+                  skel->bss->req_sk_sport))
+               goto done;
+
+       /* a cookie must have been generated and echoed back unchanged */
+       if (CHECK(!skel->bss->gen_cookie ||
+                 skel->bss->gen_cookie != skel->bss->recv_cookie,
+                 "Unexpected syncookie states",
+                 "gen_cookie:%u recv_cookie:%u\n",
+                 skel->bss->gen_cookie, skel->bss->recv_cookie))
+               goto done;
+
+       CHECK(skel->bss->linum, "bpf prog detected error", "at line %u\n",
+             skel->bss->linum);
+
+done:
+       if (listen_fd != -1)
+               close(listen_fd);
+       if (cli_fd != -1)
+               close(cli_fd);
+}
+
+/* Table-driven subtest registry: each entry pairs a subtest name (used
+ * with test__start_subtest()) with its run function.
+ */
+struct test {
+       const char *desc;
+       void (*run)(void);
+};
+
+/* Expands "name" into { "name", test_name } */
+#define DEF_TEST(name) { #name, test_##name }
+static struct test tests[] = {
+       DEF_TEST(conn),
+       DEF_TEST(syncookie),
+};
+
+/* Entry point: load and pin the cls_ingress prog once, then run each
+ * subtest in its own freshly-prepared netns, printing any error line
+ * the BPF prog recorded and resetting shared state between subtests.
+ */
+void test_btf_skc_cls_ingress(void)
+{
+       int i, err;
+
+       skel = test_btf_skc_cls_ingress__open_and_load();
+       if (CHECK(!skel, "test_btf_skc_cls_ingress__open_and_load", "failed\n"))
+               return;
+
+       /* pin so the tc command in prepare_netns() can attach it by path */
+       err = bpf_program__pin(skel->progs.cls_ingress, PROG_PIN_FILE);
+       if (CHECK(err, "bpf_program__pin",
+                 "cannot pin bpf prog to %s. err:%d\n", PROG_PIN_FILE, err)) {
+               test_btf_skc_cls_ingress__destroy(skel);
+               return;
+       }
+
+       for (i = 0; i < ARRAY_SIZE(tests); i++) {
+               if (!test__start_subtest(tests[i].desc))
+                       continue;
+
+               if (prepare_netns())
+                       break;
+
+               tests[i].run();
+
+               print_err_line();
+               reset_test();
+       }
+
+       bpf_program__unpin(skel->progs.cls_ingress, PROG_PIN_FILE);
+       test_btf_skc_cls_ingress__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_write.c b/tools/testing/selftests/bpf/prog_tests/btf_write.c
new file mode 100644 (file)
index 0000000..314e1e7
--- /dev/null
@@ -0,0 +1,244 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include <test_progs.h>
+#include <bpf/btf.h>
+
+static int duration = 0;
+
+/* Exercise the btf__add_*() write APIs: build a BTF object from scratch,
+ * adding one type of every kind (INT, PTR, CONST, VOLATILE, RESTRICT,
+ * ARRAY, STRUCT, UNION, ENUM, FWD, TYPEDEF, FUNC, FUNC_PROTO, VAR,
+ * DATASEC), and verify each resulting btf_type's fields via the read
+ * APIs. Also checks string deduplication and invalid-argument errors.
+ */
+void test_btf_write() {
+       const struct btf_var_secinfo *vi;
+       const struct btf_type *t;
+       const struct btf_member *m;
+       const struct btf_enum *v;
+       const struct btf_param *p;
+       struct btf *btf;
+       int id, err, str_off;
+
+       btf = btf__new_empty();
+       if (CHECK(IS_ERR(btf), "new_empty", "failed: %ld\n", PTR_ERR(btf)))
+               return;
+
+       /* strings are deduplicated: find fails before add, finds after */
+       str_off = btf__find_str(btf, "int");
+       ASSERT_EQ(str_off, -ENOENT, "int_str_missing_off");
+
+       str_off = btf__add_str(btf, "int");
+       ASSERT_EQ(str_off, 1, "int_str_off");
+
+       str_off = btf__find_str(btf, "int");
+       ASSERT_EQ(str_off, 1, "int_str_found_off");
+
+       /* BTF_KIND_INT */
+       id = btf__add_int(btf, "int", 4,  BTF_INT_SIGNED);
+       ASSERT_EQ(id, 1, "int_id");
+
+       t = btf__type_by_id(btf, 1);
+       /* should re-use previously added "int" string */
+       ASSERT_EQ(t->name_off, str_off, "int_name_off");
+       ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "int", "int_name");
+       ASSERT_EQ(btf_kind(t), BTF_KIND_INT, "int_kind");
+       ASSERT_EQ(t->size, 4, "int_sz");
+       ASSERT_EQ(btf_int_encoding(t), BTF_INT_SIGNED, "int_enc");
+       ASSERT_EQ(btf_int_bits(t), 32, "int_bits");
+
+       /* invalid int size */
+       id = btf__add_int(btf, "bad sz int", 7, 0);
+       ASSERT_ERR(id, "int_bad_sz");
+       /* invalid encoding */
+       id = btf__add_int(btf, "bad enc int", 4, 123);
+       ASSERT_ERR(id, "int_bad_enc");
+       /* NULL name */
+       id = btf__add_int(btf, NULL, 4, 0);
+       ASSERT_ERR(id, "int_bad_null_name");
+       /* empty name */
+       id = btf__add_int(btf, "", 4, 0);
+       ASSERT_ERR(id, "int_bad_empty_name");
+
+       /* PTR/CONST/VOLATILE/RESTRICT */
+       id = btf__add_ptr(btf, 1);
+       ASSERT_EQ(id, 2, "ptr_id");
+       t = btf__type_by_id(btf, 2);
+       ASSERT_EQ(btf_kind(t), BTF_KIND_PTR, "ptr_kind");
+       ASSERT_EQ(t->type, 1, "ptr_type");
+
+       id = btf__add_const(btf, 5); /* points forward to restrict */
+       ASSERT_EQ(id, 3, "const_id");
+       t = btf__type_by_id(btf, 3);
+       ASSERT_EQ(btf_kind(t), BTF_KIND_CONST, "const_kind");
+       ASSERT_EQ(t->type, 5, "const_type");
+
+       id = btf__add_volatile(btf, 3);
+       ASSERT_EQ(id, 4, "volatile_id");
+       t = btf__type_by_id(btf, 4);
+       ASSERT_EQ(btf_kind(t), BTF_KIND_VOLATILE, "volatile_kind");
+       ASSERT_EQ(t->type, 3, "volatile_type");
+
+       id = btf__add_restrict(btf, 4);
+       ASSERT_EQ(id, 5, "restrict_id");
+       t = btf__type_by_id(btf, 5);
+       ASSERT_EQ(btf_kind(t), BTF_KIND_RESTRICT, "restrict_kind");
+       ASSERT_EQ(t->type, 4, "restrict_type");
+
+       /* ARRAY */
+       id = btf__add_array(btf, 1, 2, 10); /* int *[10] */
+       ASSERT_EQ(id, 6, "array_id");
+       t = btf__type_by_id(btf, 6);
+       ASSERT_EQ(btf_kind(t), BTF_KIND_ARRAY, "array_kind");
+       ASSERT_EQ(btf_array(t)->index_type, 1, "array_index_type");
+       ASSERT_EQ(btf_array(t)->type, 2, "array_elem_type");
+       ASSERT_EQ(btf_array(t)->nelems, 10, "array_nelems");
+
+       /* STRUCT */
+       /* adding a field with no preceding struct/union must fail */
+       err = btf__add_field(btf, "field", 1, 0, 0);
+       ASSERT_ERR(err, "no_struct_field");
+       id = btf__add_struct(btf, "s1", 8);
+       ASSERT_EQ(id, 7, "struct_id");
+       err = btf__add_field(btf, "f1", 1, 0, 0);
+       ASSERT_OK(err, "f1_res");
+       err = btf__add_field(btf, "f2", 1, 32, 16);
+       ASSERT_OK(err, "f2_res");
+
+       t = btf__type_by_id(btf, 7);
+       ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "s1", "struct_name");
+       ASSERT_EQ(btf_kind(t), BTF_KIND_STRUCT, "struct_kind");
+       ASSERT_EQ(btf_vlen(t), 2, "struct_vlen");
+       ASSERT_EQ(btf_kflag(t), true, "struct_kflag");
+       ASSERT_EQ(t->size, 8, "struct_sz");
+       m = btf_members(t) + 0;
+       ASSERT_STREQ(btf__str_by_offset(btf, m->name_off), "f1", "f1_name");
+       ASSERT_EQ(m->type, 1, "f1_type");
+       ASSERT_EQ(btf_member_bit_offset(t, 0), 0, "f1_bit_off");
+       ASSERT_EQ(btf_member_bitfield_size(t, 0), 0, "f1_bit_sz");
+       m = btf_members(t) + 1;
+       ASSERT_STREQ(btf__str_by_offset(btf, m->name_off), "f2", "f2_name");
+       ASSERT_EQ(m->type, 1, "f2_type");
+       ASSERT_EQ(btf_member_bit_offset(t, 1), 32, "f2_bit_off");
+       ASSERT_EQ(btf_member_bitfield_size(t, 1), 16, "f2_bit_sz");
+
+       /* UNION */
+       id = btf__add_union(btf, "u1", 8);
+       ASSERT_EQ(id, 8, "union_id");
+
+       /* invalid, non-zero offset */
+       /* NOTE(review): assert name "no_struct_field" is reused from the
+        * STRUCT case above; likely meant something like "union_bad_off"
+        */
+       err = btf__add_field(btf, "field", 1, 1, 0);
+       ASSERT_ERR(err, "no_struct_field");
+
+       err = btf__add_field(btf, "f1", 1, 0, 16);
+       ASSERT_OK(err, "f1_res");
+
+       t = btf__type_by_id(btf, 8);
+       ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "u1", "union_name");
+       ASSERT_EQ(btf_kind(t), BTF_KIND_UNION, "union_kind");
+       ASSERT_EQ(btf_vlen(t), 1, "union_vlen");
+       ASSERT_EQ(btf_kflag(t), true, "union_kflag");
+       ASSERT_EQ(t->size, 8, "union_sz");
+       m = btf_members(t) + 0;
+       ASSERT_STREQ(btf__str_by_offset(btf, m->name_off), "f1", "f1_name");
+       ASSERT_EQ(m->type, 1, "f1_type");
+       ASSERT_EQ(btf_member_bit_offset(t, 0), 0, "f1_bit_off");
+       ASSERT_EQ(btf_member_bitfield_size(t, 0), 16, "f1_bit_sz");
+
+       /* ENUM */
+       id = btf__add_enum(btf, "e1", 4);
+       ASSERT_EQ(id, 9, "enum_id");
+       err = btf__add_enum_value(btf, "v1", 1);
+       ASSERT_OK(err, "v1_res");
+       err = btf__add_enum_value(btf, "v2", 2);
+       ASSERT_OK(err, "v2_res");
+
+       t = btf__type_by_id(btf, 9);
+       ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "e1", "enum_name");
+       ASSERT_EQ(btf_kind(t), BTF_KIND_ENUM, "enum_kind");
+       ASSERT_EQ(btf_vlen(t), 2, "enum_vlen");
+       ASSERT_EQ(t->size, 4, "enum_sz");
+       v = btf_enum(t) + 0;
+       ASSERT_STREQ(btf__str_by_offset(btf, v->name_off), "v1", "v1_name");
+       ASSERT_EQ(v->val, 1, "v1_val");
+       v = btf_enum(t) + 1;
+       ASSERT_STREQ(btf__str_by_offset(btf, v->name_off), "v2", "v2_name");
+       ASSERT_EQ(v->val, 2, "v2_val");
+
+       /* FWDs */
+       id = btf__add_fwd(btf, "struct_fwd", BTF_FWD_STRUCT);
+       ASSERT_EQ(id, 10, "struct_fwd_id");
+       t = btf__type_by_id(btf, 10);
+       ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "struct_fwd", "fwd_name");
+       ASSERT_EQ(btf_kind(t), BTF_KIND_FWD, "fwd_kind");
+       /* kflag distinguishes struct (0) vs union (1) forward declarations */
+       ASSERT_EQ(btf_kflag(t), 0, "fwd_kflag");
+
+       id = btf__add_fwd(btf, "union_fwd", BTF_FWD_UNION);
+       ASSERT_EQ(id, 11, "union_fwd_id");
+       t = btf__type_by_id(btf, 11);
+       ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "union_fwd", "fwd_name");
+       ASSERT_EQ(btf_kind(t), BTF_KIND_FWD, "fwd_kind");
+       ASSERT_EQ(btf_kflag(t), 1, "fwd_kflag");
+
+       /* enum fwd is represented as a zero-vlen BTF_KIND_ENUM */
+       id = btf__add_fwd(btf, "enum_fwd", BTF_FWD_ENUM);
+       ASSERT_EQ(id, 12, "enum_fwd_id");
+       t = btf__type_by_id(btf, 12);
+       ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "enum_fwd", "fwd_name");
+       ASSERT_EQ(btf_kind(t), BTF_KIND_ENUM, "enum_fwd_kind");
+       /* NOTE(review): assert name duplicates the previous one; likely
+        * meant "enum_fwd_vlen"
+        */
+       ASSERT_EQ(btf_vlen(t), 0, "enum_fwd_kind");
+       ASSERT_EQ(t->size, 4, "enum_fwd_sz");
+
+       /* TYPEDEF */
+       id = btf__add_typedef(btf, "typedef1", 1);
+       ASSERT_EQ(id, 13, "typedef_fwd_id");
+       t = btf__type_by_id(btf, 13);
+       ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "typedef1", "typedef_name");
+       ASSERT_EQ(btf_kind(t), BTF_KIND_TYPEDEF, "typedef_kind");
+       ASSERT_EQ(t->type, 1, "typedef_type");
+
+       /* FUNC & FUNC_PROTO */
+       /* FUNC references proto id 15 which is added right after */
+       id = btf__add_func(btf, "func1", BTF_FUNC_GLOBAL, 15);
+       ASSERT_EQ(id, 14, "func_id");
+       t = btf__type_by_id(btf, 14);
+       ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "func1", "func_name");
+       ASSERT_EQ(t->type, 15, "func_type");
+       ASSERT_EQ(btf_kind(t), BTF_KIND_FUNC, "func_kind");
+       /* FUNC stores its linkage in the vlen field */
+       ASSERT_EQ(btf_vlen(t), BTF_FUNC_GLOBAL, "func_vlen");
+
+       id = btf__add_func_proto(btf, 1);
+       ASSERT_EQ(id, 15, "func_proto_id");
+       err = btf__add_func_param(btf, "p1", 1);
+       ASSERT_OK(err, "p1_res");
+       err = btf__add_func_param(btf, "p2", 2);
+       ASSERT_OK(err, "p2_res");
+
+       t = btf__type_by_id(btf, 15);
+       ASSERT_EQ(btf_kind(t), BTF_KIND_FUNC_PROTO, "func_proto_kind");
+       ASSERT_EQ(btf_vlen(t), 2, "func_proto_vlen");
+       ASSERT_EQ(t->type, 1, "func_proto_ret_type");
+       p = btf_params(t) + 0;
+       ASSERT_STREQ(btf__str_by_offset(btf, p->name_off), "p1", "p1_name");
+       ASSERT_EQ(p->type, 1, "p1_type");
+       p = btf_params(t) + 1;
+       ASSERT_STREQ(btf__str_by_offset(btf, p->name_off), "p2", "p2_name");
+       ASSERT_EQ(p->type, 2, "p2_type");
+
+       /* VAR */
+       id = btf__add_var(btf, "var1", BTF_VAR_GLOBAL_ALLOCATED, 1);
+       ASSERT_EQ(id, 16, "var_id");
+       t = btf__type_by_id(btf, 16);
+       ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "var1", "var_name");
+       ASSERT_EQ(btf_kind(t), BTF_KIND_VAR, "var_kind");
+       ASSERT_EQ(t->type, 1, "var_type");
+       /* NOTE(review): assert name duplicates "var_type" above; likely
+        * meant "var_linkage"
+        */
+       ASSERT_EQ(btf_var(t)->linkage, BTF_VAR_GLOBAL_ALLOCATED, "var_type");
+
+       /* DATASECT */
+       id = btf__add_datasec(btf, "datasec1", 12);
+       ASSERT_EQ(id, 17, "datasec_id");
+       err = btf__add_datasec_var_info(btf, 1, 4, 8);
+       ASSERT_OK(err, "v1_res");
+
+       t = btf__type_by_id(btf, 17);
+       ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "datasec1", "datasec_name");
+       ASSERT_EQ(t->size, 12, "datasec_sz");
+       ASSERT_EQ(btf_kind(t), BTF_KIND_DATASEC, "datasec_kind");
+       ASSERT_EQ(btf_vlen(t), 1, "datasec_vlen");
+       vi = btf_var_secinfos(t) + 0;
+       ASSERT_EQ(vi->type, 1, "v1_type");
+       ASSERT_EQ(vi->offset, 4, "v1_off");
+       ASSERT_EQ(vi->size, 8, "v1_sz");
+
+       btf__free(btf);
+}
index eda6827..5c04489 100644 (file)
@@ -2,36 +2,79 @@
 /* Copyright (c) 2019 Facebook */
 #include <test_progs.h>
 #include <network_helpers.h>
+#include <bpf/btf.h>
+
+typedef int (*test_cb)(struct bpf_object *obj);
+
+/* Read the first prog_cnt __u64 counters out of @obj's internal (.bss/
+ * .data) map and verify each fexit/freplace prog set its slot to 1.
+ * If @reset is true, zero the counters back in the map afterwards so a
+ * subsequent run can be validated independently.
+ * Returns 0 on success, negative/-1 on any failure (logged via CHECK).
+ */
+static int check_data_map(struct bpf_object *obj, int prog_cnt, bool reset)
+{
+       struct bpf_map *data_map = NULL, *map;
+       __u64 *result = NULL;
+       const int zero = 0;
+       __u32 duration = 0;
+       int ret = -1, i;
+
+       result = malloc((prog_cnt + 32 /* spare */) * sizeof(__u64));
+       if (CHECK(!result, "alloc_memory", "failed to alloc memory"))
+               return -ENOMEM;
+
+       /* the first internal map is assumed to hold the result counters */
+       bpf_object__for_each_map(map, obj)
+               if (bpf_map__is_internal(map)) {
+                       data_map = map;
+                       break;
+               }
+       if (CHECK(!data_map, "find_data_map", "data map not found\n"))
+               goto out;
+
+       ret = bpf_map_lookup_elem(bpf_map__fd(data_map), &zero, result);
+       if (CHECK(ret, "get_result",
+                 "failed to get output data: %d\n", ret))
+               goto out;
+
+       for (i = 0; i < prog_cnt; i++) {
+               if (CHECK(result[i] != 1, "result",
+                         "fexit_bpf2bpf result[%d] failed err %llu\n",
+                         i, result[i]))
+                       goto out;
+               result[i] = 0;
+       }
+       if (reset) {
+               ret = bpf_map_update_elem(bpf_map__fd(data_map), &zero, result, 0);
+               if (CHECK(ret, "reset_result", "failed to reset result\n"))
+                       goto out;
+       }
+
+       ret = 0;
+out:
+       free(result);
+       return ret;
+}
 
 static void test_fexit_bpf2bpf_common(const char *obj_file,
                                      const char *target_obj_file,
                                      int prog_cnt,
                                      const char **prog_name,
-                                     bool run_prog)
+                                     bool run_prog,
+                                     test_cb cb)
 {
-       struct bpf_object *obj = NULL, *pkt_obj;
-       int err, pkt_fd, i;
-       struct bpf_link **link = NULL;
+       struct bpf_object *obj = NULL, *tgt_obj;
        struct bpf_program **prog = NULL;
+       struct bpf_link **link = NULL;
        __u32 duration = 0, retval;
-       struct bpf_map *data_map;
-       const int zero = 0;
-       __u64 *result = NULL;
+       int err, tgt_fd, i;
 
        err = bpf_prog_load(target_obj_file, BPF_PROG_TYPE_UNSPEC,
-                           &pkt_obj, &pkt_fd);
+                           &tgt_obj, &tgt_fd);
        if (CHECK(err, "tgt_prog_load", "file %s err %d errno %d\n",
                  target_obj_file, err, errno))
                return;
        DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts,
-                           .attach_prog_fd = pkt_fd,
+                           .attach_prog_fd = tgt_fd,
                           );
 
        link = calloc(sizeof(struct bpf_link *), prog_cnt);
        prog = calloc(sizeof(struct bpf_program *), prog_cnt);
-       result = malloc((prog_cnt + 32 /* spare */) * sizeof(__u64));
-       if (CHECK(!link || !prog || !result, "alloc_memory",
-                 "failed to alloc memory"))
+       if (CHECK(!link || !prog, "alloc_memory", "failed to alloc memory"))
                goto close_prog;
 
        obj = bpf_object__open_file(obj_file, &opts);
@@ -53,39 +96,33 @@ static void test_fexit_bpf2bpf_common(const char *obj_file,
                        goto close_prog;
        }
 
-       if (!run_prog)
-               goto close_prog;
+       if (cb) {
+               err = cb(obj);
+               if (err)
+                       goto close_prog;
+       }
 
-       data_map = bpf_object__find_map_by_name(obj, "fexit_bp.bss");
-       if (CHECK(!data_map, "find_data_map", "data map not found\n"))
+       if (!run_prog)
                goto close_prog;
 
-       err = bpf_prog_test_run(pkt_fd, 1, &pkt_v6, sizeof(pkt_v6),
+       err = bpf_prog_test_run(tgt_fd, 1, &pkt_v6, sizeof(pkt_v6),
                                NULL, NULL, &retval, &duration);
        CHECK(err || retval, "ipv6",
              "err %d errno %d retval %d duration %d\n",
              err, errno, retval, duration);
 
-       err = bpf_map_lookup_elem(bpf_map__fd(data_map), &zero, result);
-       if (CHECK(err, "get_result",
-                 "failed to get output data: %d\n", err))
+       if (check_data_map(obj, prog_cnt, false))
                goto close_prog;
 
-       for (i = 0; i < prog_cnt; i++)
-               if (CHECK(result[i] != 1, "result", "fexit_bpf2bpf failed err %llu\n",
-                         result[i]))
-                       goto close_prog;
-
 close_prog:
        for (i = 0; i < prog_cnt; i++)
                if (!IS_ERR_OR_NULL(link[i]))
                        bpf_link__destroy(link[i]);
        if (!IS_ERR_OR_NULL(obj))
                bpf_object__close(obj);
-       bpf_object__close(pkt_obj);
+       bpf_object__close(tgt_obj);
        free(link);
        free(prog);
-       free(result);
 }
 
 static void test_target_no_callees(void)
@@ -96,7 +133,7 @@ static void test_target_no_callees(void)
        test_fexit_bpf2bpf_common("./fexit_bpf2bpf_simple.o",
                                  "./test_pkt_md_access.o",
                                  ARRAY_SIZE(prog_name),
-                                 prog_name, true);
+                                 prog_name, true, NULL);
 }
 
 static void test_target_yes_callees(void)
@@ -110,7 +147,7 @@ static void test_target_yes_callees(void)
        test_fexit_bpf2bpf_common("./fexit_bpf2bpf.o",
                                  "./test_pkt_access.o",
                                  ARRAY_SIZE(prog_name),
-                                 prog_name, true);
+                                 prog_name, true, NULL);
 }
 
 static void test_func_replace(void)
@@ -128,7 +165,7 @@ static void test_func_replace(void)
        test_fexit_bpf2bpf_common("./fexit_bpf2bpf.o",
                                  "./test_pkt_access.o",
                                  ARRAY_SIZE(prog_name),
-                                 prog_name, true);
+                                 prog_name, true, NULL);
 }
 
 static void test_func_replace_verify(void)
@@ -139,9 +176,116 @@ static void test_func_replace_verify(void)
        test_fexit_bpf2bpf_common("./freplace_connect4.o",
                                  "./connect4_prog.o",
                                  ARRAY_SIZE(prog_name),
-                                 prog_name, false);
+                                 prog_name, false, NULL);
 }
 
+/* Callback run by test_fexit_bpf2bpf_common() after the first freplace
+ * attachment: attach the same already-loaded freplace prog a second
+ * time, to a freshly loaded copy of the target object, run the target,
+ * and verify (and reset) the result counter. This exercises freplace
+ * multi-attach. Returns 0 on success, negative on failure.
+ */
+static int test_second_attach(struct bpf_object *obj)
+{
+       const char *prog_name = "freplace/get_constant";
+       const char *tgt_name = prog_name + 9; /* cut off freplace/ */
+       const char *tgt_obj_file = "./test_pkt_access.o";
+       struct bpf_program *prog = NULL;
+       struct bpf_object *tgt_obj;
+       __u32 duration = 0, retval;
+       struct bpf_link *link;
+       int err = 0, tgt_fd;
+
+       prog = bpf_object__find_program_by_title(obj, prog_name);
+       if (CHECK(!prog, "find_prog", "prog %s not found\n", prog_name))
+               return -ENOENT;
+
+       err = bpf_prog_load(tgt_obj_file, BPF_PROG_TYPE_UNSPEC,
+                           &tgt_obj, &tgt_fd);
+       if (CHECK(err, "second_prog_load", "file %s err %d errno %d\n",
+                 tgt_obj_file, err, errno))
+               return err;
+
+       link = bpf_program__attach_freplace(prog, tgt_fd, tgt_name);
+       /* NOTE(review): on attach failure link is an ERR_PTR and is still
+        * passed to bpf_link__destroy() at out: — confirm that libbpf's
+        * destroy tolerates IS_ERR pointers
+        */
+       if (CHECK(IS_ERR(link), "second_link", "failed to attach second link prog_fd %d tgt_fd %d\n", bpf_program__fd(prog), tgt_fd))
+               goto out;
+
+       err = bpf_prog_test_run(tgt_fd, 1, &pkt_v6, sizeof(pkt_v6),
+                               NULL, NULL, &retval, &duration);
+       if (CHECK(err || retval, "ipv6",
+                 "err %d errno %d retval %d duration %d\n",
+                 err, errno, retval, duration))
+               goto out;
+
+       /* reset=true so the counter is clean for any later run */
+       err = check_data_map(obj, 1, true);
+       if (err)
+               goto out;
+
+out:
+       bpf_link__destroy(link);
+       bpf_object__close(tgt_obj);
+       return err;
+}
+
+/* freplace multi-attach subtest: run the common fexit_bpf2bpf flow with
+ * test_second_attach() as the callback, which attaches the freplace
+ * prog to a second target while the first attachment is active.
+ */
+static void test_func_replace_multi(void)
+{
+       const char *prog_name[] = {
+               "freplace/get_constant",
+       };
+       test_fexit_bpf2bpf_common("./freplace_get_constant.o",
+                                 "./test_pkt_access.o",
+                                 ARRAY_SIZE(prog_name),
+                                 prog_name, true, test_second_attach);
+}
+
+/* Negative test: attaching an fmod_ret prog on top of an freplace prog
+ * must be rejected. Loads the target, attaches the freplace prog to it,
+ * then expects loading an fmod_ret prog targeting the freplace prog's
+ * fd to fail.
+ */
+static void test_fmod_ret_freplace(void)
+{
+       struct bpf_object *freplace_obj = NULL, *pkt_obj, *fmod_obj = NULL;
+       const char *freplace_name = "./freplace_get_constant.o";
+       const char *fmod_ret_name = "./fmod_ret_freplace.o";
+       DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts);
+       const char *tgt_name = "./test_pkt_access.o";
+       struct bpf_link *freplace_link = NULL;
+       struct bpf_program *prog;
+       __u32 duration = 0;
+       int err, pkt_fd;
+
+       err = bpf_prog_load(tgt_name, BPF_PROG_TYPE_UNSPEC,
+                           &pkt_obj, &pkt_fd);
+       /* the target prog should load fine */
+       if (CHECK(err, "tgt_prog_load", "file %s err %d errno %d\n",
+                 tgt_name, err, errno))
+               return;
+       opts.attach_prog_fd = pkt_fd;
+
+       freplace_obj = bpf_object__open_file(freplace_name, &opts);
+       if (CHECK(IS_ERR_OR_NULL(freplace_obj), "freplace_obj_open",
+                 "failed to open %s: %ld\n", freplace_name,
+                 PTR_ERR(freplace_obj)))
+               goto out;
+
+       err = bpf_object__load(freplace_obj);
+       if (CHECK(err, "freplace_obj_load", "err %d\n", err))
+               goto out;
+
+       /* attach the (single) freplace prog to the target */
+       prog = bpf_program__next(NULL, freplace_obj);
+       freplace_link = bpf_program__attach_trace(prog);
+       if (CHECK(IS_ERR(freplace_link), "freplace_attach_trace", "failed to link\n"))
+               goto out;
+
+       /* now point the fmod_ret prog at the freplace prog itself */
+       opts.attach_prog_fd = bpf_program__fd(prog);
+       fmod_obj = bpf_object__open_file(fmod_ret_name, &opts);
+       if (CHECK(IS_ERR_OR_NULL(fmod_obj), "fmod_obj_open",
+                 "failed to open %s: %ld\n", fmod_ret_name,
+                 PTR_ERR(fmod_obj)))
+               goto out;
+
+       /* load is expected to FAIL — success here is the test failure */
+       err = bpf_object__load(fmod_obj);
+       if (CHECK(!err, "fmod_obj_load", "loading fmod_ret should fail\n"))
+               goto out;
+
+out:
+       /* NOTE(review): freplace_link may be an ERR_PTR on the attach-fail
+        * path — confirm bpf_link__destroy() tolerates it
+        */
+       bpf_link__destroy(freplace_link);
+       bpf_object__close(freplace_obj);
+       bpf_object__close(fmod_obj);
+       bpf_object__close(pkt_obj);
+}
+
+
 static void test_func_sockmap_update(void)
 {
        const char *prog_name[] = {
@@ -150,7 +294,7 @@ static void test_func_sockmap_update(void)
        test_fexit_bpf2bpf_common("./freplace_cls_redirect.o",
                                  "./test_cls_redirect.o",
                                  ARRAY_SIZE(prog_name),
-                                 prog_name, false);
+                                 prog_name, false, NULL);
 }
 
 static void test_obj_load_failure_common(const char *obj_file,
@@ -222,4 +366,8 @@ void test_fexit_bpf2bpf(void)
                test_func_replace_return_code();
        if (test__start_subtest("func_map_prog_compatibility"))
                test_func_map_prog_compatibility();
+       if (test__start_subtest("func_replace_multi"))
+               test_func_replace_multi();
+       if (test__start_subtest("fmod_ret_freplace"))
+               test_fmod_ret_freplace();
 }
diff --git a/tools/testing/selftests/bpf/prog_tests/pe_preserve_elems.c b/tools/testing/selftests/bpf/prog_tests/pe_preserve_elems.c
new file mode 100644 (file)
index 0000000..673d383
--- /dev/null
@@ -0,0 +1,66 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2019 Facebook */
+#include <test_progs.h>
+#include <linux/bpf.h>
+#include "test_pe_preserve_elems.skel.h"
+
+static int duration;
+
/* Attach a software perf event (CPU clock) to element 0 of @map, run
 * @prog once via BPF_PROG_TEST_RUN, then close the map fd and run again.
 *
 * @has_share_pe: whether the map is expected to keep its perf-event
 * elements alive after the user-space map fd is closed (presumably maps
 * created with BPF_F_PRESERVE_ELEMS -- confirm against the BPF prog).
 * If true, the second run must still read the event; if false, the
 * second run must fail with -ENOENT.
 */
static void test_one_map(struct bpf_map *map, struct bpf_program *prog,
			 bool has_share_pe)
{
	int err, key = 0, pfd = -1, mfd = bpf_map__fd(map);
	DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts);
	struct perf_event_attr attr = {
		.size = sizeof(struct perf_event_attr),
		.type = PERF_TYPE_SOFTWARE,
		.config = PERF_COUNT_SW_CPU_CLOCK,
	};

	/* measure the calling task (pid 0) on any CPU (cpu -1) */
	pfd = syscall(__NR_perf_event_open, &attr, 0 /* pid */,
		      -1 /* cpu */, -1 /* group id */, 0 /* flags */);
	if (CHECK(pfd < 0, "perf_event_open", "failed\n"))
		return;

	/* store the event in the map; the map holds its own reference,
	 * so our pfd can be closed right away
	 */
	err = bpf_map_update_elem(mfd, &key, &pfd, BPF_ANY);
	close(pfd);
	if (CHECK(err < 0, "bpf_map_update_elem", "failed\n"))
		return;

	err = bpf_prog_test_run_opts(bpf_program__fd(prog), &opts);
	if (CHECK(err < 0, "bpf_prog_test_run_opts", "failed\n"))
		return;
	if (CHECK(opts.retval != 0, "bpf_perf_event_read_value",
		  "failed with %d\n", opts.retval))
		return;

	/* closing mfd, prog still holds a reference on map */
	close(mfd);

	err = bpf_prog_test_run_opts(bpf_program__fd(prog), &opts);
	if (CHECK(err < 0, "bpf_prog_test_run_opts", "failed\n"))
		return;

	if (has_share_pe) {
		CHECK(opts.retval != 0, "bpf_perf_event_read_value",
		      "failed with %d\n", opts.retval);
	} else {
		/* without element preservation the event must be gone */
		CHECK(opts.retval != -ENOENT, "bpf_perf_event_read_value",
		      "should have failed with %d, but got %d\n", -ENOENT,
		      opts.retval);
	}
}
+
+void test_pe_preserve_elems(void)
+{
+       struct test_pe_preserve_elems *skel;
+
+       skel = test_pe_preserve_elems__open_and_load();
+       if (CHECK(!skel, "skel_open", "failed to open skeleton\n"))
+               return;
+
+       test_one_map(skel->maps.array_1, skel->progs.read_array_1, false);
+       test_one_map(skel->maps.array_2, skel->progs.read_array_2, true);
+
+       test_pe_preserve_elems__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/raw_tp_test_run.c b/tools/testing/selftests/bpf/prog_tests/raw_tp_test_run.c
new file mode 100644 (file)
index 0000000..c5fb191
--- /dev/null
@@ -0,0 +1,96 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2019 Facebook */
+#include <test_progs.h>
+#include <linux/bpf.h>
+#include "bpf/libbpf_internal.h"
+#include "test_raw_tp_test_run.skel.h"
+
+static int duration;
+
+void test_raw_tp_test_run(void)
+{
+       struct bpf_prog_test_run_attr test_attr = {};
+       int comm_fd = -1, err, nr_online, i, prog_fd;
+       __u64 args[2] = {0x1234ULL, 0x5678ULL};
+       int expected_retval = 0x1234 + 0x5678;
+       struct test_raw_tp_test_run *skel;
+       char buf[] = "new_name";
+       bool *online = NULL;
+       DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts,
+                           .ctx_in = args,
+                           .ctx_size_in = sizeof(args),
+                           .flags = BPF_F_TEST_RUN_ON_CPU,
+               );
+
+       err = parse_cpu_mask_file("/sys/devices/system/cpu/online", &online,
+                                 &nr_online);
+       if (CHECK(err, "parse_cpu_mask_file", "err %d\n", err))
+               return;
+
+       skel = test_raw_tp_test_run__open_and_load();
+       if (CHECK(!skel, "skel_open", "failed to open skeleton\n"))
+               goto cleanup;
+
+       err = test_raw_tp_test_run__attach(skel);
+       if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err))
+               goto cleanup;
+
+       comm_fd = open("/proc/self/comm", O_WRONLY|O_TRUNC);
+       if (CHECK(comm_fd < 0, "open /proc/self/comm", "err %d\n", errno))
+               goto cleanup;
+
+       err = write(comm_fd, buf, sizeof(buf));
+       CHECK(err < 0, "task rename", "err %d", errno);
+
+       CHECK(skel->bss->count == 0, "check_count", "didn't increase\n");
+       CHECK(skel->data->on_cpu != 0xffffffff, "check_on_cpu", "got wrong value\n");
+
+       prog_fd = bpf_program__fd(skel->progs.rename);
+       test_attr.prog_fd = prog_fd;
+       test_attr.ctx_in = args;
+       test_attr.ctx_size_in = sizeof(__u64);
+
+       err = bpf_prog_test_run_xattr(&test_attr);
+       CHECK(err == 0, "test_run", "should fail for too small ctx\n");
+
+       test_attr.ctx_size_in = sizeof(args);
+       err = bpf_prog_test_run_xattr(&test_attr);
+       CHECK(err < 0, "test_run", "err %d\n", errno);
+       CHECK(test_attr.retval != expected_retval, "check_retval",
+             "expect 0x%x, got 0x%x\n", expected_retval, test_attr.retval);
+
+       for (i = 0; i < nr_online; i++) {
+               if (!online[i])
+                       continue;
+
+               opts.cpu = i;
+               opts.retval = 0;
+               err = bpf_prog_test_run_opts(prog_fd, &opts);
+               CHECK(err < 0, "test_run_opts", "err %d\n", errno);
+               CHECK(skel->data->on_cpu != i, "check_on_cpu",
+                     "expect %d got %d\n", i, skel->data->on_cpu);
+               CHECK(opts.retval != expected_retval,
+                     "check_retval", "expect 0x%x, got 0x%x\n",
+                     expected_retval, opts.retval);
+       }
+
+       /* invalid cpu ID should fail with ENXIO */
+       opts.cpu = 0xffffffff;
+       err = bpf_prog_test_run_opts(prog_fd, &opts);
+       CHECK(err != -1 || errno != ENXIO,
+             "test_run_opts_fail",
+             "should failed with ENXIO\n");
+
+       /* non-zero cpu w/o BPF_F_TEST_RUN_ON_CPU should fail with EINVAL */
+       opts.cpu = 1;
+       opts.flags = 0;
+       err = bpf_prog_test_run_opts(prog_fd, &opts);
+       CHECK(err != -1 || errno != EINVAL,
+             "test_run_opts_fail",
+             "should failed with EINVAL\n");
+
+cleanup:
+       close(comm_fd);
+       test_raw_tp_test_run__destroy(skel);
+       free(online);
+}
index 8826c65..6ace5e9 100644 (file)
@@ -28,6 +28,12 @@ struct symbol test_symbols[] = {
        { "func",    BTF_KIND_FUNC,    -1 },
 };
 
/* Align the .BTF_ids section to 4 bytes.
 *
 * NOTE(review): each BTF_ID entry is presumably a 32-bit word, so the
 * lists emitted below must start on a 4-byte boundary to be readable as
 * an array -- confirm against tools/include/linux/btf_ids.h.
 */
asm (
".pushsection " BTF_IDS_SECTION " ,\"a\"; \n"
".balign 4, 0;                            \n"
".popsection;                             \n");
 BTF_ID_LIST(test_list_local)
 BTF_ID_UNUSED
 BTF_ID(typedef, S)
diff --git a/tools/testing/selftests/bpf/prog_tests/snprintf_btf.c b/tools/testing/selftests/bpf/prog_tests/snprintf_btf.c
new file mode 100644 (file)
index 0000000..686b40f
--- /dev/null
@@ -0,0 +1,62 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include <linux/btf.h>
+#include "netif_receive_skb.skel.h"
+
+/* Demonstrate that bpf_snprintf_btf succeeds and that various data types
+ * are formatted correctly.
+ */
+void test_snprintf_btf(void)
+{
+       struct netif_receive_skb *skel;
+       struct netif_receive_skb__bss *bss;
+       int err, duration = 0;
+
+       skel = netif_receive_skb__open();
+       if (CHECK(!skel, "skel_open", "failed to open skeleton\n"))
+               return;
+
+       err = netif_receive_skb__load(skel);
+       if (CHECK(err, "skel_load", "failed to load skeleton: %d\n", err))
+               goto cleanup;
+
+       bss = skel->bss;
+
+       err = netif_receive_skb__attach(skel);
+       if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err))
+               goto cleanup;
+
+       /* generate receive event */
+       err = system("ping -c 1 127.0.0.1 > /dev/null");
+       if (CHECK(err, "system", "ping failed: %d\n", err))
+               goto cleanup;
+
+       if (bss->skip) {
+               printf("%s:SKIP:no __builtin_btf_type_id\n", __func__);
+               test__skip();
+               goto cleanup;
+       }
+
+       /*
+        * Make sure netif_receive_skb program was triggered
+        * and it set expected return values from bpf_trace_printk()s
+        * and all tests ran.
+        */
+       if (CHECK(bss->ret <= 0,
+                 "bpf_snprintf_btf: got return value",
+                 "ret <= 0 %ld test %d\n", bss->ret, bss->ran_subtests))
+               goto cleanup;
+
+       if (CHECK(bss->ran_subtests == 0, "check if subtests ran",
+                 "no subtests ran, did BPF program run?"))
+               goto cleanup;
+
+       if (CHECK(bss->num_subtests != bss->ran_subtests,
+                 "check all subtests ran",
+                 "only ran %d of %d tests\n", bss->num_subtests,
+                 bss->ran_subtests))
+               goto cleanup;
+
+cleanup:
+       netif_receive_skb__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/sock_fields.c b/tools/testing/selftests/bpf/prog_tests/sock_fields.c
new file mode 100644 (file)
index 0000000..af87118
--- /dev/null
@@ -0,0 +1,382 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019 Facebook */
+
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+#include <linux/compiler.h>
+
+#include "network_helpers.h"
+#include "cgroup_helpers.h"
+#include "test_progs.h"
+#include "bpf_rlimit.h"
+#include "test_sock_fields.skel.h"
+
+enum bpf_linum_array_idx {
+       EGRESS_LINUM_IDX,
+       INGRESS_LINUM_IDX,
+       __NR_BPF_LINUM_ARRAY_IDX,
+};
+
+struct bpf_spinlock_cnt {
+       struct bpf_spin_lock lock;
+       __u32 cnt;
+};
+
+#define PARENT_CGROUP  "/test-bpf-sock-fields"
+#define CHILD_CGROUP   "/test-bpf-sock-fields/child"
+#define DATA "Hello BPF!"
+#define DATA_LEN sizeof(DATA)
+
+static struct sockaddr_in6 srv_sa6, cli_sa6;
+static int sk_pkt_out_cnt10_fd;
+static struct test_sock_fields *skel;
+static int sk_pkt_out_cnt_fd;
+static __u64 parent_cg_id;
+static __u64 child_cg_id;
+static int linum_map_fd;
+static __u32 duration;
+
+static __u32 egress_linum_idx = EGRESS_LINUM_IDX;
+static __u32 ingress_linum_idx = INGRESS_LINUM_IDX;
+
/* Dump a struct bpf_sock for debugging failed checks.
 *
 * NOTE(review): only dst_port goes through ntohs() here while src_port
 * is printed as-is -- bpf_sock presumably exposes src_port in host order
 * and dst_port in network order; confirm against include/uapi/linux/bpf.h.
 */
static void print_sk(const struct bpf_sock *sk, const char *prefix)
{
	char src_ip4[24], dst_ip4[24];
	char src_ip6[64], dst_ip6[64];

	inet_ntop(AF_INET, &sk->src_ip4, src_ip4, sizeof(src_ip4));
	inet_ntop(AF_INET6, &sk->src_ip6, src_ip6, sizeof(src_ip6));
	inet_ntop(AF_INET, &sk->dst_ip4, dst_ip4, sizeof(dst_ip4));
	inet_ntop(AF_INET6, &sk->dst_ip6, dst_ip6, sizeof(dst_ip6));

	printf("%s: state:%u bound_dev_if:%u family:%u type:%u protocol:%u mark:%u priority:%u "
	       "src_ip4:%x(%s) src_ip6:%x:%x:%x:%x(%s) src_port:%u "
	       "dst_ip4:%x(%s) dst_ip6:%x:%x:%x:%x(%s) dst_port:%u\n",
	       prefix,
	       sk->state, sk->bound_dev_if, sk->family, sk->type, sk->protocol,
	       sk->mark, sk->priority,
	       sk->src_ip4, src_ip4,
	       sk->src_ip6[0], sk->src_ip6[1], sk->src_ip6[2], sk->src_ip6[3],
	       src_ip6, sk->src_port,
	       sk->dst_ip4, dst_ip4,
	       sk->dst_ip6[0], sk->dst_ip6[1], sk->dst_ip6[2], sk->dst_ip6[3],
	       dst_ip6, ntohs(sk->dst_port));
}
+
+static void print_tp(const struct bpf_tcp_sock *tp, const char *prefix)
+{
+       printf("%s: snd_cwnd:%u srtt_us:%u rtt_min:%u snd_ssthresh:%u rcv_nxt:%u "
+              "snd_nxt:%u snd:una:%u mss_cache:%u ecn_flags:%u "
+              "rate_delivered:%u rate_interval_us:%u packets_out:%u "
+              "retrans_out:%u total_retrans:%u segs_in:%u data_segs_in:%u "
+              "segs_out:%u data_segs_out:%u lost_out:%u sacked_out:%u "
+              "bytes_received:%llu bytes_acked:%llu\n",
+              prefix,
+              tp->snd_cwnd, tp->srtt_us, tp->rtt_min, tp->snd_ssthresh,
+              tp->rcv_nxt, tp->snd_nxt, tp->snd_una, tp->mss_cache,
+              tp->ecn_flags, tp->rate_delivered, tp->rate_interval_us,
+              tp->packets_out, tp->retrans_out, tp->total_retrans,
+              tp->segs_in, tp->data_segs_in, tp->segs_out,
+              tp->data_segs_out, tp->lost_out, tp->sacked_out,
+              tp->bytes_received, tp->bytes_acked);
+}
+
/* Verify every value the BPF programs recorded into global data:
 * the listen/server/client bpf_sock and bpf_tcp_sock snapshots, and the
 * parent/child cgroup ids. On mismatch the CHECK output includes the
 * BPF source line number (egress_linum/ingress_linum) that captured the
 * offending snapshot.
 */
static void check_result(void)
{
	struct bpf_tcp_sock srv_tp, cli_tp, listen_tp;
	struct bpf_sock srv_sk, cli_sk, listen_sk;
	__u32 ingress_linum, egress_linum;
	int err;

	err = bpf_map_lookup_elem(linum_map_fd, &egress_linum_idx,
				  &egress_linum);
	CHECK(err == -1, "bpf_map_lookup_elem(linum_map_fd)",
	      "err:%d errno:%d\n", err, errno);

	err = bpf_map_lookup_elem(linum_map_fd, &ingress_linum_idx,
				  &ingress_linum);
	CHECK(err == -1, "bpf_map_lookup_elem(linum_map_fd)",
	      "err:%d errno:%d\n", err, errno);

	/* copy out of the skeleton's bss to local snapshots before checking */
	memcpy(&srv_sk, &skel->bss->srv_sk, sizeof(srv_sk));
	memcpy(&srv_tp, &skel->bss->srv_tp, sizeof(srv_tp));
	memcpy(&cli_sk, &skel->bss->cli_sk, sizeof(cli_sk));
	memcpy(&cli_tp, &skel->bss->cli_tp, sizeof(cli_tp));
	memcpy(&listen_sk, &skel->bss->listen_sk, sizeof(listen_sk));
	memcpy(&listen_tp, &skel->bss->listen_tp, sizeof(listen_tp));

	print_sk(&listen_sk, "listen_sk");
	print_sk(&srv_sk, "srv_sk");
	print_sk(&cli_sk, "cli_sk");
	print_tp(&listen_tp, "listen_tp");
	print_tp(&srv_tp, "srv_tp");
	print_tp(&cli_tp, "cli_tp");

	/* state 10 is presumably BPF_TCP_LISTEN -- confirm against
	 * include/uapi/linux/bpf.h. A listener must have no peer address
	 * and no dst_port.
	 */
	CHECK(listen_sk.state != 10 ||
	      listen_sk.family != AF_INET6 ||
	      listen_sk.protocol != IPPROTO_TCP ||
	      memcmp(listen_sk.src_ip6, &in6addr_loopback,
		     sizeof(listen_sk.src_ip6)) ||
	      listen_sk.dst_ip6[0] || listen_sk.dst_ip6[1] ||
	      listen_sk.dst_ip6[2] || listen_sk.dst_ip6[3] ||
	      listen_sk.src_port != ntohs(srv_sa6.sin6_port) ||
	      listen_sk.dst_port,
	      "listen_sk",
	      "Unexpected. Check listen_sk output. ingress_linum:%u\n",
	      ingress_linum);

	/* the accepted server side: established (not listening), both
	 * endpoints loopback, ports matching the recorded addresses
	 */
	CHECK(srv_sk.state == 10 ||
	      !srv_sk.state ||
	      srv_sk.family != AF_INET6 ||
	      srv_sk.protocol != IPPROTO_TCP ||
	      memcmp(srv_sk.src_ip6, &in6addr_loopback,
		     sizeof(srv_sk.src_ip6)) ||
	      memcmp(srv_sk.dst_ip6, &in6addr_loopback,
		     sizeof(srv_sk.dst_ip6)) ||
	      srv_sk.src_port != ntohs(srv_sa6.sin6_port) ||
	      srv_sk.dst_port != cli_sa6.sin6_port,
	      "srv_sk", "Unexpected. Check srv_sk output. egress_linum:%u\n",
	      egress_linum);

	CHECK(!skel->bss->lsndtime, "srv_tp", "Unexpected lsndtime:0\n");

	CHECK(cli_sk.state == 10 ||
	      !cli_sk.state ||
	      cli_sk.family != AF_INET6 ||
	      cli_sk.protocol != IPPROTO_TCP ||
	      memcmp(cli_sk.src_ip6, &in6addr_loopback,
		     sizeof(cli_sk.src_ip6)) ||
	      memcmp(cli_sk.dst_ip6, &in6addr_loopback,
		     sizeof(cli_sk.dst_ip6)) ||
	      cli_sk.src_port != ntohs(cli_sa6.sin6_port) ||
	      cli_sk.dst_port != srv_sa6.sin6_port,
	      "cli_sk", "Unexpected. Check cli_sk output. egress_linum:%u\n",
	      egress_linum);

	/* a listener exchanges no data */
	CHECK(listen_tp.data_segs_out ||
	      listen_tp.data_segs_in ||
	      listen_tp.total_retrans ||
	      listen_tp.bytes_acked,
	      "listen_tp",
	      "Unexpected. Check listen_tp output. ingress_linum:%u\n",
	      ingress_linum);

	/* server sent the two DATA packets the test loop pushed */
	CHECK(srv_tp.data_segs_out != 2 ||
	      srv_tp.data_segs_in ||
	      srv_tp.snd_cwnd != 10 ||
	      srv_tp.total_retrans ||
	      srv_tp.bytes_acked < 2 * DATA_LEN,
	      "srv_tp", "Unexpected. Check srv_tp output. egress_linum:%u\n",
	      egress_linum);

	/* client received those same two DATA packets */
	CHECK(cli_tp.data_segs_out ||
	      cli_tp.data_segs_in != 2 ||
	      cli_tp.snd_cwnd != 10 ||
	      cli_tp.total_retrans ||
	      cli_tp.bytes_received < 2 * DATA_LEN,
	      "cli_tp", "Unexpected. Check cli_tp output. egress_linum:%u\n",
	      egress_linum);

	CHECK(skel->bss->parent_cg_id != parent_cg_id,
	      "parent_cg_id", "%zu != %zu\n",
	      (size_t)skel->bss->parent_cg_id, (size_t)parent_cg_id);

	CHECK(skel->bss->child_cg_id != child_cg_id,
	      "child_cg_id", "%zu != %zu\n",
	       (size_t)skel->bss->child_cg_id, (size_t)child_cg_id);
}
+
/* Verify the per-socket packet-out counters kept by the BPF programs in
 * two sk_storage maps (one counting by 1, one by 10), for both the
 * accepted server socket and the client socket. The BPF prog seeds each
 * counter with 0xeB9F, so expected values are 0xeB9F plus the packet
 * counts below.
 */
static void check_sk_pkt_out_cnt(int accept_fd, int cli_fd)
{
	struct bpf_spinlock_cnt pkt_out_cnt = {}, pkt_out_cnt10 = {};
	int err;

	/* poison with ~0 so a failed lookup is visible in the CHECK output */
	pkt_out_cnt.cnt = ~0;
	pkt_out_cnt10.cnt = ~0;
	err = bpf_map_lookup_elem(sk_pkt_out_cnt_fd, &accept_fd, &pkt_out_cnt);
	if (!err)
		err = bpf_map_lookup_elem(sk_pkt_out_cnt10_fd, &accept_fd,
					  &pkt_out_cnt10);

	/* The bpf prog only counts for fullsock and
	 * passive connection did not become fullsock until 3WHS
	 * had been finished, so the bpf prog only counted two data
	 * packet out.
	 */
	CHECK(err || pkt_out_cnt.cnt < 0xeB9F + 2 ||
	      pkt_out_cnt10.cnt < 0xeB9F + 20,
	      "bpf_map_lookup_elem(sk_pkt_out_cnt, &accept_fd)",
	      "err:%d errno:%d pkt_out_cnt:%u pkt_out_cnt10:%u\n",
	      err, errno, pkt_out_cnt.cnt, pkt_out_cnt10.cnt);

	pkt_out_cnt.cnt = ~0;
	pkt_out_cnt10.cnt = ~0;
	err = bpf_map_lookup_elem(sk_pkt_out_cnt_fd, &cli_fd, &pkt_out_cnt);
	if (!err)
		err = bpf_map_lookup_elem(sk_pkt_out_cnt10_fd, &cli_fd,
					  &pkt_out_cnt10);
	/* Active connection is fullsock from the beginning.
	 * 1 SYN and 1 ACK during 3WHS
	 * 2 Acks on data packet.
	 *
	 * The bpf_prog initialized it to 0xeB9F.
	 */
	CHECK(err || pkt_out_cnt.cnt < 0xeB9F + 4 ||
	      pkt_out_cnt10.cnt < 0xeB9F + 40,
	      "bpf_map_lookup_elem(sk_pkt_out_cnt, &cli_fd)",
	      "err:%d errno:%d pkt_out_cnt:%u pkt_out_cnt10:%u\n",
	      err, errno, pkt_out_cnt.cnt, pkt_out_cnt10.cnt);
}
+
+static int init_sk_storage(int sk_fd, __u32 pkt_out_cnt)
+{
+       struct bpf_spinlock_cnt scnt = {};
+       int err;
+
+       scnt.cnt = pkt_out_cnt;
+       err = bpf_map_update_elem(sk_pkt_out_cnt_fd, &sk_fd, &scnt,
+                                 BPF_NOEXIST);
+       if (CHECK(err, "bpf_map_update_elem(sk_pkt_out_cnt_fd)",
+                 "err:%d errno:%d\n", err, errno))
+               return err;
+
+       err = bpf_map_update_elem(sk_pkt_out_cnt10_fd, &sk_fd, &scnt,
+                                 BPF_NOEXIST);
+       if (CHECK(err, "bpf_map_update_elem(sk_pkt_out_cnt10_fd)",
+                 "err:%d errno:%d\n", err, errno))
+               return err;
+
+       return 0;
+}
+
/* Drive one full TCP session over loopback IPv6 so the attached cgroup
 * BPF programs observe every socket state: listen, accept, two data
 * exchanges, then an orderly shutdown from both sides. Finally verify
 * the counters and snapshots the programs recorded.
 */
static void test(void)
{
	int listen_fd = -1, cli_fd = -1, accept_fd = -1, err, i;
	socklen_t addrlen = sizeof(struct sockaddr_in6);
	char buf[DATA_LEN];

	/* Prepare listen_fd */
	listen_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0, 0);
	/* start_server() has logged the error details */
	if (CHECK_FAIL(listen_fd == -1))
		goto done;

	err = getsockname(listen_fd, (struct sockaddr *)&srv_sa6, &addrlen);
	if (CHECK(err, "getsockname(listen_fd)", "err:%d errno:%d\n", err,
		  errno))
		goto done;
	/* tell the BPF side which address/port to match on */
	memcpy(&skel->bss->srv_sa6, &srv_sa6, sizeof(srv_sa6));

	cli_fd = connect_to_fd(listen_fd, 0);
	if (CHECK_FAIL(cli_fd == -1))
		goto done;

	err = getsockname(cli_fd, (struct sockaddr *)&cli_sa6, &addrlen);
	if (CHECK(err, "getsockname(cli_fd)", "err:%d errno:%d\n",
		  err, errno))
		goto done;

	accept_fd = accept(listen_fd, NULL, NULL);
	if (CHECK(accept_fd == -1, "accept(listen_fd)",
		  "accept_fd:%d errno:%d\n",
		  accept_fd, errno))
		goto done;

	/* seed the per-socket counters that check_sk_pkt_out_cnt() expects */
	if (init_sk_storage(accept_fd, 0xeB9F))
		goto done;

	for (i = 0; i < 2; i++) {
		/* Send some data from accept_fd to cli_fd.
		 * MSG_EOR to stop kernel from coalescing two pkts.
		 */
		err = send(accept_fd, DATA, DATA_LEN, MSG_EOR);
		if (CHECK(err != DATA_LEN, "send(accept_fd)",
			  "err:%d errno:%d\n", err, errno))
			goto done;

		err = recv(cli_fd, buf, DATA_LEN, 0);
		if (CHECK(err != DATA_LEN, "recv(cli_fd)", "err:%d errno:%d\n",
			  err, errno))
			goto done;
	}

	/* orderly shutdown: each recv() returning 0 confirms the peer's FIN */
	shutdown(cli_fd, SHUT_WR);
	err = recv(accept_fd, buf, 1, 0);
	if (CHECK(err, "recv(accept_fd) for fin", "err:%d errno:%d\n",
		  err, errno))
		goto done;
	shutdown(accept_fd, SHUT_WR);
	err = recv(cli_fd, buf, 1, 0);
	if (CHECK(err, "recv(cli_fd) for fin", "err:%d errno:%d\n",
		  err, errno))
		goto done;
	check_sk_pkt_out_cnt(accept_fd, cli_fd);
	check_result();

done:
	if (accept_fd != -1)
		close(accept_fd);
	if (cli_fd != -1)
		close(cli_fd);
	if (listen_fd != -1)
		close(listen_fd);
}
+
/* Entry point: set up a parent/child cgroup pair, load the skeleton,
 * attach the egress and ingress programs to the child cgroup, then run
 * the TCP scenario in test().
 *
 * NOTE(review): the error-path goto passes possibly-IS_ERR link
 * pointers to bpf_link__destroy(); this relies on libbpf tolerating
 * error/NULL pointers there -- confirm against the libbpf version in use.
 */
void test_sock_fields(void)
{
	struct bpf_link *egress_link = NULL, *ingress_link = NULL;
	int parent_cg_fd = -1, child_cg_fd = -1;

	/* Create a cgroup, get fd, and join it */
	parent_cg_fd = test__join_cgroup(PARENT_CGROUP);
	if (CHECK_FAIL(parent_cg_fd < 0))
		return;
	parent_cg_id = get_cgroup_id(PARENT_CGROUP);
	if (CHECK_FAIL(!parent_cg_id))
		goto done;

	child_cg_fd = test__join_cgroup(CHILD_CGROUP);
	if (CHECK_FAIL(child_cg_fd < 0))
		goto done;
	child_cg_id = get_cgroup_id(CHILD_CGROUP);
	if (CHECK_FAIL(!child_cg_id))
		goto done;

	skel = test_sock_fields__open_and_load();
	if (CHECK(!skel, "test_sock_fields__open_and_load", "failed\n"))
		goto done;

	egress_link = bpf_program__attach_cgroup(skel->progs.egress_read_sock_fields,
						 child_cg_fd);
	if (CHECK(IS_ERR(egress_link), "attach_cgroup(egress)", "err:%ld\n",
		  PTR_ERR(egress_link)))
		goto done;

	ingress_link = bpf_program__attach_cgroup(skel->progs.ingress_read_sock_fields,
						  child_cg_fd);
	if (CHECK(IS_ERR(ingress_link), "attach_cgroup(ingress)", "err:%ld\n",
		  PTR_ERR(ingress_link)))
		goto done;

	/* cache map fds used by the check helpers */
	linum_map_fd = bpf_map__fd(skel->maps.linum_map);
	sk_pkt_out_cnt_fd = bpf_map__fd(skel->maps.sk_pkt_out_cnt);
	sk_pkt_out_cnt10_fd = bpf_map__fd(skel->maps.sk_pkt_out_cnt10);

	test();

done:
	bpf_link__destroy(egress_link);
	bpf_link__destroy(ingress_link);
	test_sock_fields__destroy(skel);
	if (child_cg_fd != -1)
		close(child_cg_fd);
	if (parent_cg_fd != -1)
		close(parent_cg_fd);
}
index 4b7a527..4c4224e 100644 (file)
@@ -8,8 +8,6 @@
 #include "test_sockmap_invalid_update.skel.h"
 #include "bpf_iter_sockmap.skel.h"
 
-#include "progs/bpf_iter_sockmap.h"
-
 #define TCP_REPAIR             19      /* TCP sock is under repair right now */
 
 #define TCP_REPAIR_ON          1
@@ -50,6 +48,37 @@ error:
        return -1;
 }
 
+static void compare_cookies(struct bpf_map *src, struct bpf_map *dst)
+{
+       __u32 i, max_entries = bpf_map__max_entries(src);
+       int err, duration = 0, src_fd, dst_fd;
+
+       src_fd = bpf_map__fd(src);
+       dst_fd = bpf_map__fd(dst);
+
+       for (i = 0; i < max_entries; i++) {
+               __u64 src_cookie, dst_cookie;
+
+               err = bpf_map_lookup_elem(src_fd, &i, &src_cookie);
+               if (err && errno == ENOENT) {
+                       err = bpf_map_lookup_elem(dst_fd, &i, &dst_cookie);
+                       CHECK(!err, "map_lookup_elem(dst)", "element %u not deleted\n", i);
+                       CHECK(err && errno != ENOENT, "map_lookup_elem(dst)", "%s\n",
+                             strerror(errno));
+                       continue;
+               }
+               if (CHECK(err, "lookup_elem(src)", "%s\n", strerror(errno)))
+                       continue;
+
+               err = bpf_map_lookup_elem(dst_fd, &i, &dst_cookie);
+               if (CHECK(err, "lookup_elem(dst)", "%s\n", strerror(errno)))
+                       continue;
+
+               CHECK(dst_cookie != src_cookie, "cookie mismatch",
+                     "%llu != %llu (pos %u)\n", dst_cookie, src_cookie, i);
+       }
+}
+
 /* Create a map, populate it with one socket, and free the map. */
 static void test_sockmap_create_update_free(enum bpf_map_type map_type)
 {
@@ -109,9 +138,9 @@ out:
 static void test_sockmap_update(enum bpf_map_type map_type)
 {
        struct bpf_prog_test_run_attr tattr;
-       int err, prog, src, dst, duration = 0;
+       int err, prog, src, duration = 0;
        struct test_sockmap_update *skel;
-       __u64 src_cookie, dst_cookie;
+       struct bpf_map *dst_map;
        const __u32 zero = 0;
        char dummy[14] = {0};
        __s64 sk;
@@ -127,18 +156,14 @@ static void test_sockmap_update(enum bpf_map_type map_type)
        prog = bpf_program__fd(skel->progs.copy_sock_map);
        src = bpf_map__fd(skel->maps.src);
        if (map_type == BPF_MAP_TYPE_SOCKMAP)
-               dst = bpf_map__fd(skel->maps.dst_sock_map);
+               dst_map = skel->maps.dst_sock_map;
        else
-               dst = bpf_map__fd(skel->maps.dst_sock_hash);
+               dst_map = skel->maps.dst_sock_hash;
 
        err = bpf_map_update_elem(src, &zero, &sk, BPF_NOEXIST);
        if (CHECK(err, "update_elem(src)", "errno=%u\n", errno))
                goto out;
 
-       err = bpf_map_lookup_elem(src, &zero, &src_cookie);
-       if (CHECK(err, "lookup_elem(src, cookie)", "errno=%u\n", errno))
-               goto out;
-
        tattr = (struct bpf_prog_test_run_attr){
                .prog_fd = prog,
                .repeat = 1,
@@ -151,12 +176,7 @@ static void test_sockmap_update(enum bpf_map_type map_type)
                       "errno=%u retval=%u\n", errno, tattr.retval))
                goto out;
 
-       err = bpf_map_lookup_elem(dst, &zero, &dst_cookie);
-       if (CHECK(err, "lookup_elem(dst, cookie)", "errno=%u\n", errno))
-               goto out;
-
-       CHECK(dst_cookie != src_cookie, "cookie mismatch", "%llu != %llu\n",
-             dst_cookie, src_cookie);
+       compare_cookies(skel->maps.src, dst_map);
 
 out:
        test_sockmap_update__destroy(skel);
@@ -174,14 +194,14 @@ static void test_sockmap_invalid_update(void)
                test_sockmap_invalid_update__destroy(skel);
 }
 
-static void test_sockmap_iter(enum bpf_map_type map_type)
+static void test_sockmap_copy(enum bpf_map_type map_type)
 {
        DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
        int err, len, src_fd, iter_fd, duration = 0;
        union bpf_iter_link_info linfo = {0};
-       __s64 sock_fd[SOCKMAP_MAX_ENTRIES];
-       __u32 i, num_sockets, max_elems;
+       __u32 i, num_sockets, num_elems;
        struct bpf_iter_sockmap *skel;
+       __s64 *sock_fd = NULL;
        struct bpf_link *link;
        struct bpf_map *src;
        char buf[64];
@@ -190,22 +210,23 @@ static void test_sockmap_iter(enum bpf_map_type map_type)
        if (CHECK(!skel, "bpf_iter_sockmap__open_and_load", "skeleton open_and_load failed\n"))
                return;
 
-       for (i = 0; i < ARRAY_SIZE(sock_fd); i++)
-               sock_fd[i] = -1;
-
-       /* Make sure we have at least one "empty" entry to test iteration of
-        * an empty slot.
-        */
-       num_sockets = ARRAY_SIZE(sock_fd) - 1;
-
        if (map_type == BPF_MAP_TYPE_SOCKMAP) {
                src = skel->maps.sockmap;
-               max_elems = bpf_map__max_entries(src);
+               num_elems = bpf_map__max_entries(src);
+               num_sockets = num_elems - 1;
        } else {
                src = skel->maps.sockhash;
-               max_elems = num_sockets;
+               num_elems = bpf_map__max_entries(src) - 1;
+               num_sockets = num_elems;
        }
 
+       sock_fd = calloc(num_sockets, sizeof(*sock_fd));
+       if (CHECK(!sock_fd, "calloc(sock_fd)", "failed to allocate\n"))
+               goto out;
+
+       for (i = 0; i < num_sockets; i++)
+               sock_fd[i] = -1;
+
        src_fd = bpf_map__fd(src);
 
        for (i = 0; i < num_sockets; i++) {
@@ -221,7 +242,7 @@ static void test_sockmap_iter(enum bpf_map_type map_type)
        linfo.map.map_fd = src_fd;
        opts.link_info = &linfo;
        opts.link_info_len = sizeof(linfo);
-       link = bpf_program__attach_iter(skel->progs.count_elems, &opts);
+       link = bpf_program__attach_iter(skel->progs.copy, &opts);
        if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
                goto out;
 
@@ -236,23 +257,26 @@ static void test_sockmap_iter(enum bpf_map_type map_type)
                goto close_iter;
 
        /* test results */
-       if (CHECK(skel->bss->elems != max_elems, "elems", "got %u expected %u\n",
-                 skel->bss->elems, max_elems))
+       if (CHECK(skel->bss->elems != num_elems, "elems", "got %u expected %u\n",
+                 skel->bss->elems, num_elems))
                goto close_iter;
 
        if (CHECK(skel->bss->socks != num_sockets, "socks", "got %u expected %u\n",
                  skel->bss->socks, num_sockets))
                goto close_iter;
 
+       compare_cookies(src, skel->maps.dst);
+
 close_iter:
        close(iter_fd);
 free_link:
        bpf_link__destroy(link);
 out:
-       for (i = 0; i < num_sockets; i++) {
+       for (i = 0; sock_fd && i < num_sockets; i++)
                if (sock_fd[i] >= 0)
                        close(sock_fd[i]);
-       }
+       if (sock_fd)
+               free(sock_fd);
        bpf_iter_sockmap__destroy(skel);
 }
 
@@ -272,8 +296,8 @@ void test_sockmap_basic(void)
                test_sockmap_update(BPF_MAP_TYPE_SOCKHASH);
        if (test__start_subtest("sockmap update in unsafe context"))
                test_sockmap_invalid_update();
-       if (test__start_subtest("sockmap iter"))
-               test_sockmap_iter(BPF_MAP_TYPE_SOCKMAP);
-       if (test__start_subtest("sockhash iter"))
-               test_sockmap_iter(BPF_MAP_TYPE_SOCKHASH);
+       if (test__start_subtest("sockmap copy"))
+               test_sockmap_copy(BPF_MAP_TYPE_SOCKMAP);
+       if (test__start_subtest("sockhash copy"))
+               test_sockmap_copy(BPF_MAP_TYPE_SOCKHASH);
 }
index 2702df2..9966685 100644 (file)
@@ -61,10 +61,9 @@ void test_test_overhead(void)
        const char *raw_tp_name = "raw_tp/task_rename";
        const char *fentry_name = "fentry/__set_task_comm";
        const char *fexit_name = "fexit/__set_task_comm";
-       const char *fmodret_name = "fmod_ret/__set_task_comm";
        const char *kprobe_func = "__set_task_comm";
        struct bpf_program *kprobe_prog, *kretprobe_prog, *raw_tp_prog;
-       struct bpf_program *fentry_prog, *fexit_prog, *fmodret_prog;
+       struct bpf_program *fentry_prog, *fexit_prog;
        struct bpf_object *obj;
        struct bpf_link *link;
        int err, duration = 0;
@@ -97,11 +96,6 @@ void test_test_overhead(void)
        if (CHECK(!fexit_prog, "find_probe",
                  "prog '%s' not found\n", fexit_name))
                goto cleanup;
-       fmodret_prog = bpf_object__find_program_by_title(obj, fmodret_name);
-       if (CHECK(!fmodret_prog, "find_probe",
-                 "prog '%s' not found\n", fmodret_name))
-               goto cleanup;
-
        err = bpf_object__load(obj);
        if (CHECK(err, "obj_load", "err %d\n", err))
                goto cleanup;
@@ -148,12 +142,6 @@ void test_test_overhead(void)
        test_run("fexit");
        bpf_link__destroy(link);
 
-       /* attach fmod_ret */
-       link = bpf_program__attach_trace(fmodret_prog);
-       if (CHECK(IS_ERR(link), "attach fmod_ret", "err %ld\n", PTR_ERR(link)))
-               goto cleanup;
-       test_run("fmod_ret");
-       bpf_link__destroy(link);
 cleanup:
        prctl(PR_SET_NAME, comm, 0L, 0L, 0L);
        bpf_object__close(obj);
diff --git a/tools/testing/selftests/bpf/prog_tests/trace_ext.c b/tools/testing/selftests/bpf/prog_tests/trace_ext.c
new file mode 100644 (file)
index 0000000..924441d
--- /dev/null
@@ -0,0 +1,111 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+#include <test_progs.h>
+#include <network_helpers.h>
+#include <sys/stat.h>
+#include <linux/sched.h>
+#include <sys/syscall.h>
+
+#include "test_pkt_md_access.skel.h"
+#include "test_trace_ext.skel.h"
+#include "test_trace_ext_tracing.skel.h"
+
+static __u32 duration;
+
+void test_trace_ext(void)
+{
+       struct test_pkt_md_access *skel_pkt = NULL;
+       struct test_trace_ext_tracing *skel_trace = NULL;
+       struct test_trace_ext_tracing__bss *bss_trace;
+       struct test_trace_ext *skel_ext = NULL;
+       struct test_trace_ext__bss *bss_ext;
+       int err, pkt_fd, ext_fd;
+       struct bpf_program *prog;
+       char buf[100];
+       __u32 retval;
+       __u64 len;
+
+       /* open/load/attach test_pkt_md_access */
+       skel_pkt = test_pkt_md_access__open_and_load();
+       if (CHECK(!skel_pkt, "setup", "classifier/test_pkt_md_access open failed\n"))
+               goto cleanup;
+
+       err = test_pkt_md_access__attach(skel_pkt);
+       if (CHECK(err, "setup", "classifier/test_pkt_md_access attach failed: %d\n", err))
+               goto cleanup;
+
+       prog = skel_pkt->progs.test_pkt_md_access;
+       pkt_fd = bpf_program__fd(prog);
+
+       /* open extension */
+       skel_ext = test_trace_ext__open();
+       if (CHECK(!skel_ext, "setup", "freplace/test_pkt_md_access open failed\n"))
+               goto cleanup;
+
+       /* set extension's attach target - test_pkt_md_access  */
+       prog = skel_ext->progs.test_pkt_md_access_new;
+       bpf_program__set_attach_target(prog, pkt_fd, "test_pkt_md_access");
+
+       /* load/attach extension */
+       err = test_trace_ext__load(skel_ext);
+       if (CHECK(err, "setup", "freplace/test_pkt_md_access load failed\n")) {
+               libbpf_strerror(err, buf, sizeof(buf));
+               fprintf(stderr, "%s\n", buf);
+               goto cleanup;
+       }
+
+       err = test_trace_ext__attach(skel_ext);
+       if (CHECK(err, "setup", "freplace/test_pkt_md_access attach failed: %d\n", err))
+               goto cleanup;
+
+       prog = skel_ext->progs.test_pkt_md_access_new;
+       ext_fd = bpf_program__fd(prog);
+
+       /* open tracing  */
+       skel_trace = test_trace_ext_tracing__open();
+       if (CHECK(!skel_trace, "setup", "tracing/test_pkt_md_access_new open failed\n"))
+               goto cleanup;
+
+       /* set tracing's attach target - fentry */
+       prog = skel_trace->progs.fentry;
+       bpf_program__set_attach_target(prog, ext_fd, "test_pkt_md_access_new");
+
+       /* set tracing's attach target - fexit */
+       prog = skel_trace->progs.fexit;
+       bpf_program__set_attach_target(prog, ext_fd, "test_pkt_md_access_new");
+
+       /* load/attach tracing */
+       err = test_trace_ext_tracing__load(skel_trace);
+       if (CHECK(err, "setup", "tracing/test_pkt_md_access_new load failed\n")) {
+               libbpf_strerror(err, buf, sizeof(buf));
+               fprintf(stderr, "%s\n", buf);
+               goto cleanup;
+       }
+
+       err = test_trace_ext_tracing__attach(skel_trace);
+       if (CHECK(err, "setup", "tracing/test_pkt_md_access_new attach failed: %d\n", err))
+               goto cleanup;
+
+       /* trigger the test */
+       err = bpf_prog_test_run(pkt_fd, 1, &pkt_v4, sizeof(pkt_v4),
+                               NULL, NULL, &retval, &duration);
+       CHECK(err || retval, "run", "err %d errno %d retval %d\n", err, errno, retval);
+
+       bss_ext = skel_ext->bss;
+       bss_trace = skel_trace->bss;
+
+       len = bss_ext->ext_called;
+
+       CHECK(bss_ext->ext_called == 0,
+               "check", "failed to trigger freplace/test_pkt_md_access\n");
+       CHECK(bss_trace->fentry_called != len,
+               "check", "failed to trigger fentry/test_pkt_md_access_new\n");
+       CHECK(bss_trace->fexit_called != len,
+               "check", "failed to trigger fexit/test_pkt_md_access_new\n");
+
+cleanup:
+       test_trace_ext_tracing__destroy(skel_trace);
+       test_trace_ext__destroy(skel_ext);
+       test_pkt_md_access__destroy(skel_pkt);
+}
index ef57408..6939bfd 100644 (file)
@@ -15,6 +15,8 @@
  */
 
 #include <linux/bpf.h>
+#include <linux/stddef.h>
+#include <linux/tcp.h>
 #include "bpf_tcp_helpers.h"
 
 char _license[] SEC("license") = "GPL";
index 3fb4260..4dc1a96 100644 (file)
@@ -9,6 +9,8 @@
 #include <stddef.h>
 #include <linux/bpf.h>
 #include <linux/types.h>
+#include <linux/stddef.h>
+#include <linux/tcp.h>
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_tracing.h>
 #include "bpf_tcp_helpers.h"
index de6de92..5a65f6b 100644 (file)
@@ -118,18 +118,18 @@ static __always_inline int parse_eth_proto(struct __sk_buff *skb, __be16 proto)
 
        switch (proto) {
        case bpf_htons(ETH_P_IP):
-               bpf_tail_call(skb, &jmp_table, IP);
+               bpf_tail_call_static(skb, &jmp_table, IP);
                break;
        case bpf_htons(ETH_P_IPV6):
-               bpf_tail_call(skb, &jmp_table, IPV6);
+               bpf_tail_call_static(skb, &jmp_table, IPV6);
                break;
        case bpf_htons(ETH_P_MPLS_MC):
        case bpf_htons(ETH_P_MPLS_UC):
-               bpf_tail_call(skb, &jmp_table, MPLS);
+               bpf_tail_call_static(skb, &jmp_table, MPLS);
                break;
        case bpf_htons(ETH_P_8021Q):
        case bpf_htons(ETH_P_8021AD):
-               bpf_tail_call(skb, &jmp_table, VLAN);
+               bpf_tail_call_static(skb, &jmp_table, VLAN);
                break;
        default:
                /* Protocol not supported */
@@ -246,10 +246,10 @@ static __always_inline int parse_ipv6_proto(struct __sk_buff *skb, __u8 nexthdr)
        switch (nexthdr) {
        case IPPROTO_HOPOPTS:
        case IPPROTO_DSTOPTS:
-               bpf_tail_call(skb, &jmp_table, IPV6OP);
+               bpf_tail_call_static(skb, &jmp_table, IPV6OP);
                break;
        case IPPROTO_FRAGMENT:
-               bpf_tail_call(skb, &jmp_table, IPV6FR);
+               bpf_tail_call_static(skb, &jmp_table, IPV6FR);
                break;
        default:
                return parse_ip_proto(skb, nexthdr);
index df682af..6a12554 100644 (file)
 #define bpf_iter__bpf_map_elem bpf_iter__bpf_map_elem___not_used
 #define bpf_iter__bpf_sk_storage_map bpf_iter__bpf_sk_storage_map___not_used
 #define bpf_iter__sockmap bpf_iter__sockmap___not_used
+#define btf_ptr btf_ptr___not_used
+#define BTF_F_COMPACT BTF_F_COMPACT___not_used
+#define BTF_F_NONAME BTF_F_NONAME___not_used
+#define BTF_F_PTR_RAW BTF_F_PTR_RAW___not_used
+#define BTF_F_ZERO BTF_F_ZERO___not_used
 #include "vmlinux.h"
 #undef bpf_iter_meta
 #undef bpf_iter__bpf_map
 #undef bpf_iter__bpf_map_elem
 #undef bpf_iter__bpf_sk_storage_map
 #undef bpf_iter__sockmap
+#undef btf_ptr
+#undef BTF_F_COMPACT
+#undef BTF_F_NONAME
+#undef BTF_F_PTR_RAW
+#undef BTF_F_ZERO
 
 struct bpf_iter_meta {
        struct seq_file *seq;
@@ -105,3 +115,16 @@ struct bpf_iter__sockmap {
        void *key;
        struct sock *sk;
 };
+
+struct btf_ptr {
+       void *ptr;
+       __u32 type_id;
+       __u32 flags;
+};
+
+enum {
+       BTF_F_COMPACT   =       (1ULL << 0),
+       BTF_F_NONAME    =       (1ULL << 1),
+       BTF_F_PTR_RAW   =       (1ULL << 2),
+       BTF_F_ZERO      =       (1ULL << 3),
+};
index 0e27f73..f3af0e3 100644 (file)
@@ -2,7 +2,6 @@
 /* Copyright (c) 2020 Cloudflare */
 #include "bpf_iter.h"
 #include "bpf_tracing_net.h"
-#include "bpf_iter_sockmap.h"
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_tracing.h>
 #include <errno.h>
@@ -11,33 +10,50 @@ char _license[] SEC("license") = "GPL";
 
 struct {
        __uint(type, BPF_MAP_TYPE_SOCKMAP);
-       __uint(max_entries, SOCKMAP_MAX_ENTRIES);
+       __uint(max_entries, 64);
        __type(key, __u32);
        __type(value, __u64);
 } sockmap SEC(".maps");
 
 struct {
        __uint(type, BPF_MAP_TYPE_SOCKHASH);
-       __uint(max_entries, SOCKMAP_MAX_ENTRIES);
+       __uint(max_entries, 64);
        __type(key, __u32);
        __type(value, __u64);
 } sockhash SEC(".maps");
 
+struct {
+       __uint(type, BPF_MAP_TYPE_SOCKHASH);
+       __uint(max_entries, 64);
+       __type(key, __u32);
+       __type(value, __u64);
+} dst SEC(".maps");
+
 __u32 elems = 0;
 __u32 socks = 0;
 
 SEC("iter/sockmap")
-int count_elems(struct bpf_iter__sockmap *ctx)
+int copy(struct bpf_iter__sockmap *ctx)
 {
        struct sock *sk = ctx->sk;
        __u32 tmp, *key = ctx->key;
        int ret;
 
-       if (key)
-               elems++;
+       if (!key)
+               return 0;
+
+       elems++;
+
+       /* We need a temporary buffer on the stack, since the verifier doesn't
+        * let us use the pointer from the context as an argument to the helper.
+        */
+       tmp = *key;
 
-       if (sk)
+       if (sk) {
                socks++;
+               return bpf_map_update_elem(&dst, &tmp, sk, 0) != 0;
+       }
 
-       return 0;
+       ret = bpf_map_delete_elem(&dst, &tmp);
+       return ret && ret != -ENOENT;
 }
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_sockmap.h b/tools/testing/selftests/bpf/progs/bpf_iter_sockmap.h
deleted file mode 100644 (file)
index 35a675d..0000000
+++ /dev/null
@@ -1,3 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-
-#define SOCKMAP_MAX_ENTRIES (64)
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_btf.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_btf.c
new file mode 100644 (file)
index 0000000..a1ddc36
--- /dev/null
@@ -0,0 +1,50 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020, Oracle and/or its affiliates. */
+#include "bpf_iter.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+
+#include <errno.h>
+
+char _license[] SEC("license") = "GPL";
+
+long tasks = 0;
+long seq_err = 0;
+bool skip = false;
+
+SEC("iter/task")
+int dump_task_struct(struct bpf_iter__task *ctx)
+{
+       struct seq_file *seq = ctx->meta->seq;
+       struct task_struct *task = ctx->task;
+       static struct btf_ptr ptr = { };
+       long ret;
+
+#if __has_builtin(__builtin_btf_type_id)
+       ptr.type_id = bpf_core_type_id_kernel(struct task_struct);
+       ptr.ptr = task;
+
+       if (ctx->meta->seq_num == 0)
+               BPF_SEQ_PRINTF(seq, "Raw BTF task\n");
+
+       ret = bpf_seq_printf_btf(seq, &ptr, sizeof(ptr), 0);
+       switch (ret) {
+       case 0:
+               tasks++;
+               break;
+       case -ERANGE:
+               /* NULL task or task->fs, don't count it as an error. */
+               break;
+       case -E2BIG:
+               return 1;
+       default:
+               seq_err = ret;
+               break;
+       }
+#else
+       skip = true;
+#endif
+
+       return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/btf_ptr.h b/tools/testing/selftests/bpf/progs/btf_ptr.h
new file mode 100644 (file)
index 0000000..c3c9797
--- /dev/null
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2020, Oracle and/or its affiliates. */
+/* "undefine" structs in vmlinux.h, because we "override" them below */
+#define btf_ptr btf_ptr___not_used
+#define BTF_F_COMPACT BTF_F_COMPACT___not_used
+#define BTF_F_NONAME BTF_F_NONAME___not_used
+#define BTF_F_PTR_RAW BTF_F_PTR_RAW___not_used
+#define BTF_F_ZERO BTF_F_ZERO___not_used
+#include "vmlinux.h"
+#undef btf_ptr
+#undef BTF_F_COMPACT
+#undef BTF_F_NONAME
+#undef BTF_F_PTR_RAW
+#undef BTF_F_ZERO
+
+struct btf_ptr {
+       void *ptr;
+       __u32 type_id;
+       __u32 flags;
+};
+
+enum {
+       BTF_F_COMPACT   =       (1ULL << 0),
+       BTF_F_NONAME    =       (1ULL << 1),
+       BTF_F_PTR_RAW   =       (1ULL << 2),
+       BTF_F_ZERO      =       (1ULL << 3),
+};
diff --git a/tools/testing/selftests/bpf/progs/fmod_ret_freplace.c b/tools/testing/selftests/bpf/progs/fmod_ret_freplace.c
new file mode 100644 (file)
index 0000000..c8943cc
--- /dev/null
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+volatile __u64 test_fmod_ret = 0;
+SEC("fmod_ret/security_new_get_constant")
+int BPF_PROG(fmod_ret_test, long val, int ret)
+{
+       test_fmod_ret = 1;
+       return 120;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/freplace_get_constant.c b/tools/testing/selftests/bpf/progs/freplace_get_constant.c
new file mode 100644 (file)
index 0000000..705e4b6
--- /dev/null
@@ -0,0 +1,15 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+volatile __u64 test_get_constant = 0;
+SEC("freplace/get_constant")
+int security_new_get_constant(long val)
+{
+       if (val != 123)
+               return 0;
+       test_get_constant = 1;
+       return test_get_constant; /* original get_constant() returns val - 122 */
+}
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/netif_receive_skb.c b/tools/testing/selftests/bpf/progs/netif_receive_skb.c
new file mode 100644 (file)
index 0000000..6b67003
--- /dev/null
@@ -0,0 +1,249 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020, Oracle and/or its affiliates. */
+
+#include "btf_ptr.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+
+#include <errno.h>
+
+long ret = 0;
+int num_subtests = 0;
+int ran_subtests = 0;
+bool skip = false;
+
+#define STRSIZE                        2048
+#define EXPECTED_STRSIZE       256
+
+#ifndef ARRAY_SIZE
+#define ARRAY_SIZE(x)  (sizeof(x) / sizeof((x)[0]))
+#endif
+
+struct {
+       __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+       __uint(max_entries, 1);
+       __type(key, __u32);
+       __type(value, char[STRSIZE]);
+} strdata SEC(".maps");
+
+static int __strncmp(const void *m1, const void *m2, size_t len)
+{
+       const unsigned char *s1 = m1;
+       const unsigned char *s2 = m2;
+       int i, delta = 0;
+
+       for (i = 0; i < len; i++) {
+               delta = s1[i] - s2[i];
+               if (delta || s1[i] == 0 || s2[i] == 0)
+                       break;
+       }
+       return delta;
+}
+
+#if __has_builtin(__builtin_btf_type_id)
+#define        TEST_BTF(_str, _type, _flags, _expected, ...)                   \
+       do {                                                            \
+               static const char _expectedval[EXPECTED_STRSIZE] =      \
+                                                       _expected;      \
+               static const char _ptrtype[64] = #_type;                \
+               __u64 _hflags = _flags | BTF_F_COMPACT;                 \
+               static _type _ptrdata = __VA_ARGS__;                    \
+               static struct btf_ptr _ptr = { };                       \
+               int _cmp;                                               \
+                                                                       \
+               ++num_subtests;                                         \
+               if (ret < 0)                                            \
+                       break;                                          \
+               ++ran_subtests;                                         \
+               _ptr.ptr = &_ptrdata;                                   \
+               _ptr.type_id = bpf_core_type_id_kernel(_type);          \
+               if (_ptr.type_id <= 0) {                                \
+                       ret = -EINVAL;                                  \
+                       break;                                          \
+               }                                                       \
+               ret = bpf_snprintf_btf(_str, STRSIZE,                   \
+                                      &_ptr, sizeof(_ptr), _hflags);   \
+               if (ret)                                                \
+                       break;                                          \
+               _cmp = __strncmp(_str, _expectedval, EXPECTED_STRSIZE); \
+               if (_cmp != 0) {                                        \
+                       bpf_printk("(%d) got %s", _cmp, _str);          \
+                       bpf_printk("(%d) expected %s", _cmp,            \
+                                  _expectedval);                       \
+                       ret = -EBADMSG;                                 \
+                       break;                                          \
+               }                                                       \
+       } while (0)
+#endif
+
+/* Use where expected data string matches its stringified declaration */
+#define TEST_BTF_C(_str, _type, _flags, ...)                           \
+       TEST_BTF(_str, _type, _flags, "(" #_type ")" #__VA_ARGS__,      \
+                __VA_ARGS__)
+
+/* TRACE_EVENT(netif_receive_skb,
+ *     TP_PROTO(struct sk_buff *skb),
+ */
+SEC("tp_btf/netif_receive_skb")
+int BPF_PROG(trace_netif_receive_skb, struct sk_buff *skb)
+{
+       static __u64 flags[] = { 0, BTF_F_COMPACT, BTF_F_ZERO, BTF_F_PTR_RAW,
+                                BTF_F_NONAME, BTF_F_COMPACT | BTF_F_ZERO |
+                                BTF_F_PTR_RAW | BTF_F_NONAME };
+       static struct btf_ptr p = { };
+       __u32 key = 0;
+       int i, __ret;
+       char *str;
+
+#if __has_builtin(__builtin_btf_type_id)
+       str = bpf_map_lookup_elem(&strdata, &key);
+       if (!str)
+               return 0;
+
+       /* Ensure we can write skb string representation */
+       p.type_id = bpf_core_type_id_kernel(struct sk_buff);
+       p.ptr = skb;
+       for (i = 0; i < ARRAY_SIZE(flags); i++) {
+               ++num_subtests;
+               ret = bpf_snprintf_btf(str, STRSIZE, &p, sizeof(p), 0);
+               if (ret < 0)
+                       bpf_printk("returned %d when writing skb", ret);
+               ++ran_subtests;
+       }
+
+       /* Check invalid ptr value */
+       p.ptr = 0;
+       __ret = bpf_snprintf_btf(str, STRSIZE, &p, sizeof(p), 0);
+       if (__ret >= 0) {
+               bpf_printk("printing NULL should generate error, got (%d)",
+                          __ret);
+               ret = -ERANGE;
+       }
+
+       /* Verify type display for various types. */
+
+       /* simple int */
+       TEST_BTF_C(str, int, 0, 1234);
+       TEST_BTF(str, int, BTF_F_NONAME, "1234", 1234);
+       /* zero value should be printed at toplevel */
+       TEST_BTF(str, int, 0, "(int)0", 0);
+       TEST_BTF(str, int, BTF_F_NONAME, "0", 0);
+       TEST_BTF(str, int, BTF_F_ZERO, "(int)0", 0);
+       TEST_BTF(str, int, BTF_F_NONAME | BTF_F_ZERO, "0", 0);
+       TEST_BTF_C(str, int, 0, -4567);
+       TEST_BTF(str, int, BTF_F_NONAME, "-4567", -4567);
+
+       /* simple char */
+       TEST_BTF_C(str, char, 0, 100);
+       TEST_BTF(str, char, BTF_F_NONAME, "100", 100);
+       /* zero value should be printed at toplevel */
+       TEST_BTF(str, char, 0, "(char)0", 0);
+       TEST_BTF(str, char, BTF_F_NONAME, "0", 0);
+       TEST_BTF(str, char, BTF_F_ZERO, "(char)0", 0);
+       TEST_BTF(str, char, BTF_F_NONAME | BTF_F_ZERO, "0", 0);
+
+       /* simple typedef */
+       TEST_BTF_C(str, uint64_t, 0, 100);
+       TEST_BTF(str, u64, BTF_F_NONAME, "1", 1);
+       /* zero value should be printed at toplevel */
+       TEST_BTF(str, u64, 0, "(u64)0", 0);
+       TEST_BTF(str, u64, BTF_F_NONAME, "0", 0);
+       TEST_BTF(str, u64, BTF_F_ZERO, "(u64)0", 0);
+       TEST_BTF(str, u64, BTF_F_NONAME|BTF_F_ZERO, "0", 0);
+
+       /* typedef struct */
+       TEST_BTF_C(str, atomic_t, 0, {.counter = (int)1,});
+       TEST_BTF(str, atomic_t, BTF_F_NONAME, "{1,}", {.counter = 1,});
+       /* typedef with 0 value should be printed at toplevel */
+       TEST_BTF(str, atomic_t, 0, "(atomic_t){}", {.counter = 0,});
+       TEST_BTF(str, atomic_t, BTF_F_NONAME, "{}", {.counter = 0,});
+       TEST_BTF(str, atomic_t, BTF_F_ZERO, "(atomic_t){.counter = (int)0,}",
+                {.counter = 0,});
+       TEST_BTF(str, atomic_t, BTF_F_NONAME|BTF_F_ZERO,
+                "{0,}", {.counter = 0,});
+
+       /* enum where enum value does (and does not) exist */
+       TEST_BTF_C(str, enum bpf_cmd, 0, BPF_MAP_CREATE);
+       TEST_BTF(str, enum bpf_cmd, 0, "(enum bpf_cmd)BPF_MAP_CREATE", 0);
+       TEST_BTF(str, enum bpf_cmd, BTF_F_NONAME, "BPF_MAP_CREATE",
+                BPF_MAP_CREATE);
+       TEST_BTF(str, enum bpf_cmd, BTF_F_NONAME|BTF_F_ZERO,
+                "BPF_MAP_CREATE", 0);
+
+       TEST_BTF(str, enum bpf_cmd, BTF_F_ZERO, "(enum bpf_cmd)BPF_MAP_CREATE",
+                BPF_MAP_CREATE);
+       TEST_BTF(str, enum bpf_cmd, BTF_F_NONAME|BTF_F_ZERO,
+                "BPF_MAP_CREATE", BPF_MAP_CREATE);
+       TEST_BTF_C(str, enum bpf_cmd, 0, 2000);
+       TEST_BTF(str, enum bpf_cmd, BTF_F_NONAME, "2000", 2000);
+
+       /* simple struct */
+       TEST_BTF_C(str, struct btf_enum, 0,
+                  {.name_off = (__u32)3,.val = (__s32)-1,});
+       TEST_BTF(str, struct btf_enum, BTF_F_NONAME, "{3,-1,}",
+                { .name_off = 3, .val = -1,});
+       TEST_BTF(str, struct btf_enum, BTF_F_NONAME, "{-1,}",
+                { .name_off = 0, .val = -1,});
+       TEST_BTF(str, struct btf_enum, BTF_F_NONAME|BTF_F_ZERO, "{0,-1,}",
+                { .name_off = 0, .val = -1,});
+       /* empty struct should be printed */
+       TEST_BTF(str, struct btf_enum, 0, "(struct btf_enum){}",
+                { .name_off = 0, .val = 0,});
+       TEST_BTF(str, struct btf_enum, BTF_F_NONAME, "{}",
+                { .name_off = 0, .val = 0,});
+       TEST_BTF(str, struct btf_enum, BTF_F_ZERO,
+                "(struct btf_enum){.name_off = (__u32)0,.val = (__s32)0,}",
+                { .name_off = 0, .val = 0,});
+
+       /* struct with pointers */
+       TEST_BTF(str, struct list_head, BTF_F_PTR_RAW,
+                "(struct list_head){.next = (struct list_head *)0x0000000000000001,}",
+                { .next = (struct list_head *)1 });
+       /* NULL pointer should not be displayed */
+       TEST_BTF(str, struct list_head, BTF_F_PTR_RAW,
+                "(struct list_head){}",
+                { .next = (struct list_head *)0 });
+
+       /* struct with char array */
+       TEST_BTF(str, struct bpf_prog_info, 0,
+                "(struct bpf_prog_info){.name = (char[])['f','o','o',],}",
+                { .name = "foo",});
+       TEST_BTF(str, struct bpf_prog_info, BTF_F_NONAME,
+                "{['f','o','o',],}",
+                {.name = "foo",});
+       /* leading null char means do not display string */
+       TEST_BTF(str, struct bpf_prog_info, 0,
+                "(struct bpf_prog_info){}",
+                {.name = {'\0', 'f', 'o', 'o'}});
+       /* handle non-printable characters */
+       TEST_BTF(str, struct bpf_prog_info, 0,
+                "(struct bpf_prog_info){.name = (char[])[1,2,3,],}",
+                { .name = {1, 2, 3, 0}});
+
+       /* struct with non-char array */
+       TEST_BTF(str, struct __sk_buff, 0,
+                "(struct __sk_buff){.cb = (__u32[])[1,2,3,4,5,],}",
+                { .cb = {1, 2, 3, 4, 5,},});
+       TEST_BTF(str, struct __sk_buff, BTF_F_NONAME,
+                "{[1,2,3,4,5,],}",
+                { .cb = { 1, 2, 3, 4, 5},});
+       /* For non-char, arrays, show non-zero values only */
+       TEST_BTF(str, struct __sk_buff, 0,
+                "(struct __sk_buff){.cb = (__u32[])[1,],}",
+                { .cb = { 0, 0, 1, 0, 0},});
+
+       /* struct with bitfields */
+       TEST_BTF_C(str, struct bpf_insn, 0,
+                  {.code = (__u8)1,.dst_reg = (__u8)0x2,.src_reg = (__u8)0x3,.off = (__s16)4,.imm = (__s32)5,});
+       TEST_BTF(str, struct bpf_insn, BTF_F_NONAME, "{1,0x2,0x3,4,5,}",
+                {.code = 1, .dst_reg = 0x2, .src_reg = 0x3, .off = 4,
+                 .imm = 5,});
+#else
+       skip = true;
+#endif
+
+       return 0;
+}
+
+char _license[] SEC("license") = "GPL";
index 1f407e6..7115bce 100644 (file)
@@ -26,20 +26,20 @@ int entry(struct __sk_buff *skb)
        /* Multiple locations to make sure we patch
         * all of them.
         */
-       bpf_tail_call(skb, &jmp_table, 0);
-       bpf_tail_call(skb, &jmp_table, 0);
-       bpf_tail_call(skb, &jmp_table, 0);
-       bpf_tail_call(skb, &jmp_table, 0);
-
-       bpf_tail_call(skb, &jmp_table, 1);
-       bpf_tail_call(skb, &jmp_table, 1);
-       bpf_tail_call(skb, &jmp_table, 1);
-       bpf_tail_call(skb, &jmp_table, 1);
-
-       bpf_tail_call(skb, &jmp_table, 2);
-       bpf_tail_call(skb, &jmp_table, 2);
-       bpf_tail_call(skb, &jmp_table, 2);
-       bpf_tail_call(skb, &jmp_table, 2);
+       bpf_tail_call_static(skb, &jmp_table, 0);
+       bpf_tail_call_static(skb, &jmp_table, 0);
+       bpf_tail_call_static(skb, &jmp_table, 0);
+       bpf_tail_call_static(skb, &jmp_table, 0);
+
+       bpf_tail_call_static(skb, &jmp_table, 1);
+       bpf_tail_call_static(skb, &jmp_table, 1);
+       bpf_tail_call_static(skb, &jmp_table, 1);
+       bpf_tail_call_static(skb, &jmp_table, 1);
+
+       bpf_tail_call_static(skb, &jmp_table, 2);
+       bpf_tail_call_static(skb, &jmp_table, 2);
+       bpf_tail_call_static(skb, &jmp_table, 2);
+       bpf_tail_call_static(skb, &jmp_table, 2);
 
        return 3;
 }
index a093e73..0431e4f 100644 (file)
@@ -13,14 +13,14 @@ struct {
 SEC("classifier/0")
 int bpf_func_0(struct __sk_buff *skb)
 {
-       bpf_tail_call(skb, &jmp_table, 1);
+       bpf_tail_call_static(skb, &jmp_table, 1);
        return 0;
 }
 
 SEC("classifier/1")
 int bpf_func_1(struct __sk_buff *skb)
 {
-       bpf_tail_call(skb, &jmp_table, 2);
+       bpf_tail_call_static(skb, &jmp_table, 2);
        return 1;
 }
 
@@ -33,25 +33,25 @@ int bpf_func_2(struct __sk_buff *skb)
 SEC("classifier/3")
 int bpf_func_3(struct __sk_buff *skb)
 {
-       bpf_tail_call(skb, &jmp_table, 4);
+       bpf_tail_call_static(skb, &jmp_table, 4);
        return 3;
 }
 
 SEC("classifier/4")
 int bpf_func_4(struct __sk_buff *skb)
 {
-       bpf_tail_call(skb, &jmp_table, 3);
+       bpf_tail_call_static(skb, &jmp_table, 3);
        return 4;
 }
 
 SEC("classifier")
 int entry(struct __sk_buff *skb)
 {
-       bpf_tail_call(skb, &jmp_table, 0);
+       bpf_tail_call_static(skb, &jmp_table, 0);
        /* Check multi-prog update. */
-       bpf_tail_call(skb, &jmp_table, 2);
+       bpf_tail_call_static(skb, &jmp_table, 2);
        /* Check tail call limit. */
-       bpf_tail_call(skb, &jmp_table, 3);
+       bpf_tail_call_static(skb, &jmp_table, 3);
        return 3;
 }
 
index cabda87..739dc2a 100644 (file)
@@ -16,14 +16,14 @@ SEC("classifier/0")
 int bpf_func_0(struct __sk_buff *skb)
 {
        count++;
-       bpf_tail_call(skb, &jmp_table, 0);
+       bpf_tail_call_static(skb, &jmp_table, 0);
        return 1;
 }
 
 SEC("classifier")
 int entry(struct __sk_buff *skb)
 {
-       bpf_tail_call(skb, &jmp_table, 0);
+       bpf_tail_call_static(skb, &jmp_table, 0);
        return 0;
 }
 
index b5d9c8e..0103f3d 100644 (file)
@@ -21,7 +21,7 @@ TAIL_FUNC(1)
 static __noinline
 int subprog_tail(struct __sk_buff *skb)
 {
-       bpf_tail_call(skb, &jmp_table, 0);
+       bpf_tail_call_static(skb, &jmp_table, 0);
 
        return skb->len * 2;
 }
@@ -29,7 +29,7 @@ int subprog_tail(struct __sk_buff *skb)
 SEC("classifier")
 int entry(struct __sk_buff *skb)
 {
-       bpf_tail_call(skb, &jmp_table, 1);
+       bpf_tail_call_static(skb, &jmp_table, 1);
 
        return subprog_tail(skb);
 }
index a004ab2..7b1c041 100644 (file)
@@ -14,9 +14,9 @@ static __noinline
 int subprog_tail(struct __sk_buff *skb)
 {
        if (load_byte(skb, 0))
-               bpf_tail_call(skb, &jmp_table, 1);
+               bpf_tail_call_static(skb, &jmp_table, 1);
        else
-               bpf_tail_call(skb, &jmp_table, 0);
+               bpf_tail_call_static(skb, &jmp_table, 0);
        return 1;
 }
 
@@ -32,7 +32,7 @@ int bpf_func_0(struct __sk_buff *skb)
 SEC("classifier")
 int entry(struct __sk_buff *skb)
 {
-       bpf_tail_call(skb, &jmp_table, 0);
+       bpf_tail_call_static(skb, &jmp_table, 0);
 
        return 0;
 }
index 96dbef2..0d5482b 100644 (file)
@@ -16,9 +16,9 @@ int subprog_tail2(struct __sk_buff *skb)
        volatile char arr[64] = {};
 
        if (load_word(skb, 0) || load_half(skb, 0))
-               bpf_tail_call(skb, &jmp_table, 10);
+               bpf_tail_call_static(skb, &jmp_table, 10);
        else
-               bpf_tail_call(skb, &jmp_table, 1);
+               bpf_tail_call_static(skb, &jmp_table, 1);
 
        return skb->len;
 }
@@ -28,7 +28,7 @@ int subprog_tail(struct __sk_buff *skb)
 {
        volatile char arr[64] = {};
 
-       bpf_tail_call(skb, &jmp_table, 0);
+       bpf_tail_call_static(skb, &jmp_table, 0);
 
        return skb->len * 2;
 }
index 98b40a9..9a1b166 100644 (file)
@@ -14,21 +14,21 @@ static volatile int count;
 __noinline
 int subprog_tail_2(struct __sk_buff *skb)
 {
-       bpf_tail_call(skb, &jmp_table, 2);
+       bpf_tail_call_static(skb, &jmp_table, 2);
        return skb->len * 3;
 }
 
 __noinline
 int subprog_tail_1(struct __sk_buff *skb)
 {
-       bpf_tail_call(skb, &jmp_table, 1);
+       bpf_tail_call_static(skb, &jmp_table, 1);
        return skb->len * 2;
 }
 
 __noinline
 int subprog_tail(struct __sk_buff *skb)
 {
-       bpf_tail_call(skb, &jmp_table, 0);
+       bpf_tail_call_static(skb, &jmp_table, 0);
        return skb->len;
 }
 
diff --git a/tools/testing/selftests/bpf/progs/test_btf_skc_cls_ingress.c b/tools/testing/selftests/bpf/progs/test_btf_skc_cls_ingress.c
new file mode 100644 (file)
index 0000000..9a6b85d
--- /dev/null
@@ -0,0 +1,174 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+
+#include <string.h>
+#include <errno.h>
+#include <netinet/in.h>
+#include <linux/stddef.h>
+#include <linux/bpf.h>
+#include <linux/ipv6.h>
+#include <linux/tcp.h>
+#include <linux/if_ether.h>
+#include <linux/pkt_cls.h>
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+#include "bpf_tcp_helpers.h"
+
+struct sockaddr_in6 srv_sa6 = {};
+__u16 listen_tp_sport = 0;
+__u16 req_sk_sport = 0;
+__u32 recv_cookie = 0;
+__u32 gen_cookie = 0;
+__u32 linum = 0;
+
+#define LOG() ({ if (!linum) linum = __LINE__; })
+
+static void test_syncookie_helper(struct ipv6hdr *ip6h, struct tcphdr *th,
+                                 struct tcp_sock *tp,
+                                 struct __sk_buff *skb)
+{
+       if (th->syn) {
+               __s64 mss_cookie;
+               void *data_end;
+
+               data_end = (void *)(long)(skb->data_end);
+
+               if (th->doff * 4 != 40) {
+                       LOG();
+                       return;
+               }
+
+               if ((void *)th + 40 > data_end) {
+                       LOG();
+                       return;
+               }
+
+               mss_cookie = bpf_tcp_gen_syncookie(tp, ip6h, sizeof(*ip6h),
+                                                  th, 40);
+               if (mss_cookie < 0) {
+                       if (mss_cookie != -ENOENT)
+                               LOG();
+               } else {
+                       gen_cookie = (__u32)mss_cookie;
+               }
+       } else if (gen_cookie) {
+               /* It was in cookie mode */
+               int ret = bpf_tcp_check_syncookie(tp, ip6h, sizeof(*ip6h),
+                                                 th, sizeof(*th));
+
+               if (ret < 0) {
+                       if (ret != -ENOENT)
+                               LOG();
+               } else {
+                       recv_cookie = bpf_ntohl(th->ack_seq) - 1;
+               }
+       }
+}
+
+static int handle_ip6_tcp(struct ipv6hdr *ip6h, struct __sk_buff *skb)
+{
+       struct bpf_sock_tuple *tuple;
+       struct bpf_sock *bpf_skc;
+       unsigned int tuple_len;
+       struct tcphdr *th;
+       void *data_end;
+
+       data_end = (void *)(long)(skb->data_end);
+
+       th = (struct tcphdr *)(ip6h + 1);
+       if (th + 1 > data_end)
+               return TC_ACT_OK;
+
+       /* Is it the testing traffic? */
+       if (th->dest != srv_sa6.sin6_port)
+               return TC_ACT_OK;
+
+       tuple_len = sizeof(tuple->ipv6);
+       tuple = (struct bpf_sock_tuple *)&ip6h->saddr;
+       if ((void *)tuple + tuple_len > data_end) {
+               LOG();
+               return TC_ACT_OK;
+       }
+
+       bpf_skc = bpf_skc_lookup_tcp(skb, tuple, tuple_len,
+                                    BPF_F_CURRENT_NETNS, 0);
+       if (!bpf_skc) {
+               LOG();
+               return TC_ACT_OK;
+       }
+
+       if (bpf_skc->state == BPF_TCP_NEW_SYN_RECV) {
+               struct request_sock *req_sk;
+
+               req_sk = (struct request_sock *)bpf_skc_to_tcp_request_sock(bpf_skc);
+               if (!req_sk) {
+                       LOG();
+                       goto release;
+               }
+
+               if (bpf_sk_assign(skb, req_sk, 0)) {
+                       LOG();
+                       goto release;
+               }
+
+               req_sk_sport = req_sk->__req_common.skc_num;
+
+               bpf_sk_release(req_sk);
+               return TC_ACT_OK;
+       } else if (bpf_skc->state == BPF_TCP_LISTEN) {
+               struct tcp_sock *tp;
+
+               tp = bpf_skc_to_tcp_sock(bpf_skc);
+               if (!tp) {
+                       LOG();
+                       goto release;
+               }
+
+               if (bpf_sk_assign(skb, tp, 0)) {
+                       LOG();
+                       goto release;
+               }
+
+               listen_tp_sport = tp->inet_conn.icsk_inet.sk.__sk_common.skc_num;
+
+               test_syncookie_helper(ip6h, th, tp, skb);
+               bpf_sk_release(tp);
+               return TC_ACT_OK;
+       }
+
+       if (bpf_sk_assign(skb, bpf_skc, 0))
+               LOG();
+
+release:
+       bpf_sk_release(bpf_skc);
+       return TC_ACT_OK;
+}
+
+SEC("classifier/ingress")
+int cls_ingress(struct __sk_buff *skb)
+{
+       struct ipv6hdr *ip6h;
+       struct ethhdr *eth;
+       void *data_end;
+
+       data_end = (void *)(long)(skb->data_end);
+
+       eth = (struct ethhdr *)(long)(skb->data);
+       if (eth + 1 > data_end)
+               return TC_ACT_OK;
+
+       if (eth->h_proto != bpf_htons(ETH_P_IPV6))
+               return TC_ACT_OK;
+
+       ip6h = (struct ipv6hdr *)(eth + 1);
+       if (ip6h + 1 > data_end)
+               return TC_ACT_OK;
+
+       if (ip6h->nexthdr == IPPROTO_TCP)
+               return handle_ip6_tcp(ip6h, skb);
+
+       return TC_ACT_OK;
+}
+
+char _license[] SEC("license") = "GPL";
index 42403d0..abb7344 100644 (file)
@@ -39,10 +39,4 @@ int BPF_PROG(prog5, struct task_struct *tsk, const char *buf, bool exec)
        return 0;
 }
 
-SEC("fmod_ret/__set_task_comm")
-int BPF_PROG(prog6, struct task_struct *tsk, const char *buf, bool exec)
-{
-       return !tsk;
-}
-
 char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_pe_preserve_elems.c b/tools/testing/selftests/bpf/progs/test_pe_preserve_elems.c
new file mode 100644 (file)
index 0000000..fb22de7
--- /dev/null
@@ -0,0 +1,38 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+struct {
+       __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
+       __uint(max_entries, 1);
+       __uint(key_size, sizeof(int));
+       __uint(value_size, sizeof(int));
+} array_1 SEC(".maps");
+
+struct {
+       __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
+       __uint(max_entries, 1);
+       __uint(key_size, sizeof(int));
+       __uint(value_size, sizeof(int));
+       __uint(map_flags, BPF_F_PRESERVE_ELEMS);
+} array_2 SEC(".maps");
+
+SEC("raw_tp/sched_switch")
+int BPF_PROG(read_array_1)
+{
+       struct bpf_perf_event_value val;
+
+       return bpf_perf_event_read_value(&array_1, 0, &val, sizeof(val));
+}
+
+SEC("raw_tp/task_rename")
+int BPF_PROG(read_array_2)
+{
+       struct bpf_perf_event_value val;
+
+       return bpf_perf_event_read_value(&array_2, 0, &val, sizeof(val));
+}
+
+char LICENSE[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_raw_tp_test_run.c b/tools/testing/selftests/bpf/progs/test_raw_tp_test_run.c
new file mode 100644 (file)
index 0000000..4c63cc8
--- /dev/null
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+__u32 count = 0;
+__u32 on_cpu = 0xffffffff;
+
+SEC("raw_tp/task_rename")
+int BPF_PROG(rename, struct task_struct *task, char *comm)
+{
+
+       count++;
+       if ((__u64) task == 0x1234ULL && (__u64) comm == 0x5678ULL) {
+               on_cpu = bpf_get_smp_processor_id();
+               return (long)task + (long)comm;
+       }
+
+       return 0;
+}
+
+char _license[] SEC("license") = "GPL";
index bbf8296..1032b29 100644 (file)
 #define IP6(aaaa, bbbb, cccc, dddd)                    \
        { bpf_htonl(aaaa), bpf_htonl(bbbb), bpf_htonl(cccc), bpf_htonl(dddd) }
 
+/* Macros for least-significant byte and word accesses. */
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#define LSE_INDEX(index, size) (index)
+#else
+#define LSE_INDEX(index, size) ((size) - (index) - 1)
+#endif
+#define LSB(value, index)                              \
+       (((__u8 *)&(value))[LSE_INDEX((index), sizeof(value))])
+#define LSW(value, index)                              \
+       (((__u16 *)&(value))[LSE_INDEX((index), sizeof(value) / 2)])
+
 #define MAX_SOCKS 32
 
 struct {
@@ -369,171 +380,146 @@ int ctx_narrow_access(struct bpf_sk_lookup *ctx)
 {
        struct bpf_sock *sk;
        int err, family;
-       __u16 *half;
-       __u8 *byte;
        bool v4;
 
        v4 = (ctx->family == AF_INET);
 
        /* Narrow loads from family field */
-       byte = (__u8 *)&ctx->family;
-       half = (__u16 *)&ctx->family;
-       if (byte[0] != (v4 ? AF_INET : AF_INET6) ||
-           byte[1] != 0 || byte[2] != 0 || byte[3] != 0)
+       if (LSB(ctx->family, 0) != (v4 ? AF_INET : AF_INET6) ||
+           LSB(ctx->family, 1) != 0 || LSB(ctx->family, 2) != 0 || LSB(ctx->family, 3) != 0)
                return SK_DROP;
-       if (half[0] != (v4 ? AF_INET : AF_INET6))
+       if (LSW(ctx->family, 0) != (v4 ? AF_INET : AF_INET6))
                return SK_DROP;
 
-       byte = (__u8 *)&ctx->protocol;
-       if (byte[0] != IPPROTO_TCP ||
-           byte[1] != 0 || byte[2] != 0 || byte[3] != 0)
+       /* Narrow loads from protocol field */
+       if (LSB(ctx->protocol, 0) != IPPROTO_TCP ||
+           LSB(ctx->protocol, 1) != 0 || LSB(ctx->protocol, 2) != 0 || LSB(ctx->protocol, 3) != 0)
                return SK_DROP;
-       half = (__u16 *)&ctx->protocol;
-       if (half[0] != IPPROTO_TCP)
+       if (LSW(ctx->protocol, 0) != IPPROTO_TCP)
                return SK_DROP;
 
        /* Narrow loads from remote_port field. Expect non-0 value. */
-       byte = (__u8 *)&ctx->remote_port;
-       if (byte[0] == 0 && byte[1] == 0 && byte[2] == 0 && byte[3] == 0)
+       if (LSB(ctx->remote_port, 0) == 0 && LSB(ctx->remote_port, 1) == 0 &&
+           LSB(ctx->remote_port, 2) == 0 && LSB(ctx->remote_port, 3) == 0)
                return SK_DROP;
-       half = (__u16 *)&ctx->remote_port;
-       if (half[0] == 0)
+       if (LSW(ctx->remote_port, 0) == 0)
                return SK_DROP;
 
        /* Narrow loads from local_port field. Expect DST_PORT. */
-       byte = (__u8 *)&ctx->local_port;
-       if (byte[0] != ((DST_PORT >> 0) & 0xff) ||
-           byte[1] != ((DST_PORT >> 8) & 0xff) ||
-           byte[2] != 0 || byte[3] != 0)
+       if (LSB(ctx->local_port, 0) != ((DST_PORT >> 0) & 0xff) ||
+           LSB(ctx->local_port, 1) != ((DST_PORT >> 8) & 0xff) ||
+           LSB(ctx->local_port, 2) != 0 || LSB(ctx->local_port, 3) != 0)
                return SK_DROP;
-       half = (__u16 *)&ctx->local_port;
-       if (half[0] != DST_PORT)
+       if (LSW(ctx->local_port, 0) != DST_PORT)
                return SK_DROP;
 
        /* Narrow loads from IPv4 fields */
        if (v4) {
                /* Expect non-0.0.0.0 in remote_ip4 */
-               byte = (__u8 *)&ctx->remote_ip4;
-               if (byte[0] == 0 && byte[1] == 0 &&
-                   byte[2] == 0 && byte[3] == 0)
+               if (LSB(ctx->remote_ip4, 0) == 0 && LSB(ctx->remote_ip4, 1) == 0 &&
+                   LSB(ctx->remote_ip4, 2) == 0 && LSB(ctx->remote_ip4, 3) == 0)
                        return SK_DROP;
-               half = (__u16 *)&ctx->remote_ip4;
-               if (half[0] == 0 && half[1] == 0)
+               if (LSW(ctx->remote_ip4, 0) == 0 && LSW(ctx->remote_ip4, 1) == 0)
                        return SK_DROP;
 
                /* Expect DST_IP4 in local_ip4 */
-               byte = (__u8 *)&ctx->local_ip4;
-               if (byte[0] != ((DST_IP4 >>  0) & 0xff) ||
-                   byte[1] != ((DST_IP4 >>  8) & 0xff) ||
-                   byte[2] != ((DST_IP4 >> 16) & 0xff) ||
-                   byte[3] != ((DST_IP4 >> 24) & 0xff))
+               if (LSB(ctx->local_ip4, 0) != ((DST_IP4 >> 0) & 0xff) ||
+                   LSB(ctx->local_ip4, 1) != ((DST_IP4 >> 8) & 0xff) ||
+                   LSB(ctx->local_ip4, 2) != ((DST_IP4 >> 16) & 0xff) ||
+                   LSB(ctx->local_ip4, 3) != ((DST_IP4 >> 24) & 0xff))
                        return SK_DROP;
-               half = (__u16 *)&ctx->local_ip4;
-               if (half[0] != ((DST_IP4 >>  0) & 0xffff) ||
-                   half[1] != ((DST_IP4 >> 16) & 0xffff))
+               if (LSW(ctx->local_ip4, 0) != ((DST_IP4 >> 0) & 0xffff) ||
+                   LSW(ctx->local_ip4, 1) != ((DST_IP4 >> 16) & 0xffff))
                        return SK_DROP;
        } else {
                /* Expect 0.0.0.0 IPs when family != AF_INET */
-               byte = (__u8 *)&ctx->remote_ip4;
-               if (byte[0] != 0 || byte[1] != 0 &&
-                   byte[2] != 0 || byte[3] != 0)
+               if (LSB(ctx->remote_ip4, 0) != 0 || LSB(ctx->remote_ip4, 1) != 0 ||
+                   LSB(ctx->remote_ip4, 2) != 0 || LSB(ctx->remote_ip4, 3) != 0)
                        return SK_DROP;
-               half = (__u16 *)&ctx->remote_ip4;
-               if (half[0] != 0 || half[1] != 0)
+               if (LSW(ctx->remote_ip4, 0) != 0 || LSW(ctx->remote_ip4, 1) != 0)
                        return SK_DROP;
 
-               byte = (__u8 *)&ctx->local_ip4;
-               if (byte[0] != 0 || byte[1] != 0 &&
-                   byte[2] != 0 || byte[3] != 0)
+               if (LSB(ctx->local_ip4, 0) != 0 || LSB(ctx->local_ip4, 1) != 0 ||
+                   LSB(ctx->local_ip4, 2) != 0 || LSB(ctx->local_ip4, 3) != 0)
                        return SK_DROP;
-               half = (__u16 *)&ctx->local_ip4;
-               if (half[0] != 0 || half[1] != 0)
+               if (LSW(ctx->local_ip4, 0) != 0 || LSW(ctx->local_ip4, 1) != 0)
                        return SK_DROP;
        }
 
        /* Narrow loads from IPv6 fields */
        if (!v4) {
-               /* Expenct non-:: IP in remote_ip6 */
-               byte = (__u8 *)&ctx->remote_ip6;
-               if (byte[0] == 0 && byte[1] == 0 &&
-                   byte[2] == 0 && byte[3] == 0 &&
-                   byte[4] == 0 && byte[5] == 0 &&
-                   byte[6] == 0 && byte[7] == 0 &&
-                   byte[8] == 0 && byte[9] == 0 &&
-                   byte[10] == 0 && byte[11] == 0 &&
-                   byte[12] == 0 && byte[13] == 0 &&
-                   byte[14] == 0 && byte[15] == 0)
+               /* Expect non-:: IP in remote_ip6 */
+               if (LSB(ctx->remote_ip6[0], 0) == 0 && LSB(ctx->remote_ip6[0], 1) == 0 &&
+                   LSB(ctx->remote_ip6[0], 2) == 0 && LSB(ctx->remote_ip6[0], 3) == 0 &&
+                   LSB(ctx->remote_ip6[1], 0) == 0 && LSB(ctx->remote_ip6[1], 1) == 0 &&
+                   LSB(ctx->remote_ip6[1], 2) == 0 && LSB(ctx->remote_ip6[1], 3) == 0 &&
+                   LSB(ctx->remote_ip6[2], 0) == 0 && LSB(ctx->remote_ip6[2], 1) == 0 &&
+                   LSB(ctx->remote_ip6[2], 2) == 0 && LSB(ctx->remote_ip6[2], 3) == 0 &&
+                   LSB(ctx->remote_ip6[3], 0) == 0 && LSB(ctx->remote_ip6[3], 1) == 0 &&
+                   LSB(ctx->remote_ip6[3], 2) == 0 && LSB(ctx->remote_ip6[3], 3) == 0)
                        return SK_DROP;
-               half = (__u16 *)&ctx->remote_ip6;
-               if (half[0] == 0 && half[1] == 0 &&
-                   half[2] == 0 && half[3] == 0 &&
-                   half[4] == 0 && half[5] == 0 &&
-                   half[6] == 0 && half[7] == 0)
+               if (LSW(ctx->remote_ip6[0], 0) == 0 && LSW(ctx->remote_ip6[0], 1) == 0 &&
+                   LSW(ctx->remote_ip6[1], 0) == 0 && LSW(ctx->remote_ip6[1], 1) == 0 &&
+                   LSW(ctx->remote_ip6[2], 0) == 0 && LSW(ctx->remote_ip6[2], 1) == 0 &&
+                   LSW(ctx->remote_ip6[3], 0) == 0 && LSW(ctx->remote_ip6[3], 1) == 0)
                        return SK_DROP;
-
                /* Expect DST_IP6 in local_ip6 */
-               byte = (__u8 *)&ctx->local_ip6;
-               if (byte[0] != ((DST_IP6[0] >>  0) & 0xff) ||
-                   byte[1] != ((DST_IP6[0] >>  8) & 0xff) ||
-                   byte[2] != ((DST_IP6[0] >> 16) & 0xff) ||
-                   byte[3] != ((DST_IP6[0] >> 24) & 0xff) ||
-                   byte[4] != ((DST_IP6[1] >>  0) & 0xff) ||
-                   byte[5] != ((DST_IP6[1] >>  8) & 0xff) ||
-                   byte[6] != ((DST_IP6[1] >> 16) & 0xff) ||
-                   byte[7] != ((DST_IP6[1] >> 24) & 0xff) ||
-                   byte[8] != ((DST_IP6[2] >>  0) & 0xff) ||
-                   byte[9] != ((DST_IP6[2] >>  8) & 0xff) ||
-                   byte[10] != ((DST_IP6[2] >> 16) & 0xff) ||
-                   byte[11] != ((DST_IP6[2] >> 24) & 0xff) ||
-                   byte[12] != ((DST_IP6[3] >>  0) & 0xff) ||
-                   byte[13] != ((DST_IP6[3] >>  8) & 0xff) ||
-                   byte[14] != ((DST_IP6[3] >> 16) & 0xff) ||
-                   byte[15] != ((DST_IP6[3] >> 24) & 0xff))
+               if (LSB(ctx->local_ip6[0], 0) != ((DST_IP6[0] >> 0) & 0xff) ||
+                   LSB(ctx->local_ip6[0], 1) != ((DST_IP6[0] >> 8) & 0xff) ||
+                   LSB(ctx->local_ip6[0], 2) != ((DST_IP6[0] >> 16) & 0xff) ||
+                   LSB(ctx->local_ip6[0], 3) != ((DST_IP6[0] >> 24) & 0xff) ||
+                   LSB(ctx->local_ip6[1], 0) != ((DST_IP6[1] >> 0) & 0xff) ||
+                   LSB(ctx->local_ip6[1], 1) != ((DST_IP6[1] >> 8) & 0xff) ||
+                   LSB(ctx->local_ip6[1], 2) != ((DST_IP6[1] >> 16) & 0xff) ||
+                   LSB(ctx->local_ip6[1], 3) != ((DST_IP6[1] >> 24) & 0xff) ||
+                   LSB(ctx->local_ip6[2], 0) != ((DST_IP6[2] >> 0) & 0xff) ||
+                   LSB(ctx->local_ip6[2], 1) != ((DST_IP6[2] >> 8) & 0xff) ||
+                   LSB(ctx->local_ip6[2], 2) != ((DST_IP6[2] >> 16) & 0xff) ||
+                   LSB(ctx->local_ip6[2], 3) != ((DST_IP6[2] >> 24) & 0xff) ||
+                   LSB(ctx->local_ip6[3], 0) != ((DST_IP6[3] >> 0) & 0xff) ||
+                   LSB(ctx->local_ip6[3], 1) != ((DST_IP6[3] >> 8) & 0xff) ||
+                   LSB(ctx->local_ip6[3], 2) != ((DST_IP6[3] >> 16) & 0xff) ||
+                   LSB(ctx->local_ip6[3], 3) != ((DST_IP6[3] >> 24) & 0xff))
                        return SK_DROP;
-               half = (__u16 *)&ctx->local_ip6;
-               if (half[0] != ((DST_IP6[0] >>  0) & 0xffff) ||
-                   half[1] != ((DST_IP6[0] >> 16) & 0xffff) ||
-                   half[2] != ((DST_IP6[1] >>  0) & 0xffff) ||
-                   half[3] != ((DST_IP6[1] >> 16) & 0xffff) ||
-                   half[4] != ((DST_IP6[2] >>  0) & 0xffff) ||
-                   half[5] != ((DST_IP6[2] >> 16) & 0xffff) ||
-                   half[6] != ((DST_IP6[3] >>  0) & 0xffff) ||
-                   half[7] != ((DST_IP6[3] >> 16) & 0xffff))
+               if (LSW(ctx->local_ip6[0], 0) != ((DST_IP6[0] >> 0) & 0xffff) ||
+                   LSW(ctx->local_ip6[0], 1) != ((DST_IP6[0] >> 16) & 0xffff) ||
+                   LSW(ctx->local_ip6[1], 0) != ((DST_IP6[1] >> 0) & 0xffff) ||
+                   LSW(ctx->local_ip6[1], 1) != ((DST_IP6[1] >> 16) & 0xffff) ||
+                   LSW(ctx->local_ip6[2], 0) != ((DST_IP6[2] >> 0) & 0xffff) ||
+                   LSW(ctx->local_ip6[2], 1) != ((DST_IP6[2] >> 16) & 0xffff) ||
+                   LSW(ctx->local_ip6[3], 0) != ((DST_IP6[3] >> 0) & 0xffff) ||
+                   LSW(ctx->local_ip6[3], 1) != ((DST_IP6[3] >> 16) & 0xffff))
                        return SK_DROP;
        } else {
                /* Expect :: IPs when family != AF_INET6 */
-               byte = (__u8 *)&ctx->remote_ip6;
-               if (byte[0] != 0 || byte[1] != 0 ||
-                   byte[2] != 0 || byte[3] != 0 ||
-                   byte[4] != 0 || byte[5] != 0 ||
-                   byte[6] != 0 || byte[7] != 0 ||
-                   byte[8] != 0 || byte[9] != 0 ||
-                   byte[10] != 0 || byte[11] != 0 ||
-                   byte[12] != 0 || byte[13] != 0 ||
-                   byte[14] != 0 || byte[15] != 0)
+               if (LSB(ctx->remote_ip6[0], 0) != 0 || LSB(ctx->remote_ip6[0], 1) != 0 ||
+                   LSB(ctx->remote_ip6[0], 2) != 0 || LSB(ctx->remote_ip6[0], 3) != 0 ||
+                   LSB(ctx->remote_ip6[1], 0) != 0 || LSB(ctx->remote_ip6[1], 1) != 0 ||
+                   LSB(ctx->remote_ip6[1], 2) != 0 || LSB(ctx->remote_ip6[1], 3) != 0 ||
+                   LSB(ctx->remote_ip6[2], 0) != 0 || LSB(ctx->remote_ip6[2], 1) != 0 ||
+                   LSB(ctx->remote_ip6[2], 2) != 0 || LSB(ctx->remote_ip6[2], 3) != 0 ||
+                   LSB(ctx->remote_ip6[3], 0) != 0 || LSB(ctx->remote_ip6[3], 1) != 0 ||
+                   LSB(ctx->remote_ip6[3], 2) != 0 || LSB(ctx->remote_ip6[3], 3) != 0)
                        return SK_DROP;
-               half = (__u16 *)&ctx->remote_ip6;
-               if (half[0] != 0 || half[1] != 0 ||
-                   half[2] != 0 || half[3] != 0 ||
-                   half[4] != 0 || half[5] != 0 ||
-                   half[6] != 0 || half[7] != 0)
+               if (LSW(ctx->remote_ip6[0], 0) != 0 || LSW(ctx->remote_ip6[0], 1) != 0 ||
+                   LSW(ctx->remote_ip6[1], 0) != 0 || LSW(ctx->remote_ip6[1], 1) != 0 ||
+                   LSW(ctx->remote_ip6[2], 0) != 0 || LSW(ctx->remote_ip6[2], 1) != 0 ||
+                   LSW(ctx->remote_ip6[3], 0) != 0 || LSW(ctx->remote_ip6[3], 1) != 0)
                        return SK_DROP;
 
-               byte = (__u8 *)&ctx->local_ip6;
-               if (byte[0] != 0 || byte[1] != 0 ||
-                   byte[2] != 0 || byte[3] != 0 ||
-                   byte[4] != 0 || byte[5] != 0 ||
-                   byte[6] != 0 || byte[7] != 0 ||
-                   byte[8] != 0 || byte[9] != 0 ||
-                   byte[10] != 0 || byte[11] != 0 ||
-                   byte[12] != 0 || byte[13] != 0 ||
-                   byte[14] != 0 || byte[15] != 0)
+               if (LSB(ctx->local_ip6[0], 0) != 0 || LSB(ctx->local_ip6[0], 1) != 0 ||
+                   LSB(ctx->local_ip6[0], 2) != 0 || LSB(ctx->local_ip6[0], 3) != 0 ||
+                   LSB(ctx->local_ip6[1], 0) != 0 || LSB(ctx->local_ip6[1], 1) != 0 ||
+                   LSB(ctx->local_ip6[1], 2) != 0 || LSB(ctx->local_ip6[1], 3) != 0 ||
+                   LSB(ctx->local_ip6[2], 0) != 0 || LSB(ctx->local_ip6[2], 1) != 0 ||
+                   LSB(ctx->local_ip6[2], 2) != 0 || LSB(ctx->local_ip6[2], 3) != 0 ||
+                   LSB(ctx->local_ip6[3], 0) != 0 || LSB(ctx->local_ip6[3], 1) != 0 ||
+                   LSB(ctx->local_ip6[3], 2) != 0 || LSB(ctx->local_ip6[3], 3) != 0)
                        return SK_DROP;
-               half = (__u16 *)&ctx->local_ip6;
-               if (half[0] != 0 || half[1] != 0 ||
-                   half[2] != 0 || half[3] != 0 ||
-                   half[4] != 0 || half[5] != 0 ||
-                   half[6] != 0 || half[7] != 0)
+               if (LSW(ctx->local_ip6[0], 0) != 0 || LSW(ctx->local_ip6[0], 1) != 0 ||
+                   LSW(ctx->local_ip6[1], 0) != 0 || LSW(ctx->local_ip6[1], 1) != 0 ||
+                   LSW(ctx->local_ip6[2], 0) != 0 || LSW(ctx->local_ip6[2], 1) != 0 ||
+                   LSW(ctx->local_ip6[3], 0) != 0 || LSW(ctx->local_ip6[3], 1) != 0)
                        return SK_DROP;
        }
 
diff --git a/tools/testing/selftests/bpf/progs/test_sock_fields.c b/tools/testing/selftests/bpf/progs/test_sock_fields.c
new file mode 100644 (file)
index 0000000..81b57b9
--- /dev/null
@@ -0,0 +1,253 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019 Facebook */
+
+#include <linux/bpf.h>
+#include <netinet/in.h>
+#include <stdbool.h>
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+#include "bpf_tcp_helpers.h"
+
+enum bpf_linum_array_idx {
+       EGRESS_LINUM_IDX,
+       INGRESS_LINUM_IDX,
+       __NR_BPF_LINUM_ARRAY_IDX,
+};
+
+struct {
+       __uint(type, BPF_MAP_TYPE_ARRAY);
+       __uint(max_entries, __NR_BPF_LINUM_ARRAY_IDX);
+       __type(key, __u32);
+       __type(value, __u32);
+} linum_map SEC(".maps");
+
+struct bpf_spinlock_cnt {
+       struct bpf_spin_lock lock;
+       __u32 cnt;
+};
+
+struct {
+       __uint(type, BPF_MAP_TYPE_SK_STORAGE);
+       __uint(map_flags, BPF_F_NO_PREALLOC);
+       __type(key, int);
+       __type(value, struct bpf_spinlock_cnt);
+} sk_pkt_out_cnt SEC(".maps");
+
+struct {
+       __uint(type, BPF_MAP_TYPE_SK_STORAGE);
+       __uint(map_flags, BPF_F_NO_PREALLOC);
+       __type(key, int);
+       __type(value, struct bpf_spinlock_cnt);
+} sk_pkt_out_cnt10 SEC(".maps");
+
+struct bpf_tcp_sock listen_tp = {};
+struct sockaddr_in6 srv_sa6 = {};
+struct bpf_tcp_sock cli_tp = {};
+struct bpf_tcp_sock srv_tp = {};
+struct bpf_sock listen_sk = {};
+struct bpf_sock srv_sk = {};
+struct bpf_sock cli_sk = {};
+__u64 parent_cg_id = 0;
+__u64 child_cg_id = 0;
+__u64 lsndtime = 0;
+
+static bool is_loopback6(__u32 *a6)
+{
+       return !a6[0] && !a6[1] && !a6[2] && a6[3] == bpf_htonl(1);
+}
+
+static void skcpy(struct bpf_sock *dst,
+                 const struct bpf_sock *src)
+{
+       dst->bound_dev_if = src->bound_dev_if;
+       dst->family = src->family;
+       dst->type = src->type;
+       dst->protocol = src->protocol;
+       dst->mark = src->mark;
+       dst->priority = src->priority;
+       dst->src_ip4 = src->src_ip4;
+       dst->src_ip6[0] = src->src_ip6[0];
+       dst->src_ip6[1] = src->src_ip6[1];
+       dst->src_ip6[2] = src->src_ip6[2];
+       dst->src_ip6[3] = src->src_ip6[3];
+       dst->src_port = src->src_port;
+       dst->dst_ip4 = src->dst_ip4;
+       dst->dst_ip6[0] = src->dst_ip6[0];
+       dst->dst_ip6[1] = src->dst_ip6[1];
+       dst->dst_ip6[2] = src->dst_ip6[2];
+       dst->dst_ip6[3] = src->dst_ip6[3];
+       dst->dst_port = src->dst_port;
+       dst->state = src->state;
+}
+
+static void tpcpy(struct bpf_tcp_sock *dst,
+                 const struct bpf_tcp_sock *src)
+{
+       dst->snd_cwnd = src->snd_cwnd;
+       dst->srtt_us = src->srtt_us;
+       dst->rtt_min = src->rtt_min;
+       dst->snd_ssthresh = src->snd_ssthresh;
+       dst->rcv_nxt = src->rcv_nxt;
+       dst->snd_nxt = src->snd_nxt;
+       dst->snd_una = src->snd_una;
+       dst->mss_cache = src->mss_cache;
+       dst->ecn_flags = src->ecn_flags;
+       dst->rate_delivered = src->rate_delivered;
+       dst->rate_interval_us = src->rate_interval_us;
+       dst->packets_out = src->packets_out;
+       dst->retrans_out = src->retrans_out;
+       dst->total_retrans = src->total_retrans;
+       dst->segs_in = src->segs_in;
+       dst->data_segs_in = src->data_segs_in;
+       dst->segs_out = src->segs_out;
+       dst->data_segs_out = src->data_segs_out;
+       dst->lost_out = src->lost_out;
+       dst->sacked_out = src->sacked_out;
+       dst->bytes_received = src->bytes_received;
+       dst->bytes_acked = src->bytes_acked;
+}
+
+/* Always return CG_OK so that no pkt will be filtered out */
+#define CG_OK 1
+
+#define RET_LOG() ({                                           \
+       linum = __LINE__;                                       \
+       bpf_map_update_elem(&linum_map, &linum_idx, &linum, BPF_NOEXIST);       \
+       return CG_OK;                                           \
+})
+
+SEC("cgroup_skb/egress")
+int egress_read_sock_fields(struct __sk_buff *skb)
+{
+       struct bpf_spinlock_cnt cli_cnt_init = { .lock = 0, .cnt = 0xeB9F };
+       struct bpf_spinlock_cnt *pkt_out_cnt, *pkt_out_cnt10;
+       struct bpf_tcp_sock *tp, *tp_ret;
+       struct bpf_sock *sk, *sk_ret;
+       __u32 linum, linum_idx;
+       struct tcp_sock *ktp;
+
+       linum_idx = EGRESS_LINUM_IDX;
+
+       sk = skb->sk;
+       if (!sk)
+               RET_LOG();
+
+       /* Not the testing egress traffic or
+        * TCP_LISTEN (10) socket will be copied at the ingress side.
+        */
+       if (sk->family != AF_INET6 || !is_loopback6(sk->src_ip6) ||
+           sk->state == 10)
+               return CG_OK;
+
+       if (sk->src_port == bpf_ntohs(srv_sa6.sin6_port)) {
+               /* Server socket */
+               sk_ret = &srv_sk;
+               tp_ret = &srv_tp;
+       } else if (sk->dst_port == srv_sa6.sin6_port) {
+               /* Client socket */
+               sk_ret = &cli_sk;
+               tp_ret = &cli_tp;
+       } else {
+               /* Not the testing egress traffic */
+               return CG_OK;
+       }
+
+       /* It must be a fullsock for cgroup_skb/egress prog */
+       sk = bpf_sk_fullsock(sk);
+       if (!sk)
+               RET_LOG();
+
+       /* Not the testing egress traffic */
+       if (sk->protocol != IPPROTO_TCP)
+               return CG_OK;
+
+       tp = bpf_tcp_sock(sk);
+       if (!tp)
+               RET_LOG();
+
+       skcpy(sk_ret, sk);
+       tpcpy(tp_ret, tp);
+
+       if (sk_ret == &srv_sk) {
+               ktp = bpf_skc_to_tcp_sock(sk);
+
+               if (!ktp)
+                       RET_LOG();
+
+               lsndtime = ktp->lsndtime;
+
+               child_cg_id = bpf_sk_cgroup_id(ktp);
+               if (!child_cg_id)
+                       RET_LOG();
+
+               parent_cg_id = bpf_sk_ancestor_cgroup_id(ktp, 2);
+               if (!parent_cg_id)
+                       RET_LOG();
+
+               /* The userspace has created it for srv sk */
+               pkt_out_cnt = bpf_sk_storage_get(&sk_pkt_out_cnt, ktp, 0, 0);
+               pkt_out_cnt10 = bpf_sk_storage_get(&sk_pkt_out_cnt10, ktp,
+                                                  0, 0);
+       } else {
+               pkt_out_cnt = bpf_sk_storage_get(&sk_pkt_out_cnt, sk,
+                                                &cli_cnt_init,
+                                                BPF_SK_STORAGE_GET_F_CREATE);
+               pkt_out_cnt10 = bpf_sk_storage_get(&sk_pkt_out_cnt10,
+                                                  sk, &cli_cnt_init,
+                                                  BPF_SK_STORAGE_GET_F_CREATE);
+       }
+
+       if (!pkt_out_cnt || !pkt_out_cnt10)
+               RET_LOG();
+
+       /* Even both cnt and cnt10 have lock defined in their BTF,
+        * intentionally one cnt takes lock while one does not
+        * as a test for the spinlock support in BPF_MAP_TYPE_SK_STORAGE.
+        */
+       pkt_out_cnt->cnt += 1;
+       bpf_spin_lock(&pkt_out_cnt10->lock);
+       pkt_out_cnt10->cnt += 10;
+       bpf_spin_unlock(&pkt_out_cnt10->lock);
+
+       return CG_OK;
+}
+
+SEC("cgroup_skb/ingress")
+int ingress_read_sock_fields(struct __sk_buff *skb)
+{
+       struct bpf_tcp_sock *tp;
+       __u32 linum, linum_idx;
+       struct bpf_sock *sk;
+
+       linum_idx = INGRESS_LINUM_IDX;
+
+       sk = skb->sk;
+       if (!sk)
+               RET_LOG();
+
+       /* Not the testing ingress traffic to the server */
+       if (sk->family != AF_INET6 || !is_loopback6(sk->src_ip6) ||
+           sk->src_port != bpf_ntohs(srv_sa6.sin6_port))
+               return CG_OK;
+
+       /* Only interested in TCP_LISTEN */
+       if (sk->state != 10)
+               return CG_OK;
+
+       /* It must be a fullsock for cgroup_skb/ingress prog */
+       sk = bpf_sk_fullsock(sk);
+       if (!sk)
+               RET_LOG();
+
+       tp = bpf_tcp_sock(sk);
+       if (!tp)
+               RET_LOG();
+
+       skcpy(&listen_sk, sk);
+       tpcpy(&listen_tp, tp);
+
+       return CG_OK;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_sock_fields_kern.c b/tools/testing/selftests/bpf/progs/test_sock_fields_kern.c
deleted file mode 100644 (file)
index 9bcaa37..0000000
+++ /dev/null
@@ -1,255 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/* Copyright (c) 2019 Facebook */
-
-#include <linux/bpf.h>
-#include <netinet/in.h>
-#include <stdbool.h>
-
-#include <bpf/bpf_helpers.h>
-#include <bpf/bpf_endian.h>
-
-enum bpf_addr_array_idx {
-       ADDR_SRV_IDX,
-       ADDR_CLI_IDX,
-       __NR_BPF_ADDR_ARRAY_IDX,
-};
-
-enum bpf_result_array_idx {
-       EGRESS_SRV_IDX,
-       EGRESS_CLI_IDX,
-       INGRESS_LISTEN_IDX,
-       __NR_BPF_RESULT_ARRAY_IDX,
-};
-
-enum bpf_linum_array_idx {
-       EGRESS_LINUM_IDX,
-       INGRESS_LINUM_IDX,
-       __NR_BPF_LINUM_ARRAY_IDX,
-};
-
-struct {
-       __uint(type, BPF_MAP_TYPE_ARRAY);
-       __uint(max_entries, __NR_BPF_ADDR_ARRAY_IDX);
-       __type(key, __u32);
-       __type(value, struct sockaddr_in6);
-} addr_map SEC(".maps");
-
-struct {
-       __uint(type, BPF_MAP_TYPE_ARRAY);
-       __uint(max_entries, __NR_BPF_RESULT_ARRAY_IDX);
-       __type(key, __u32);
-       __type(value, struct bpf_sock);
-} sock_result_map SEC(".maps");
-
-struct {
-       __uint(type, BPF_MAP_TYPE_ARRAY);
-       __uint(max_entries, __NR_BPF_RESULT_ARRAY_IDX);
-       __type(key, __u32);
-       __type(value, struct bpf_tcp_sock);
-} tcp_sock_result_map SEC(".maps");
-
-struct {
-       __uint(type, BPF_MAP_TYPE_ARRAY);
-       __uint(max_entries, __NR_BPF_LINUM_ARRAY_IDX);
-       __type(key, __u32);
-       __type(value, __u32);
-} linum_map SEC(".maps");
-
-struct bpf_spinlock_cnt {
-       struct bpf_spin_lock lock;
-       __u32 cnt;
-};
-
-struct {
-       __uint(type, BPF_MAP_TYPE_SK_STORAGE);
-       __uint(map_flags, BPF_F_NO_PREALLOC);
-       __type(key, int);
-       __type(value, struct bpf_spinlock_cnt);
-} sk_pkt_out_cnt SEC(".maps");
-
-struct {
-       __uint(type, BPF_MAP_TYPE_SK_STORAGE);
-       __uint(map_flags, BPF_F_NO_PREALLOC);
-       __type(key, int);
-       __type(value, struct bpf_spinlock_cnt);
-} sk_pkt_out_cnt10 SEC(".maps");
-
-static bool is_loopback6(__u32 *a6)
-{
-       return !a6[0] && !a6[1] && !a6[2] && a6[3] == bpf_htonl(1);
-}
-
-static void skcpy(struct bpf_sock *dst,
-                 const struct bpf_sock *src)
-{
-       dst->bound_dev_if = src->bound_dev_if;
-       dst->family = src->family;
-       dst->type = src->type;
-       dst->protocol = src->protocol;
-       dst->mark = src->mark;
-       dst->priority = src->priority;
-       dst->src_ip4 = src->src_ip4;
-       dst->src_ip6[0] = src->src_ip6[0];
-       dst->src_ip6[1] = src->src_ip6[1];
-       dst->src_ip6[2] = src->src_ip6[2];
-       dst->src_ip6[3] = src->src_ip6[3];
-       dst->src_port = src->src_port;
-       dst->dst_ip4 = src->dst_ip4;
-       dst->dst_ip6[0] = src->dst_ip6[0];
-       dst->dst_ip6[1] = src->dst_ip6[1];
-       dst->dst_ip6[2] = src->dst_ip6[2];
-       dst->dst_ip6[3] = src->dst_ip6[3];
-       dst->dst_port = src->dst_port;
-       dst->state = src->state;
-}
-
-static void tpcpy(struct bpf_tcp_sock *dst,
-                 const struct bpf_tcp_sock *src)
-{
-       dst->snd_cwnd = src->snd_cwnd;
-       dst->srtt_us = src->srtt_us;
-       dst->rtt_min = src->rtt_min;
-       dst->snd_ssthresh = src->snd_ssthresh;
-       dst->rcv_nxt = src->rcv_nxt;
-       dst->snd_nxt = src->snd_nxt;
-       dst->snd_una = src->snd_una;
-       dst->mss_cache = src->mss_cache;
-       dst->ecn_flags = src->ecn_flags;
-       dst->rate_delivered = src->rate_delivered;
-       dst->rate_interval_us = src->rate_interval_us;
-       dst->packets_out = src->packets_out;
-       dst->retrans_out = src->retrans_out;
-       dst->total_retrans = src->total_retrans;
-       dst->segs_in = src->segs_in;
-       dst->data_segs_in = src->data_segs_in;
-       dst->segs_out = src->segs_out;
-       dst->data_segs_out = src->data_segs_out;
-       dst->lost_out = src->lost_out;
-       dst->sacked_out = src->sacked_out;
-       dst->bytes_received = src->bytes_received;
-       dst->bytes_acked = src->bytes_acked;
-}
-
-#define RETURN {                                               \
-       linum = __LINE__;                                       \
-       bpf_map_update_elem(&linum_map, &linum_idx, &linum, 0); \
-       return 1;                                               \
-}
-
-SEC("cgroup_skb/egress")
-int egress_read_sock_fields(struct __sk_buff *skb)
-{
-       struct bpf_spinlock_cnt cli_cnt_init = { .lock = 0, .cnt = 0xeB9F };
-       __u32 srv_idx = ADDR_SRV_IDX, cli_idx = ADDR_CLI_IDX, result_idx;
-       struct bpf_spinlock_cnt *pkt_out_cnt, *pkt_out_cnt10;
-       struct sockaddr_in6 *srv_sa6, *cli_sa6;
-       struct bpf_tcp_sock *tp, *tp_ret;
-       struct bpf_sock *sk, *sk_ret;
-       __u32 linum, linum_idx;
-
-       linum_idx = EGRESS_LINUM_IDX;
-
-       sk = skb->sk;
-       if (!sk || sk->state == 10)
-               RETURN;
-
-       sk = bpf_sk_fullsock(sk);
-       if (!sk || sk->family != AF_INET6 || sk->protocol != IPPROTO_TCP ||
-           !is_loopback6(sk->src_ip6))
-               RETURN;
-
-       tp = bpf_tcp_sock(sk);
-       if (!tp)
-               RETURN;
-
-       srv_sa6 = bpf_map_lookup_elem(&addr_map, &srv_idx);
-       cli_sa6 = bpf_map_lookup_elem(&addr_map, &cli_idx);
-       if (!srv_sa6 || !cli_sa6)
-               RETURN;
-
-       if (sk->src_port == bpf_ntohs(srv_sa6->sin6_port))
-               result_idx = EGRESS_SRV_IDX;
-       else if (sk->src_port == bpf_ntohs(cli_sa6->sin6_port))
-               result_idx = EGRESS_CLI_IDX;
-       else
-               RETURN;
-
-       sk_ret = bpf_map_lookup_elem(&sock_result_map, &result_idx);
-       tp_ret = bpf_map_lookup_elem(&tcp_sock_result_map, &result_idx);
-       if (!sk_ret || !tp_ret)
-               RETURN;
-
-       skcpy(sk_ret, sk);
-       tpcpy(tp_ret, tp);
-
-       if (result_idx == EGRESS_SRV_IDX) {
-               /* The userspace has created it for srv sk */
-               pkt_out_cnt = bpf_sk_storage_get(&sk_pkt_out_cnt, sk, 0, 0);
-               pkt_out_cnt10 = bpf_sk_storage_get(&sk_pkt_out_cnt10, sk,
-                                                  0, 0);
-       } else {
-               pkt_out_cnt = bpf_sk_storage_get(&sk_pkt_out_cnt, sk,
-                                                &cli_cnt_init,
-                                                BPF_SK_STORAGE_GET_F_CREATE);
-               pkt_out_cnt10 = bpf_sk_storage_get(&sk_pkt_out_cnt10,
-                                                  sk, &cli_cnt_init,
-                                                  BPF_SK_STORAGE_GET_F_CREATE);
-       }
-
-       if (!pkt_out_cnt || !pkt_out_cnt10)
-               RETURN;
-
-       /* Even both cnt and cnt10 have lock defined in their BTF,
-        * intentionally one cnt takes lock while one does not
-        * as a test for the spinlock support in BPF_MAP_TYPE_SK_STORAGE.
-        */
-       pkt_out_cnt->cnt += 1;
-       bpf_spin_lock(&pkt_out_cnt10->lock);
-       pkt_out_cnt10->cnt += 10;
-       bpf_spin_unlock(&pkt_out_cnt10->lock);
-
-       RETURN;
-}
-
-SEC("cgroup_skb/ingress")
-int ingress_read_sock_fields(struct __sk_buff *skb)
-{
-       __u32 srv_idx = ADDR_SRV_IDX, result_idx = INGRESS_LISTEN_IDX;
-       struct bpf_tcp_sock *tp, *tp_ret;
-       struct bpf_sock *sk, *sk_ret;
-       struct sockaddr_in6 *srv_sa6;
-       __u32 linum, linum_idx;
-
-       linum_idx = INGRESS_LINUM_IDX;
-
-       sk = skb->sk;
-       if (!sk || sk->family != AF_INET6 || !is_loopback6(sk->src_ip6))
-               RETURN;
-
-       srv_sa6 = bpf_map_lookup_elem(&addr_map, &srv_idx);
-       if (!srv_sa6 || sk->src_port != bpf_ntohs(srv_sa6->sin6_port))
-               RETURN;
-
-       if (sk->state != 10 && sk->state != 12)
-               RETURN;
-
-       sk = bpf_get_listener_sock(sk);
-       if (!sk)
-               RETURN;
-
-       tp = bpf_tcp_sock(sk);
-       if (!tp)
-               RETURN;
-
-       sk_ret = bpf_map_lookup_elem(&sock_result_map, &result_idx);
-       tp_ret = bpf_map_lookup_elem(&tcp_sock_result_map, &result_idx);
-       if (!sk_ret || !tp_ret)
-               RETURN;
-
-       skcpy(sk_ret, sk);
-       tpcpy(tp_ret, tp);
-
-       RETURN;
-}
-
-char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_tc_neigh.c b/tools/testing/selftests/bpf/progs/test_tc_neigh.c
new file mode 100644 (file)
index 0000000..889a72c
--- /dev/null
@@ -0,0 +1,144 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdint.h>
+#include <stdbool.h>
+
+#include <linux/bpf.h>
+#include <linux/stddef.h>
+#include <linux/pkt_cls.h>
+#include <linux/if_ether.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+#ifndef barrier_data
+# define barrier_data(ptr)     asm volatile("": :"r"(ptr) :"memory")
+#endif
+
+#ifndef ctx_ptr
+# define ctx_ptr(field)                (void *)(long)(field)
+#endif
+
+#define dst_to_src_tmp         0xeeddddeeU
+#define src_to_dst_tmp         0xeeffffeeU
+
+#define ip4_src                        0xac100164 /* 172.16.1.100 */
+#define ip4_dst                        0xac100264 /* 172.16.2.100 */
+
+#define ip6_src                        { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \
+                                 0x00, 0x01, 0xde, 0xad, 0xbe, 0xef, 0xca, 0xfe }
+#define ip6_dst                        { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \
+                                 0x00, 0x02, 0xde, 0xad, 0xbe, 0xef, 0xca, 0xfe }
+
+#ifndef v6_equal
+# define v6_equal(a, b)                (a.s6_addr32[0] == b.s6_addr32[0] && \
+                                a.s6_addr32[1] == b.s6_addr32[1] && \
+                                a.s6_addr32[2] == b.s6_addr32[2] && \
+                                a.s6_addr32[3] == b.s6_addr32[3])
+#endif
+
+static __always_inline bool is_remote_ep_v4(struct __sk_buff *skb,
+                                           __be32 addr)
+{
+       void *data_end = ctx_ptr(skb->data_end);
+       void *data = ctx_ptr(skb->data);
+       struct iphdr *ip4h;
+
+       if (data + sizeof(struct ethhdr) > data_end)
+               return false;
+
+       ip4h = (struct iphdr *)(data + sizeof(struct ethhdr));
+       if ((void *)(ip4h + 1) > data_end)
+               return false;
+
+       return ip4h->daddr == addr;
+}
+
+static __always_inline bool is_remote_ep_v6(struct __sk_buff *skb,
+                                           struct in6_addr addr)
+{
+       void *data_end = ctx_ptr(skb->data_end);
+       void *data = ctx_ptr(skb->data);
+       struct ipv6hdr *ip6h;
+
+       if (data + sizeof(struct ethhdr) > data_end)
+               return false;
+
+       ip6h = (struct ipv6hdr *)(data + sizeof(struct ethhdr));
+       if ((void *)(ip6h + 1) > data_end)
+               return false;
+
+       return v6_equal(ip6h->daddr, addr);
+}
+
+SEC("chk_neigh") int tc_chk(struct __sk_buff *skb)
+{
+       void *data_end = ctx_ptr(skb->data_end);
+       void *data = ctx_ptr(skb->data);
+       __u32 *raw = data;
+
+       if (data + sizeof(struct ethhdr) > data_end)
+               return TC_ACT_SHOT;
+
+       return !raw[0] && !raw[1] && !raw[2] ? TC_ACT_SHOT : TC_ACT_OK;
+}
+
+SEC("dst_ingress") int tc_dst(struct __sk_buff *skb)
+{
+       int idx = dst_to_src_tmp;
+       __u8 zero[ETH_ALEN * 2];
+       bool redirect = false;
+
+       switch (skb->protocol) {
+       case __bpf_constant_htons(ETH_P_IP):
+               redirect = is_remote_ep_v4(skb, __bpf_constant_htonl(ip4_src));
+               break;
+       case __bpf_constant_htons(ETH_P_IPV6):
+               redirect = is_remote_ep_v6(skb, (struct in6_addr)ip6_src);
+               break;
+       }
+
+       if (!redirect)
+               return TC_ACT_OK;
+
+       barrier_data(&idx);
+       idx = bpf_ntohl(idx);
+
+       __builtin_memset(&zero, 0, sizeof(zero));
+       if (bpf_skb_store_bytes(skb, 0, &zero, sizeof(zero), 0) < 0)
+               return TC_ACT_SHOT;
+
+       return bpf_redirect_neigh(idx, 0);
+}
+
+SEC("src_ingress") int tc_src(struct __sk_buff *skb)
+{
+       int idx = src_to_dst_tmp;
+       __u8 zero[ETH_ALEN * 2];
+       bool redirect = false;
+
+       switch (skb->protocol) {
+       case __bpf_constant_htons(ETH_P_IP):
+               redirect = is_remote_ep_v4(skb, __bpf_constant_htonl(ip4_dst));
+               break;
+       case __bpf_constant_htons(ETH_P_IPV6):
+               redirect = is_remote_ep_v6(skb, (struct in6_addr)ip6_dst);
+               break;
+       }
+
+       if (!redirect)
+               return TC_ACT_OK;
+
+       barrier_data(&idx);
+       idx = bpf_ntohl(idx);
+
+       __builtin_memset(&zero, 0, sizeof(zero));
+       if (bpf_skb_store_bytes(skb, 0, &zero, sizeof(zero), 0) < 0)
+               return TC_ACT_SHOT;
+
+       return bpf_redirect_neigh(idx, 0);
+}
+
+char __license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_trace_ext.c b/tools/testing/selftests/bpf/progs/test_trace_ext.c
new file mode 100644 (file)
index 0000000..d19a634
--- /dev/null
@@ -0,0 +1,18 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Facebook
+#include <linux/bpf.h>
+#include <stdbool.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+#include <bpf/bpf_tracing.h>
+
+__u64 ext_called = 0;
+
+SEC("freplace/test_pkt_md_access")
+int test_pkt_md_access_new(struct __sk_buff *skb)
+{
+       ext_called = skb->len;
+       return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_trace_ext_tracing.c b/tools/testing/selftests/bpf/progs/test_trace_ext_tracing.c
new file mode 100644 (file)
index 0000000..52f3baf
--- /dev/null
@@ -0,0 +1,25 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+__u64 fentry_called = 0;
+
+SEC("fentry/test_pkt_md_access_new")
+int BPF_PROG(fentry, struct sk_buff *skb)
+{
+       fentry_called = skb->len;
+       return 0;
+}
+
+__u64 fexit_called = 0;
+
+SEC("fexit/test_pkt_md_access_new")
+int BPF_PROG(fexit, struct sk_buff *skb)
+{
+       fexit_called = skb->len;
+       return 0;
+}
+
+char _license[] SEC("license") = "GPL";
index dbb820d..238f5f6 100644 (file)
@@ -130,6 +130,69 @@ extern int test__join_cgroup(const char *path);
 #define CHECK_ATTR(condition, tag, format...) \
        _CHECK(condition, tag, tattr.duration, format)
 
+#define ASSERT_EQ(actual, expected, name) ({                           \
+       static int duration = 0;                                        \
+       typeof(actual) ___act = (actual);                               \
+       typeof(expected) ___exp = (expected);                           \
+       bool ___ok = ___act == ___exp;                                  \
+       CHECK(!___ok, (name),                                           \
+             "unexpected %s: actual %lld != expected %lld\n",          \
+             (name), (long long)(___act), (long long)(___exp));        \
+       ___ok;                                                          \
+})
+
+#define ASSERT_STREQ(actual, expected, name) ({                                \
+       static int duration = 0;                                        \
+       const char *___act = actual;                                    \
+       const char *___exp = expected;                                  \
+       bool ___ok = strcmp(___act, ___exp) == 0;                       \
+       CHECK(!___ok, (name),                                           \
+             "unexpected %s: actual '%s' != expected '%s'\n",          \
+             (name), ___act, ___exp);                                  \
+       ___ok;                                                          \
+})
+
+#define ASSERT_OK(res, name) ({                                                \
+       static int duration = 0;                                        \
+       long long ___res = (res);                                       \
+       bool ___ok = ___res == 0;                                       \
+       CHECK(!___ok, (name), "unexpected error: %lld\n", ___res);      \
+       ___ok;                                                          \
+})
+
+#define ASSERT_ERR(res, name) ({                                       \
+       static int duration = 0;                                        \
+       long long ___res = (res);                                       \
+       bool ___ok = ___res < 0;                                        \
+       CHECK(!___ok, (name), "unexpected success: %lld\n", ___res);    \
+       ___ok;                                                          \
+})
+
+#define ASSERT_NULL(ptr, name) ({                                      \
+       static int duration = 0;                                        \
+       const void *___res = (ptr);                                     \
+       bool ___ok = !___res;                                           \
+       CHECK(!___ok, (name), "unexpected pointer: %p\n", ___res);      \
+       ___ok;                                                          \
+})
+
+#define ASSERT_OK_PTR(ptr, name) ({                                    \
+       static int duration = 0;                                        \
+       const void *___res = (ptr);                                     \
+       bool ___ok = !IS_ERR_OR_NULL(___res);                           \
+       CHECK(!___ok, (name),                                           \
+             "unexpected error: %ld\n", PTR_ERR(___res));              \
+       ___ok;                                                          \
+})
+
+#define ASSERT_ERR_PTR(ptr, name) ({                                   \
+       static int duration = 0;                                        \
+       const void *___res = (ptr);                                     \
+       bool ___ok = IS_ERR(___res);                                    \
+       CHECK(!___ok, (name), "unexpected pointer: %p\n", ___res);      \
+       ___ok;                                                          \
+})
+
 static inline __u64 ptr_to_u64(const void *ptr)
 {
        return (__u64) (unsigned long) ptr;
diff --git a/tools/testing/selftests/bpf/test_sock_fields.c b/tools/testing/selftests/bpf/test_sock_fields.c
deleted file mode 100644 (file)
index 6c9f269..0000000
+++ /dev/null
@@ -1,482 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/* Copyright (c) 2019 Facebook */
-
-#include <sys/socket.h>
-#include <sys/epoll.h>
-#include <netinet/in.h>
-#include <arpa/inet.h>
-#include <unistd.h>
-#include <stdlib.h>
-#include <string.h>
-#include <errno.h>
-
-#include <bpf/bpf.h>
-#include <bpf/libbpf.h>
-
-#include "cgroup_helpers.h"
-#include "bpf_rlimit.h"
-
-enum bpf_addr_array_idx {
-       ADDR_SRV_IDX,
-       ADDR_CLI_IDX,
-       __NR_BPF_ADDR_ARRAY_IDX,
-};
-
-enum bpf_result_array_idx {
-       EGRESS_SRV_IDX,
-       EGRESS_CLI_IDX,
-       INGRESS_LISTEN_IDX,
-       __NR_BPF_RESULT_ARRAY_IDX,
-};
-
-enum bpf_linum_array_idx {
-       EGRESS_LINUM_IDX,
-       INGRESS_LINUM_IDX,
-       __NR_BPF_LINUM_ARRAY_IDX,
-};
-
-struct bpf_spinlock_cnt {
-       struct bpf_spin_lock lock;
-       __u32 cnt;
-};
-
-#define CHECK(condition, tag, format...) ({                            \
-       int __ret = !!(condition);                                      \
-       if (__ret) {                                                    \
-               printf("%s(%d):FAIL:%s ", __func__, __LINE__, tag);     \
-               printf(format);                                         \
-               printf("\n");                                           \
-               exit(-1);                                               \
-       }                                                               \
-})
-
-#define TEST_CGROUP "/test-bpf-sock-fields"
-#define DATA "Hello BPF!"
-#define DATA_LEN sizeof(DATA)
-
-static struct sockaddr_in6 srv_sa6, cli_sa6;
-static int sk_pkt_out_cnt10_fd;
-static int sk_pkt_out_cnt_fd;
-static int linum_map_fd;
-static int addr_map_fd;
-static int tp_map_fd;
-static int sk_map_fd;
-
-static __u32 addr_srv_idx = ADDR_SRV_IDX;
-static __u32 addr_cli_idx = ADDR_CLI_IDX;
-
-static __u32 egress_srv_idx = EGRESS_SRV_IDX;
-static __u32 egress_cli_idx = EGRESS_CLI_IDX;
-static __u32 ingress_listen_idx = INGRESS_LISTEN_IDX;
-
-static __u32 egress_linum_idx = EGRESS_LINUM_IDX;
-static __u32 ingress_linum_idx = INGRESS_LINUM_IDX;
-
-static void init_loopback6(struct sockaddr_in6 *sa6)
-{
-       memset(sa6, 0, sizeof(*sa6));
-       sa6->sin6_family = AF_INET6;
-       sa6->sin6_addr = in6addr_loopback;
-}
-
-static void print_sk(const struct bpf_sock *sk)
-{
-       char src_ip4[24], dst_ip4[24];
-       char src_ip6[64], dst_ip6[64];
-
-       inet_ntop(AF_INET, &sk->src_ip4, src_ip4, sizeof(src_ip4));
-       inet_ntop(AF_INET6, &sk->src_ip6, src_ip6, sizeof(src_ip6));
-       inet_ntop(AF_INET, &sk->dst_ip4, dst_ip4, sizeof(dst_ip4));
-       inet_ntop(AF_INET6, &sk->dst_ip6, dst_ip6, sizeof(dst_ip6));
-
-       printf("state:%u bound_dev_if:%u family:%u type:%u protocol:%u mark:%u priority:%u "
-              "src_ip4:%x(%s) src_ip6:%x:%x:%x:%x(%s) src_port:%u "
-              "dst_ip4:%x(%s) dst_ip6:%x:%x:%x:%x(%s) dst_port:%u\n",
-              sk->state, sk->bound_dev_if, sk->family, sk->type, sk->protocol,
-              sk->mark, sk->priority,
-              sk->src_ip4, src_ip4,
-              sk->src_ip6[0], sk->src_ip6[1], sk->src_ip6[2], sk->src_ip6[3],
-              src_ip6, sk->src_port,
-              sk->dst_ip4, dst_ip4,
-              sk->dst_ip6[0], sk->dst_ip6[1], sk->dst_ip6[2], sk->dst_ip6[3],
-              dst_ip6, ntohs(sk->dst_port));
-}
-
-static void print_tp(const struct bpf_tcp_sock *tp)
-{
-       printf("snd_cwnd:%u srtt_us:%u rtt_min:%u snd_ssthresh:%u rcv_nxt:%u "
-              "snd_nxt:%u snd:una:%u mss_cache:%u ecn_flags:%u "
-              "rate_delivered:%u rate_interval_us:%u packets_out:%u "
-              "retrans_out:%u total_retrans:%u segs_in:%u data_segs_in:%u "
-              "segs_out:%u data_segs_out:%u lost_out:%u sacked_out:%u "
-              "bytes_received:%llu bytes_acked:%llu\n",
-              tp->snd_cwnd, tp->srtt_us, tp->rtt_min, tp->snd_ssthresh,
-              tp->rcv_nxt, tp->snd_nxt, tp->snd_una, tp->mss_cache,
-              tp->ecn_flags, tp->rate_delivered, tp->rate_interval_us,
-              tp->packets_out, tp->retrans_out, tp->total_retrans,
-              tp->segs_in, tp->data_segs_in, tp->segs_out,
-              tp->data_segs_out, tp->lost_out, tp->sacked_out,
-              tp->bytes_received, tp->bytes_acked);
-}
-
-static void check_result(void)
-{
-       struct bpf_tcp_sock srv_tp, cli_tp, listen_tp;
-       struct bpf_sock srv_sk, cli_sk, listen_sk;
-       __u32 ingress_linum, egress_linum;
-       int err;
-
-       err = bpf_map_lookup_elem(linum_map_fd, &egress_linum_idx,
-                                 &egress_linum);
-       CHECK(err == -1, "bpf_map_lookup_elem(linum_map_fd)",
-             "err:%d errno:%d", err, errno);
-
-       err = bpf_map_lookup_elem(linum_map_fd, &ingress_linum_idx,
-                                 &ingress_linum);
-       CHECK(err == -1, "bpf_map_lookup_elem(linum_map_fd)",
-             "err:%d errno:%d", err, errno);
-
-       err = bpf_map_lookup_elem(sk_map_fd, &egress_srv_idx, &srv_sk);
-       CHECK(err == -1, "bpf_map_lookup_elem(sk_map_fd, &egress_srv_idx)",
-             "err:%d errno:%d", err, errno);
-       err = bpf_map_lookup_elem(tp_map_fd, &egress_srv_idx, &srv_tp);
-       CHECK(err == -1, "bpf_map_lookup_elem(tp_map_fd, &egress_srv_idx)",
-             "err:%d errno:%d", err, errno);
-
-       err = bpf_map_lookup_elem(sk_map_fd, &egress_cli_idx, &cli_sk);
-       CHECK(err == -1, "bpf_map_lookup_elem(sk_map_fd, &egress_cli_idx)",
-             "err:%d errno:%d", err, errno);
-       err = bpf_map_lookup_elem(tp_map_fd, &egress_cli_idx, &cli_tp);
-       CHECK(err == -1, "bpf_map_lookup_elem(tp_map_fd, &egress_cli_idx)",
-             "err:%d errno:%d", err, errno);
-
-       err = bpf_map_lookup_elem(sk_map_fd, &ingress_listen_idx, &listen_sk);
-       CHECK(err == -1, "bpf_map_lookup_elem(sk_map_fd, &ingress_listen_idx)",
-             "err:%d errno:%d", err, errno);
-       err = bpf_map_lookup_elem(tp_map_fd, &ingress_listen_idx, &listen_tp);
-       CHECK(err == -1, "bpf_map_lookup_elem(tp_map_fd, &ingress_listen_idx)",
-             "err:%d errno:%d", err, errno);
-
-       printf("listen_sk: ");
-       print_sk(&listen_sk);
-       printf("\n");
-
-       printf("srv_sk: ");
-       print_sk(&srv_sk);
-       printf("\n");
-
-       printf("cli_sk: ");
-       print_sk(&cli_sk);
-       printf("\n");
-
-       printf("listen_tp: ");
-       print_tp(&listen_tp);
-       printf("\n");
-
-       printf("srv_tp: ");
-       print_tp(&srv_tp);
-       printf("\n");
-
-       printf("cli_tp: ");
-       print_tp(&cli_tp);
-       printf("\n");
-
-       CHECK(listen_sk.state != 10 ||
-             listen_sk.family != AF_INET6 ||
-             listen_sk.protocol != IPPROTO_TCP ||
-             memcmp(listen_sk.src_ip6, &in6addr_loopback,
-                    sizeof(listen_sk.src_ip6)) ||
-             listen_sk.dst_ip6[0] || listen_sk.dst_ip6[1] ||
-             listen_sk.dst_ip6[2] || listen_sk.dst_ip6[3] ||
-             listen_sk.src_port != ntohs(srv_sa6.sin6_port) ||
-             listen_sk.dst_port,
-             "Unexpected listen_sk",
-             "Check listen_sk output. ingress_linum:%u",
-             ingress_linum);
-
-       CHECK(srv_sk.state == 10 ||
-             !srv_sk.state ||
-             srv_sk.family != AF_INET6 ||
-             srv_sk.protocol != IPPROTO_TCP ||
-             memcmp(srv_sk.src_ip6, &in6addr_loopback,
-                    sizeof(srv_sk.src_ip6)) ||
-             memcmp(srv_sk.dst_ip6, &in6addr_loopback,
-                    sizeof(srv_sk.dst_ip6)) ||
-             srv_sk.src_port != ntohs(srv_sa6.sin6_port) ||
-             srv_sk.dst_port != cli_sa6.sin6_port,
-             "Unexpected srv_sk", "Check srv_sk output. egress_linum:%u",
-             egress_linum);
-
-       CHECK(cli_sk.state == 10 ||
-             !cli_sk.state ||
-             cli_sk.family != AF_INET6 ||
-             cli_sk.protocol != IPPROTO_TCP ||
-             memcmp(cli_sk.src_ip6, &in6addr_loopback,
-                    sizeof(cli_sk.src_ip6)) ||
-             memcmp(cli_sk.dst_ip6, &in6addr_loopback,
-                    sizeof(cli_sk.dst_ip6)) ||
-             cli_sk.src_port != ntohs(cli_sa6.sin6_port) ||
-             cli_sk.dst_port != srv_sa6.sin6_port,
-             "Unexpected cli_sk", "Check cli_sk output. egress_linum:%u",
-             egress_linum);
-
-       CHECK(listen_tp.data_segs_out ||
-             listen_tp.data_segs_in ||
-             listen_tp.total_retrans ||
-             listen_tp.bytes_acked,
-             "Unexpected listen_tp", "Check listen_tp output. ingress_linum:%u",
-             ingress_linum);
-
-       CHECK(srv_tp.data_segs_out != 2 ||
-             srv_tp.data_segs_in ||
-             srv_tp.snd_cwnd != 10 ||
-             srv_tp.total_retrans ||
-             srv_tp.bytes_acked != 2 * DATA_LEN,
-             "Unexpected srv_tp", "Check srv_tp output. egress_linum:%u",
-             egress_linum);
-
-       CHECK(cli_tp.data_segs_out ||
-             cli_tp.data_segs_in != 2 ||
-             cli_tp.snd_cwnd != 10 ||
-             cli_tp.total_retrans ||
-             cli_tp.bytes_received != 2 * DATA_LEN,
-             "Unexpected cli_tp", "Check cli_tp output. egress_linum:%u",
-             egress_linum);
-}
-
-static void check_sk_pkt_out_cnt(int accept_fd, int cli_fd)
-{
-       struct bpf_spinlock_cnt pkt_out_cnt = {}, pkt_out_cnt10 = {};
-       int err;
-
-       pkt_out_cnt.cnt = ~0;
-       pkt_out_cnt10.cnt = ~0;
-       err = bpf_map_lookup_elem(sk_pkt_out_cnt_fd, &accept_fd, &pkt_out_cnt);
-       if (!err)
-               err = bpf_map_lookup_elem(sk_pkt_out_cnt10_fd, &accept_fd,
-                                         &pkt_out_cnt10);
-
-       /* The bpf prog only counts for fullsock and
-        * passive conneciton did not become fullsock until 3WHS
-        * had been finished.
-        * The bpf prog only counted two data packet out but we
-        * specially init accept_fd's pkt_out_cnt by 2 in
-        * init_sk_storage().  Hence, 4 here.
-        */
-       CHECK(err || pkt_out_cnt.cnt != 4 || pkt_out_cnt10.cnt != 40,
-             "bpf_map_lookup_elem(sk_pkt_out_cnt, &accept_fd)",
-             "err:%d errno:%d pkt_out_cnt:%u pkt_out_cnt10:%u",
-             err, errno, pkt_out_cnt.cnt, pkt_out_cnt10.cnt);
-
-       pkt_out_cnt.cnt = ~0;
-       pkt_out_cnt10.cnt = ~0;
-       err = bpf_map_lookup_elem(sk_pkt_out_cnt_fd, &cli_fd, &pkt_out_cnt);
-       if (!err)
-               err = bpf_map_lookup_elem(sk_pkt_out_cnt10_fd, &cli_fd,
-                                         &pkt_out_cnt10);
-       /* Active connection is fullsock from the beginning.
-        * 1 SYN and 1 ACK during 3WHS
-        * 2 Acks on data packet.
-        *
-        * The bpf_prog initialized it to 0xeB9F.
-        */
-       CHECK(err || pkt_out_cnt.cnt != 0xeB9F + 4 ||
-             pkt_out_cnt10.cnt != 0xeB9F + 40,
-             "bpf_map_lookup_elem(sk_pkt_out_cnt, &cli_fd)",
-             "err:%d errno:%d pkt_out_cnt:%u pkt_out_cnt10:%u",
-             err, errno, pkt_out_cnt.cnt, pkt_out_cnt10.cnt);
-}
-
-static void init_sk_storage(int sk_fd, __u32 pkt_out_cnt)
-{
-       struct bpf_spinlock_cnt scnt = {};
-       int err;
-
-       scnt.cnt = pkt_out_cnt;
-       err = bpf_map_update_elem(sk_pkt_out_cnt_fd, &sk_fd, &scnt,
-                                 BPF_NOEXIST);
-       CHECK(err, "bpf_map_update_elem(sk_pkt_out_cnt_fd)",
-             "err:%d errno:%d", err, errno);
-
-       scnt.cnt *= 10;
-       err = bpf_map_update_elem(sk_pkt_out_cnt10_fd, &sk_fd, &scnt,
-                                 BPF_NOEXIST);
-       CHECK(err, "bpf_map_update_elem(sk_pkt_out_cnt10_fd)",
-             "err:%d errno:%d", err, errno);
-}
-
-static void test(void)
-{
-       int listen_fd, cli_fd, accept_fd, epfd, err;
-       struct epoll_event ev;
-       socklen_t addrlen;
-       int i;
-
-       addrlen = sizeof(struct sockaddr_in6);
-       ev.events = EPOLLIN;
-
-       epfd = epoll_create(1);
-       CHECK(epfd == -1, "epoll_create()", "epfd:%d errno:%d", epfd, errno);
-
-       /* Prepare listen_fd */
-       listen_fd = socket(AF_INET6, SOCK_STREAM | SOCK_NONBLOCK, 0);
-       CHECK(listen_fd == -1, "socket()", "listen_fd:%d errno:%d",
-             listen_fd, errno);
-
-       init_loopback6(&srv_sa6);
-       err = bind(listen_fd, (struct sockaddr *)&srv_sa6, sizeof(srv_sa6));
-       CHECK(err, "bind(listen_fd)", "err:%d errno:%d", err, errno);
-
-       err = getsockname(listen_fd, (struct sockaddr *)&srv_sa6, &addrlen);
-       CHECK(err, "getsockname(listen_fd)", "err:%d errno:%d", err, errno);
-
-       err = listen(listen_fd, 1);
-       CHECK(err, "listen(listen_fd)", "err:%d errno:%d", err, errno);
-
-       /* Prepare cli_fd */
-       cli_fd = socket(AF_INET6, SOCK_STREAM | SOCK_NONBLOCK, 0);
-       CHECK(cli_fd == -1, "socket()", "cli_fd:%d errno:%d", cli_fd, errno);
-
-       init_loopback6(&cli_sa6);
-       err = bind(cli_fd, (struct sockaddr *)&cli_sa6, sizeof(cli_sa6));
-       CHECK(err, "bind(cli_fd)", "err:%d errno:%d", err, errno);
-
-       err = getsockname(cli_fd, (struct sockaddr *)&cli_sa6, &addrlen);
-       CHECK(err, "getsockname(cli_fd)", "err:%d errno:%d",
-             err, errno);
-
-       /* Update addr_map with srv_sa6 and cli_sa6 */
-       err = bpf_map_update_elem(addr_map_fd, &addr_srv_idx, &srv_sa6, 0);
-       CHECK(err, "map_update", "err:%d errno:%d", err, errno);
-
-       err = bpf_map_update_elem(addr_map_fd, &addr_cli_idx, &cli_sa6, 0);
-       CHECK(err, "map_update", "err:%d errno:%d", err, errno);
-
-       /* Connect from cli_sa6 to srv_sa6 */
-       err = connect(cli_fd, (struct sockaddr *)&srv_sa6, addrlen);
-       printf("srv_sa6.sin6_port:%u cli_sa6.sin6_port:%u\n\n",
-              ntohs(srv_sa6.sin6_port), ntohs(cli_sa6.sin6_port));
-       CHECK(err && errno != EINPROGRESS,
-             "connect(cli_fd)", "err:%d errno:%d", err, errno);
-
-       ev.data.fd = listen_fd;
-       err = epoll_ctl(epfd, EPOLL_CTL_ADD, listen_fd, &ev);
-       CHECK(err, "epoll_ctl(EPOLL_CTL_ADD, listen_fd)", "err:%d errno:%d",
-             err, errno);
-
-       /* Accept the connection */
-       /* Have some timeout in accept(listen_fd). Just in case. */
-       err = epoll_wait(epfd, &ev, 1, 1000);
-       CHECK(err != 1 || ev.data.fd != listen_fd,
-             "epoll_wait(listen_fd)",
-             "err:%d errno:%d ev.data.fd:%d listen_fd:%d",
-             err, errno, ev.data.fd, listen_fd);
-
-       accept_fd = accept(listen_fd, NULL, NULL);
-       CHECK(accept_fd == -1, "accept(listen_fd)", "accept_fd:%d errno:%d",
-             accept_fd, errno);
-       close(listen_fd);
-
-       ev.data.fd = cli_fd;
-       err = epoll_ctl(epfd, EPOLL_CTL_ADD, cli_fd, &ev);
-       CHECK(err, "epoll_ctl(EPOLL_CTL_ADD, cli_fd)", "err:%d errno:%d",
-             err, errno);
-
-       init_sk_storage(accept_fd, 2);
-
-       for (i = 0; i < 2; i++) {
-               /* Send some data from accept_fd to cli_fd */
-               err = send(accept_fd, DATA, DATA_LEN, 0);
-               CHECK(err != DATA_LEN, "send(accept_fd)", "err:%d errno:%d",
-                     err, errno);
-
-               /* Have some timeout in recv(cli_fd). Just in case. */
-               err = epoll_wait(epfd, &ev, 1, 1000);
-               CHECK(err != 1 || ev.data.fd != cli_fd,
-                     "epoll_wait(cli_fd)", "err:%d errno:%d ev.data.fd:%d cli_fd:%d",
-                     err, errno, ev.data.fd, cli_fd);
-
-               err = recv(cli_fd, NULL, 0, MSG_TRUNC);
-               CHECK(err, "recv(cli_fd)", "err:%d errno:%d", err, errno);
-       }
-
-       check_sk_pkt_out_cnt(accept_fd, cli_fd);
-
-       close(epfd);
-       close(accept_fd);
-       close(cli_fd);
-
-       check_result();
-}
-
-int main(int argc, char **argv)
-{
-       struct bpf_prog_load_attr attr = {
-               .file = "test_sock_fields_kern.o",
-               .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
-               .prog_flags = BPF_F_TEST_RND_HI32,
-       };
-       int cgroup_fd, egress_fd, ingress_fd, err;
-       struct bpf_program *ingress_prog;
-       struct bpf_object *obj;
-       struct bpf_map *map;
-
-       /* Create a cgroup, get fd, and join it */
-       cgroup_fd = cgroup_setup_and_join(TEST_CGROUP);
-       CHECK(cgroup_fd < 0, "cgroup_setup_and_join()",
-             "cgroup_fd:%d errno:%d", cgroup_fd, errno);
-       atexit(cleanup_cgroup_environment);
-
-       err = bpf_prog_load_xattr(&attr, &obj, &egress_fd);
-       CHECK(err, "bpf_prog_load_xattr()", "err:%d", err);
-
-       ingress_prog = bpf_object__find_program_by_title(obj,
-                                                        "cgroup_skb/ingress");
-       CHECK(!ingress_prog,
-             "bpf_object__find_program_by_title(cgroup_skb/ingress)",
-             "not found");
-       ingress_fd = bpf_program__fd(ingress_prog);
-
-       err = bpf_prog_attach(egress_fd, cgroup_fd, BPF_CGROUP_INET_EGRESS, 0);
-       CHECK(err == -1, "bpf_prog_attach(CPF_CGROUP_INET_EGRESS)",
-             "err:%d errno%d", err, errno);
-
-       err = bpf_prog_attach(ingress_fd, cgroup_fd,
-                             BPF_CGROUP_INET_INGRESS, 0);
-       CHECK(err == -1, "bpf_prog_attach(CPF_CGROUP_INET_INGRESS)",
-             "err:%d errno%d", err, errno);
-       close(cgroup_fd);
-
-       map = bpf_object__find_map_by_name(obj, "addr_map");
-       CHECK(!map, "cannot find addr_map", "(null)");
-       addr_map_fd = bpf_map__fd(map);
-
-       map = bpf_object__find_map_by_name(obj, "sock_result_map");
-       CHECK(!map, "cannot find sock_result_map", "(null)");
-       sk_map_fd = bpf_map__fd(map);
-
-       map = bpf_object__find_map_by_name(obj, "tcp_sock_result_map");
-       CHECK(!map, "cannot find tcp_sock_result_map", "(null)");
-       tp_map_fd = bpf_map__fd(map);
-
-       map = bpf_object__find_map_by_name(obj, "linum_map");
-       CHECK(!map, "cannot find linum_map", "(null)");
-       linum_map_fd = bpf_map__fd(map);
-
-       map = bpf_object__find_map_by_name(obj, "sk_pkt_out_cnt");
-       CHECK(!map, "cannot find sk_pkt_out_cnt", "(null)");
-       sk_pkt_out_cnt_fd = bpf_map__fd(map);
-
-       map = bpf_object__find_map_by_name(obj, "sk_pkt_out_cnt10");
-       CHECK(!map, "cannot find sk_pkt_out_cnt10", "(null)");
-       sk_pkt_out_cnt10_fd = bpf_map__fd(map);
-
-       test();
-
-       bpf_object__close(obj);
-       cleanup_cgroup_environment();
-
-       printf("PASS\n");
-
-       return 0;
-}
diff --git a/tools/testing/selftests/bpf/test_tc_neigh.sh b/tools/testing/selftests/bpf/test_tc_neigh.sh
new file mode 100755 (executable)
index 0000000..31d8c3d
--- /dev/null
@@ -0,0 +1,168 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# This test sets up 3 netns (src <-> fwd <-> dst). There is no direct veth link
+# between src and dst. The netns fwd has veth links to each src and dst. The
+# client is in src and server in dst. The test installs a TC BPF program to each
+# host facing veth in fwd which calls into bpf_redirect_peer() to perform the
+# neigh addr population and redirect; it also installs a dropper prog on the
+# egress side to drop skbs if neigh addrs were not populated.
+
+if [[ $EUID -ne 0 ]]; then
+       echo "This script must be run as root"
+       echo "FAIL"
+       exit 1
+fi
+
+# check that nc, dd, ping, ping6 and timeout are present
+command -v nc >/dev/null 2>&1 || \
+       { echo >&2 "nc is not available"; exit 1; }
+command -v dd >/dev/null 2>&1 || \
+       { echo >&2 "dd is not available"; exit 1; }
+command -v timeout >/dev/null 2>&1 || \
+       { echo >&2 "timeout is not available"; exit 1; }
+command -v ping >/dev/null 2>&1 || \
+       { echo >&2 "ping is not available"; exit 1; }
+command -v ping6 >/dev/null 2>&1 || \
+       { echo >&2 "ping6 is not available"; exit 1; }
+
+readonly GREEN='\033[0;92m'
+readonly RED='\033[0;31m'
+readonly NC='\033[0m' # No Color
+
+readonly PING_ARG="-c 3 -w 10 -q"
+
+readonly TIMEOUT=10
+
+readonly NS_SRC="ns-src-$(mktemp -u XXXXXX)"
+readonly NS_FWD="ns-fwd-$(mktemp -u XXXXXX)"
+readonly NS_DST="ns-dst-$(mktemp -u XXXXXX)"
+
+readonly IP4_SRC="172.16.1.100"
+readonly IP4_DST="172.16.2.100"
+
+readonly IP6_SRC="::1:dead:beef:cafe"
+readonly IP6_DST="::2:dead:beef:cafe"
+
+readonly IP4_SLL="169.254.0.1"
+readonly IP4_DLL="169.254.0.2"
+readonly IP4_NET="169.254.0.0"
+
+cleanup()
+{
+       ip netns del ${NS_SRC}
+       ip netns del ${NS_FWD}
+       ip netns del ${NS_DST}
+}
+
+trap cleanup EXIT
+
+set -e
+
+ip netns add "${NS_SRC}"
+ip netns add "${NS_FWD}"
+ip netns add "${NS_DST}"
+
+ip link add veth_src type veth peer name veth_src_fwd
+ip link add veth_dst type veth peer name veth_dst_fwd
+
+ip link set veth_src netns ${NS_SRC}
+ip link set veth_src_fwd netns ${NS_FWD}
+
+ip link set veth_dst netns ${NS_DST}
+ip link set veth_dst_fwd netns ${NS_FWD}
+
+ip -netns ${NS_SRC} addr add ${IP4_SRC}/32 dev veth_src
+ip -netns ${NS_DST} addr add ${IP4_DST}/32 dev veth_dst
+
+# The fwd netns automatically get a v6 LL address / routes, but also needs v4
+# one in order to start ARP probing. IP4_NET route is added to the endpoints
+# so that the ARP processing will reply.
+
+ip -netns ${NS_FWD} addr add ${IP4_SLL}/32 dev veth_src_fwd
+ip -netns ${NS_FWD} addr add ${IP4_DLL}/32 dev veth_dst_fwd
+
+ip -netns ${NS_SRC} addr add ${IP6_SRC}/128 dev veth_src nodad
+ip -netns ${NS_DST} addr add ${IP6_DST}/128 dev veth_dst nodad
+
+ip -netns ${NS_SRC} link set dev veth_src up
+ip -netns ${NS_FWD} link set dev veth_src_fwd up
+
+ip -netns ${NS_DST} link set dev veth_dst up
+ip -netns ${NS_FWD} link set dev veth_dst_fwd up
+
+ip -netns ${NS_SRC} route add ${IP4_DST}/32 dev veth_src scope global
+ip -netns ${NS_SRC} route add ${IP4_NET}/16 dev veth_src scope global
+ip -netns ${NS_FWD} route add ${IP4_SRC}/32 dev veth_src_fwd scope global
+
+ip -netns ${NS_SRC} route add ${IP6_DST}/128 dev veth_src scope global
+ip -netns ${NS_FWD} route add ${IP6_SRC}/128 dev veth_src_fwd scope global
+
+ip -netns ${NS_DST} route add ${IP4_SRC}/32 dev veth_dst scope global
+ip -netns ${NS_DST} route add ${IP4_NET}/16 dev veth_dst scope global
+ip -netns ${NS_FWD} route add ${IP4_DST}/32 dev veth_dst_fwd scope global
+
+ip -netns ${NS_DST} route add ${IP6_SRC}/128 dev veth_dst scope global
+ip -netns ${NS_FWD} route add ${IP6_DST}/128 dev veth_dst_fwd scope global
+
+fmac_src=$(ip netns exec ${NS_FWD} cat /sys/class/net/veth_src_fwd/address)
+fmac_dst=$(ip netns exec ${NS_FWD} cat /sys/class/net/veth_dst_fwd/address)
+
+ip -netns ${NS_SRC} neigh add ${IP4_DST} dev veth_src lladdr $fmac_src
+ip -netns ${NS_DST} neigh add ${IP4_SRC} dev veth_dst lladdr $fmac_dst
+
+ip -netns ${NS_SRC} neigh add ${IP6_DST} dev veth_src lladdr $fmac_src
+ip -netns ${NS_DST} neigh add ${IP6_SRC} dev veth_dst lladdr $fmac_dst
+
+veth_dst=$(ip netns exec ${NS_FWD} cat /sys/class/net/veth_dst_fwd/ifindex | awk '{printf "%08x\n", $1}')
+veth_src=$(ip netns exec ${NS_FWD} cat /sys/class/net/veth_src_fwd/ifindex | awk '{printf "%08x\n", $1}')
+
+xxd -p < test_tc_neigh.o   | sed "s/eeddddee/$veth_src/g" | xxd -r -p > test_tc_neigh.x.o
+xxd -p < test_tc_neigh.x.o | sed "s/eeffffee/$veth_dst/g" | xxd -r -p > test_tc_neigh.y.o
+
+ip netns exec ${NS_FWD} tc qdisc add dev veth_src_fwd clsact
+ip netns exec ${NS_FWD} tc filter add dev veth_src_fwd ingress bpf da obj test_tc_neigh.y.o sec src_ingress
+ip netns exec ${NS_FWD} tc filter add dev veth_src_fwd egress  bpf da obj test_tc_neigh.y.o sec chk_neigh
+
+ip netns exec ${NS_FWD} tc qdisc add dev veth_dst_fwd clsact
+ip netns exec ${NS_FWD} tc filter add dev veth_dst_fwd ingress bpf da obj test_tc_neigh.y.o sec dst_ingress
+ip netns exec ${NS_FWD} tc filter add dev veth_dst_fwd egress  bpf da obj test_tc_neigh.y.o sec chk_neigh
+
+rm -f test_tc_neigh.x.o test_tc_neigh.y.o
+
+ip netns exec ${NS_DST} bash -c "nc -4 -l -p 9004 &"
+ip netns exec ${NS_DST} bash -c "nc -6 -l -p 9006 &"
+
+set +e
+
+TEST="TCPv4 connectivity test"
+ip netns exec ${NS_SRC} bash -c "timeout ${TIMEOUT} dd if=/dev/zero bs=1000 count=100 > /dev/tcp/${IP4_DST}/9004"
+if [ $? -ne 0 ]; then
+       echo -e "${TEST}: ${RED}FAIL${NC}"
+       exit 1
+fi
+echo -e "${TEST}: ${GREEN}PASS${NC}"
+
+TEST="TCPv6 connectivity test"
+ip netns exec ${NS_SRC} bash -c "timeout ${TIMEOUT} dd if=/dev/zero bs=1000 count=100 > /dev/tcp/${IP6_DST}/9006"
+if [ $? -ne 0 ]; then
+       echo -e "${TEST}: ${RED}FAIL${NC}"
+       exit 1
+fi
+echo -e "${TEST}: ${GREEN}PASS${NC}"
+
+TEST="ICMPv4 connectivity test"
+ip netns exec ${NS_SRC} ping  $PING_ARG ${IP4_DST}
+if [ $? -ne 0 ]; then
+       echo -e "${TEST}: ${RED}FAIL${NC}"
+       exit 1
+fi
+echo -e "${TEST}: ${GREEN}PASS${NC}"
+
+TEST="ICMPv6 connectivity test"
+ip netns exec ${NS_SRC} ping6 $PING_ARG ${IP6_DST}
+if [ $? -ne 0 ]; then
+       echo -e "${TEST}: ${RED}FAIL${NC}"
+       exit 1
+fi
+echo -e "${TEST}: ${GREEN}PASS${NC}"
index d781bc8..ca8fdb1 100644 (file)
        .result = REJECT,
        .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
 },
+{
+       "check known subreg with unknown reg",
+       .insns = {
+       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
+       BPF_ALU64_IMM(BPF_LSH, BPF_REG_0, 32),
+       BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1),
+       BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xFFFF1234),
+       /* Upper bits are unknown but AND above masks out 1 zero'ing lower bits */
+       BPF_JMP32_IMM(BPF_JLT, BPF_REG_0, 1, 1),
+       BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, 512),
+       BPF_MOV64_IMM(BPF_REG_0, 0),
+       BPF_EXIT_INSN(),
+       },
+       .result = ACCEPT,
+       .retval = 0
+},
index 056e027..006b5bd 100644 (file)
        .errstr = "Unreleased reference",
        .result = REJECT,
 },
+{
+       "reference tracking: bpf_sk_release(btf_tcp_sock)",
+       .insns = {
+       BPF_SK_LOOKUP(sk_lookup_tcp),
+       BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+       BPF_EXIT_INSN(),
+       BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+       BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+       BPF_EMIT_CALL(BPF_FUNC_skc_to_tcp_sock),
+       BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 3),
+       BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+       BPF_EMIT_CALL(BPF_FUNC_sk_release),
+       BPF_EXIT_INSN(),
+       BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+       BPF_EMIT_CALL(BPF_FUNC_sk_release),
+       BPF_EXIT_INSN(),
+       },
+       .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+       .result = ACCEPT,
+       .result_unpriv = REJECT,
+       .errstr_unpriv = "unknown func",
+},
+{
+       "reference tracking: use ptr from bpf_skc_to_tcp_sock() after release",
+       .insns = {
+       BPF_SK_LOOKUP(sk_lookup_tcp),
+       BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+       BPF_EXIT_INSN(),
+       BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+       BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+       BPF_EMIT_CALL(BPF_FUNC_skc_to_tcp_sock),
+       BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 3),
+       BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+       BPF_EMIT_CALL(BPF_FUNC_sk_release),
+       BPF_EXIT_INSN(),
+       BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
+       BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+       BPF_EMIT_CALL(BPF_FUNC_sk_release),
+       BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_7, 0),
+       BPF_EXIT_INSN(),
+       },
+       .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+       .result = REJECT,
+       .errstr = "invalid mem access",
+       .result_unpriv = REJECT,
+       .errstr_unpriv = "unknown func",
+},