Merge https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Author:     Jakub Kicinski <kuba@kernel.org>
AuthorDate: Sat, 9 Jul 2022 19:24:15 +0000 (12:24 -0700)
Commit:     Jakub Kicinski <kuba@kernel.org>
CommitDate: Sat, 9 Jul 2022 19:24:16 +0000 (12:24 -0700)
Daniel Borkmann says:

====================
pull-request: bpf-next 2022-07-09

We've added 94 non-merge commits during the last 19 days, which contain
a total of 125 files changed, 5141 insertions(+), 6701 deletions(-).

The main changes are:

1) Add a new way for performing BTF type queries to BPF, from Daniel Müller.

2) Add inlining of calls to the bpf_loop() helper when its function callback is
   statically known, from Eduard Zingerman.

3) Implement BPF TCP CC framework usability improvements, from Jörn-Thorben Hinz.

4) Add LSM flavor for attaching per-cgroup BPF programs to existing LSM
   hooks, from Stanislav Fomichev.

5) Remove all deprecated libbpf APIs in prep for 1.0 release, from Andrii Nakryiko.

6) Add benchmarks around local_storage to BPF selftests, from Dave Marchevsky.

7) Remove the AF_XDP samples (given the move to libxdp) and improve the AF_XDP
   selftests in various ways, from Magnus Karlsson & Maciej Fijalkowski.

8) Add bpftool improvements for memcg probing and bash completion, from Quentin Monnet.

9) Add arm64 JIT support for BPF-2-BPF coupled with tail calls, from Jakub Sitnicki.

10) Optimize sockmap throughput of UDP transmissions, improving it by 61%,
    from Cong Wang.

11) Rework perf's BPF prologue code to remove deprecated functions, from Jiri Olsa.

12) Fix sockmap teardown path to avoid sleepable sk_psock_stop, from John Fastabend.

13) Fix libbpf's cleanup around legacy kprobe/uprobe on error case, from Chuang Wang.

14) Fix libbpf's bpf_helpers.h to work with gcc for the case of its sec/pragma
    macro, from James Hilliard.

15) Fix libbpf's pt_regs macros for riscv to use a0 for RC register, from Yixun Lan.

16) Fix bpftool to show the name of type BPF_OBJ_LINK, from Yafang Shao.

* https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next: (94 commits)
  selftests/bpf: Fix xdp_synproxy build failure if CONFIG_NF_CONNTRACK=m/n
  bpf: Correctly propagate errors up from bpf_core_composites_match
  libbpf: Disable SEC pragma macro on GCC
  bpf: Check attach_func_proto more carefully in check_return_code
  selftests/bpf: Add test involving restrict type qualifier
  bpftool: Add support for KIND_RESTRICT to gen min_core_btf command
  MAINTAINERS: Add entry for AF_XDP selftests files
  selftests, xsk: Rename AF_XDP testing app
  bpf, docs: Remove deprecated xsk libbpf APIs description
  selftests/bpf: Add benchmark for local_storage RCU Tasks Trace usage
  libbpf, riscv: Use a0 for RC register
  libbpf: Remove unnecessary usdt_rel_ip assignments
  selftests/bpf: Fix few more compiler warnings
  selftests/bpf: Fix bogus uninitialized variable warning
  bpftool: Remove zlib feature test from Makefile
  libbpf: Cleanup the legacy uprobe_event on failed add/attach_event()
  libbpf: Fix wrong variable used in perf_event_uprobe_open_legacy()
  libbpf: Cleanup the legacy kprobe_event on failed add/attach_event()
  selftests/bpf: Add type match test against kernel's task_struct
  selftests/bpf: Add nested type to type based tests
  ...
====================

Link: https://lore.kernel.org/r/20220708233145.32365-1-daniel@iogearbox.net
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
129 files changed:
Documentation/bpf/instruction-set.rst
Documentation/bpf/libbpf/libbpf_naming_convention.rst
MAINTAINERS
arch/arm64/net/bpf_jit_comp.c
arch/x86/net/bpf_jit_comp.c
include/linux/bpf-cgroup-defs.h
include/linux/bpf-cgroup.h
include/linux/bpf.h
include/linux/bpf_lsm.h
include/linux/bpf_verifier.h
include/linux/btf_ids.h
include/linux/filter.h
include/linux/net.h
include/net/tcp.h
include/net/udp.h
include/uapi/linux/bpf.h
kernel/bpf/bpf_iter.c
kernel/bpf/bpf_lsm.c
kernel/bpf/bpf_struct_ops.c
kernel/bpf/btf.c
kernel/bpf/cgroup.c
kernel/bpf/core.c
kernel/bpf/syscall.c
kernel/bpf/trampoline.c
kernel/bpf/verifier.c
kernel/trace/trace_uprobe.c
lib/test_bpf.c
net/core/filter.c
net/core/skmsg.c
net/core/sock_map.c
net/ipv4/af_inet.c
net/ipv4/bpf_tcp_ca.c
net/ipv4/tcp.c
net/ipv4/udp.c
net/ipv6/af_inet6.c
net/unix/af_unix.c
samples/bpf/Makefile
samples/bpf/xdp1_kern.c
samples/bpf/xdp2_kern.c
samples/bpf/xdp_tx_iptunnel_kern.c
samples/bpf/xdpsock.h [deleted file]
samples/bpf/xdpsock_ctrl_proc.c [deleted file]
samples/bpf/xdpsock_kern.c [deleted file]
samples/bpf/xdpsock_user.c [deleted file]
samples/bpf/xsk_fwd.c [deleted file]
tools/bpf/bpftool/Documentation/bpftool-feature.rst
tools/bpf/bpftool/Makefile
tools/bpf/bpftool/bash-completion/bpftool
tools/bpf/bpftool/cgroup.c
tools/bpf/bpftool/common.c
tools/bpf/bpftool/feature.c
tools/bpf/bpftool/gen.c
tools/bpf/bpftool/main.h
tools/include/linux/btf_ids.h
tools/include/uapi/linux/bpf.h
tools/lib/bpf/Build
tools/lib/bpf/Makefile
tools/lib/bpf/bpf.c
tools/lib/bpf/bpf.h
tools/lib/bpf/bpf_core_read.h
tools/lib/bpf/bpf_helpers.h
tools/lib/bpf/bpf_tracing.h
tools/lib/bpf/btf.c
tools/lib/bpf/btf.h
tools/lib/bpf/btf_dump.c
tools/lib/bpf/libbpf.c
tools/lib/bpf/libbpf.h
tools/lib/bpf/libbpf.map
tools/lib/bpf/libbpf_common.h
tools/lib/bpf/libbpf_internal.h
tools/lib/bpf/libbpf_legacy.h
tools/lib/bpf/libbpf_probes.c
tools/lib/bpf/netlink.c
tools/lib/bpf/relo_core.c
tools/lib/bpf/relo_core.h
tools/lib/bpf/usdt.c
tools/lib/bpf/xsk.c [deleted file]
tools/lib/bpf/xsk.h [deleted file]
tools/perf/util/bpf-loader.c
tools/testing/selftests/bpf/.gitignore
tools/testing/selftests/bpf/Makefile
tools/testing/selftests/bpf/bench.c
tools/testing/selftests/bpf/bench.h
tools/testing/selftests/bpf/benchs/bench_local_storage.c [new file with mode: 0644]
tools/testing/selftests/bpf/benchs/bench_local_storage_rcu_tasks_trace.c [new file with mode: 0644]
tools/testing/selftests/bpf/benchs/run_bench_local_storage.sh [new file with mode: 0755]
tools/testing/selftests/bpf/benchs/run_bench_local_storage_rcu_tasks_trace.sh [new file with mode: 0755]
tools/testing/selftests/bpf/benchs/run_common.sh
tools/testing/selftests/bpf/bpf_legacy.h
tools/testing/selftests/bpf/config
tools/testing/selftests/bpf/network_helpers.c
tools/testing/selftests/bpf/prog_tests/bpf_loop.c
tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
tools/testing/selftests/bpf/prog_tests/btf.c
tools/testing/selftests/bpf/prog_tests/core_reloc.c
tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
tools/testing/selftests/bpf/prog_tests/lsm_cgroup.c [new file with mode: 0644]
tools/testing/selftests/bpf/prog_tests/resolve_btfids.c
tools/testing/selftests/bpf/prog_tests/sock_fields.c
tools/testing/selftests/bpf/prog_tests/usdt.c
tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c
tools/testing/selftests/bpf/progs/bpf_loop.c
tools/testing/selftests/bpf/progs/bpf_tracing_net.h
tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___diff.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/core_reloc_types.h
tools/testing/selftests/bpf/progs/local_storage_bench.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/local_storage_rcu_tasks_trace_bench.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/lsm_cgroup.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/lsm_cgroup_nonvoid.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/tcp_ca_incompl_cong_ops.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/tcp_ca_unsupp_cong_op.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/tcp_ca_write_sk_pacing.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/test_btf_haskv.c [deleted file]
tools/testing/selftests/bpf/progs/test_btf_newkv.c
tools/testing/selftests/bpf/progs/test_core_reloc_kernel.c
tools/testing/selftests/bpf/progs/test_core_reloc_type_based.c
tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c
tools/testing/selftests/bpf/test_bpftool_synctypes.py
tools/testing/selftests/bpf/test_btf.h
tools/testing/selftests/bpf/test_verifier.c
tools/testing/selftests/bpf/test_xsk.sh
tools/testing/selftests/bpf/verifier/bpf_loop_inline.c [new file with mode: 0644]
tools/testing/selftests/bpf/xdpxceiver.c [deleted file]
tools/testing/selftests/bpf/xdpxceiver.h [deleted file]
tools/testing/selftests/bpf/xsk.c [new file with mode: 0644]
tools/testing/selftests/bpf/xsk.h [new file with mode: 0644]
tools/testing/selftests/bpf/xsk_prereqs.sh
tools/testing/selftests/bpf/xskxceiver.c [new file with mode: 0644]
tools/testing/selftests/bpf/xskxceiver.h [new file with mode: 0644]

diff --git a/Documentation/bpf/instruction-set.rst b/Documentation/bpf/instruction-set.rst
index 9e27fbd..1b0e671 100644
@@ -351,7 +351,7 @@ These instructions have seven implicit operands:
  * Register R0 is an implicit output which contains the data fetched from
    the packet.
  * Registers R1-R5 are scratch registers that are clobbered after a call to
-   ``BPF_ABS | BPF_LD`` or ``BPF_IND`` | BPF_LD instructions.
+   ``BPF_ABS | BPF_LD`` or ``BPF_IND | BPF_LD`` instructions.
 
 These instructions have an implicit program exit condition as well. When an
 eBPF program is trying to access the data beyond the packet boundary, the
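
For context, a minimal illustrative sketch (not part of this series) of the
legacy packet-access pattern documented above, built with the insn macros
from include/linux/filter.h; R6 must hold the skb pointer, R0 receives the
fetched data, and R1-R5 are clobbered by each load:

    struct bpf_insn legacy_ld[] = {
            /* R0 = byte at fixed packet offset 14 */
            BPF_LD_ABS(BPF_B, 14),
            /* stash R0 before the next load clobbers it */
            BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
            /* R0 = half-word at variable offset R7 + 0 */
            BPF_LD_IND(BPF_H, BPF_REG_7, 0),
            BPF_MOV64_IMM(BPF_REG_0, 0),
            BPF_EXIT_INSN(),
    };
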
diff --git a/Documentation/bpf/libbpf/libbpf_naming_convention.rst b/Documentation/bpf/libbpf/libbpf_naming_convention.rst
index f86360f..c5ac97f 100644
@@ -9,8 +9,8 @@ described here. It's recommended to follow these conventions whenever a
 new function or type is added to keep libbpf API clean and consistent.
 
 All types and functions provided by libbpf API should have one of the
-following prefixes: ``bpf_``, ``btf_``, ``libbpf_``, ``xsk_``,
-``btf_dump_``, ``ring_buffer_``, ``perf_buffer_``.
+following prefixes: ``bpf_``, ``btf_``, ``libbpf_``, ``btf_dump_``,
+``ring_buffer_``, ``perf_buffer_``.
 
 System call wrappers
 --------------------
@@ -59,15 +59,6 @@ Auxiliary functions and types that don't fit well in any of categories
 described above should have ``libbpf_`` prefix, e.g.
 ``libbpf_get_error`` or ``libbpf_prog_type_by_name``.
 
-AF_XDP functions
--------------------
-
-AF_XDP functions should have an ``xsk_`` prefix, e.g.
-``xsk_umem__get_data`` or ``xsk_umem__create``. The interface consists
-of both low-level ring access functions and high-level configuration
-functions. These can be mixed and matched. Note that these functions
-are not reentrant for performance reasons.
-
 ABI
 ---
 
diff --git a/MAINTAINERS b/MAINTAINERS
index c4b709e..14b0749 100644
@@ -21917,8 +21917,7 @@ F:      include/uapi/linux/if_xdp.h
 F:     include/uapi/linux/xdp_diag.h
 F:     include/net/netns/xdp.h
 F:     net/xdp/
-F:     samples/bpf/xdpsock*
-F:     tools/lib/bpf/xsk*
+F:     tools/testing/selftests/bpf/*xsk*
 
 XEN BLOCK SUBSYSTEM
 M:     Roger Pau Monné <roger.pau@citrix.com>
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index 42f2e9a..f08a444 100644
@@ -246,6 +246,7 @@ static bool is_lsi_offset(int offset, int scale)
 static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf)
 {
        const struct bpf_prog *prog = ctx->prog;
+       const bool is_main_prog = prog->aux->func_idx == 0;
        const u8 r6 = bpf2a64[BPF_REG_6];
        const u8 r7 = bpf2a64[BPF_REG_7];
        const u8 r8 = bpf2a64[BPF_REG_8];
@@ -299,7 +300,7 @@ static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf)
        /* Set up BPF prog stack base register */
        emit(A64_MOV(1, fp, A64_SP), ctx);
 
-       if (!ebpf_from_cbpf) {
+       if (!ebpf_from_cbpf && is_main_prog) {
                /* Initialize tail_call_cnt */
                emit(A64_MOVZ(1, tcc, 0, 0), ctx);
 
@@ -1530,3 +1531,9 @@ void bpf_jit_free_exec(void *addr)
 {
        return vfree(addr);
 }
+
+/* Indicate the JIT backend supports mixing bpf2bpf and tailcalls. */
+bool bpf_jit_supports_subprog_tailcalls(void)
+{
+       return true;
+}
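
With tail_call_cnt now initialized only for the main program and the new
bpf_jit_supports_subprog_tailcalls() hook returning true, arm64 accepts
programs that tail call out of a BPF-to-BPF subprogram. A minimal
illustrative sketch of that shape (names hypothetical, not from this series):

    #include <linux/bpf.h>
    #include <bpf/bpf_helpers.h>

    struct {
            __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
            __uint(max_entries, 1);
            __uint(key_size, sizeof(__u32));
            __uint(value_size, sizeof(__u32));
    } jmp_table SEC(".maps");

    static __noinline int subprog(struct xdp_md *ctx)
    {
            /* tail call from inside a subprogram; tail_call_cnt is
             * carried over from the main program's frame
             */
            bpf_tail_call(ctx, &jmp_table, 0);
            return XDP_PASS;
    }

    SEC("xdp")
    int entry(struct xdp_md *ctx)
    {
            return subprog(ctx);
    }

    char _license[] SEC("license") = "GPL";
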
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index c98b8c0..b88f43c 100644
@@ -1771,6 +1771,10 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
                           struct bpf_tramp_link *l, int stack_size,
                           int run_ctx_off, bool save_ret)
 {
+       void (*exit)(struct bpf_prog *prog, u64 start,
+                    struct bpf_tramp_run_ctx *run_ctx) = __bpf_prog_exit;
+       u64 (*enter)(struct bpf_prog *prog,
+                    struct bpf_tramp_run_ctx *run_ctx) = __bpf_prog_enter;
        u8 *prog = *pprog;
        u8 *jmp_insn;
        int ctx_cookie_off = offsetof(struct bpf_tramp_run_ctx, bpf_cookie);
@@ -1789,15 +1793,21 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
         */
        emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_1, -run_ctx_off + ctx_cookie_off);
 
+       if (p->aux->sleepable) {
+               enter = __bpf_prog_enter_sleepable;
+               exit = __bpf_prog_exit_sleepable;
+       } else if (p->expected_attach_type == BPF_LSM_CGROUP) {
+               enter = __bpf_prog_enter_lsm_cgroup;
+               exit = __bpf_prog_exit_lsm_cgroup;
+       }
+
        /* arg1: mov rdi, progs[i] */
        emit_mov_imm64(&prog, BPF_REG_1, (long) p >> 32, (u32) (long) p);
        /* arg2: lea rsi, [rbp - ctx_cookie_off] */
        EMIT4(0x48, 0x8D, 0x75, -run_ctx_off);
 
-       if (emit_call(&prog,
-                     p->aux->sleepable ? __bpf_prog_enter_sleepable :
-                     __bpf_prog_enter, prog))
-                       return -EINVAL;
+       if (emit_call(&prog, enter, prog))
+               return -EINVAL;
        /* remember prog start time returned by __bpf_prog_enter */
        emit_mov_reg(&prog, true, BPF_REG_6, BPF_REG_0);
 
@@ -1841,10 +1851,8 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
        emit_mov_reg(&prog, true, BPF_REG_2, BPF_REG_6);
        /* arg3: lea rdx, [rbp - run_ctx_off] */
        EMIT4(0x48, 0x8D, 0x55, -run_ctx_off);
-       if (emit_call(&prog,
-                     p->aux->sleepable ? __bpf_prog_exit_sleepable :
-                     __bpf_prog_exit, prog))
-                       return -EINVAL;
+       if (emit_call(&prog, exit, prog))
+               return -EINVAL;
 
        *pprog = prog;
        return 0;
@@ -2492,3 +2500,9 @@ void *bpf_arch_text_copy(void *dst, void *src, size_t len)
                return ERR_PTR(-EINVAL);
        return dst;
 }
+
+/* Indicate the JIT backend supports mixing bpf2bpf and tailcalls. */
+bool bpf_jit_supports_subprog_tailcalls(void)
+{
+       return true;
+}
diff --git a/include/linux/bpf-cgroup-defs.h b/include/linux/bpf-cgroup-defs.h
index 695d122..7b121bd 100644
 
 struct bpf_prog_array;
 
+#ifdef CONFIG_BPF_LSM
+/* Maximum number of concurrently attachable per-cgroup LSM hooks. */
+#define CGROUP_LSM_NUM 10
+#else
+#define CGROUP_LSM_NUM 0
+#endif
+
 enum cgroup_bpf_attach_type {
        CGROUP_BPF_ATTACH_TYPE_INVALID = -1,
        CGROUP_INET_INGRESS = 0,
@@ -35,6 +42,8 @@ enum cgroup_bpf_attach_type {
        CGROUP_INET4_GETSOCKNAME,
        CGROUP_INET6_GETSOCKNAME,
        CGROUP_INET_SOCK_RELEASE,
+       CGROUP_LSM_START,
+       CGROUP_LSM_END = CGROUP_LSM_START + CGROUP_LSM_NUM - 1,
        MAX_CGROUP_BPF_ATTACH_TYPE
 };
 
@@ -47,8 +56,8 @@ struct cgroup_bpf {
         * have either zero or one element
         * when BPF_F_ALLOW_MULTI the list can have up to BPF_CGROUP_MAX_PROGS
         */
-       struct list_head progs[MAX_CGROUP_BPF_ATTACH_TYPE];
-       u32 flags[MAX_CGROUP_BPF_ATTACH_TYPE];
+       struct hlist_head progs[MAX_CGROUP_BPF_ATTACH_TYPE];
+       u8 flags[MAX_CGROUP_BPF_ATTACH_TYPE];
 
        /* list of cgroup shared storages */
        struct list_head storages;
diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index 669d96d..2bd1b5f 100644
@@ -23,6 +23,13 @@ struct ctl_table;
 struct ctl_table_header;
 struct task_struct;
 
+unsigned int __cgroup_bpf_run_lsm_sock(const void *ctx,
+                                      const struct bpf_insn *insn);
+unsigned int __cgroup_bpf_run_lsm_socket(const void *ctx,
+                                        const struct bpf_insn *insn);
+unsigned int __cgroup_bpf_run_lsm_current(const void *ctx,
+                                         const struct bpf_insn *insn);
+
 #ifdef CONFIG_CGROUP_BPF
 
 #define CGROUP_ATYPE(type) \
@@ -95,7 +102,7 @@ struct bpf_cgroup_link {
 };
 
 struct bpf_prog_list {
-       struct list_head node;
+       struct hlist_node node;
        struct bpf_prog *prog;
        struct bpf_cgroup_link *link;
        struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE];
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 0edd7d2..2b21f2a 100644
@@ -56,6 +56,8 @@ typedef u64 (*bpf_callback_t)(u64, u64, u64, u64, u64);
 typedef int (*bpf_iter_init_seq_priv_t)(void *private_data,
                                        struct bpf_iter_aux_info *aux);
 typedef void (*bpf_iter_fini_seq_priv_t)(void *private_data);
+typedef unsigned int (*bpf_func_t)(const void *,
+                                  const struct bpf_insn *);
 struct bpf_iter_seq_info {
        const struct seq_operations *seq_ops;
        bpf_iter_init_seq_priv_t init_seq_private;
@@ -792,6 +794,10 @@ void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start, struct bpf_tramp_
 u64 notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog, struct bpf_tramp_run_ctx *run_ctx);
 void notrace __bpf_prog_exit_sleepable(struct bpf_prog *prog, u64 start,
                                       struct bpf_tramp_run_ctx *run_ctx);
+u64 notrace __bpf_prog_enter_lsm_cgroup(struct bpf_prog *prog,
+                                       struct bpf_tramp_run_ctx *run_ctx);
+void notrace __bpf_prog_exit_lsm_cgroup(struct bpf_prog *prog, u64 start,
+                                       struct bpf_tramp_run_ctx *run_ctx);
 void notrace __bpf_tramp_enter(struct bpf_tramp_image *tr);
 void notrace __bpf_tramp_exit(struct bpf_tramp_image *tr);
 
@@ -879,8 +885,7 @@ struct bpf_dispatcher {
 static __always_inline __nocfi unsigned int bpf_dispatcher_nop_func(
        const void *ctx,
        const struct bpf_insn *insnsi,
-       unsigned int (*bpf_func)(const void *,
-                                const struct bpf_insn *))
+       bpf_func_t bpf_func)
 {
        return bpf_func(ctx, insnsi);
 }
@@ -909,8 +914,7 @@ int arch_prepare_bpf_dispatcher(void *image, s64 *funcs, int num_funcs);
        noinline __nocfi unsigned int bpf_dispatcher_##name##_func(     \
                const void *ctx,                                        \
                const struct bpf_insn *insnsi,                          \
-               unsigned int (*bpf_func)(const void *,                  \
-                                        const struct bpf_insn *))      \
+               bpf_func_t bpf_func)                                    \
        {                                                               \
                return bpf_func(ctx, insnsi);                           \
        }                                                               \
@@ -921,8 +925,7 @@ int arch_prepare_bpf_dispatcher(void *image, s64 *funcs, int num_funcs);
        unsigned int bpf_dispatcher_##name##_func(                      \
                const void *ctx,                                        \
                const struct bpf_insn *insnsi,                          \
-               unsigned int (*bpf_func)(const void *,                  \
-                                        const struct bpf_insn *));     \
+               bpf_func_t bpf_func);                                   \
        extern struct bpf_dispatcher bpf_dispatcher_##name;
 #define BPF_DISPATCHER_FUNC(name) bpf_dispatcher_##name##_func
 #define BPF_DISPATCHER_PTR(name) (&bpf_dispatcher_##name)
@@ -1061,6 +1064,7 @@ struct bpf_prog_aux {
        struct user_struct *user;
        u64 load_time; /* ns since boottime */
        u32 verified_insns;
+       int cgroup_atype; /* enum cgroup_bpf_attach_type */
        struct bpf_map *cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE];
        char name[BPF_OBJ_NAME_LEN];
 #ifdef CONFIG_SECURITY
@@ -1168,6 +1172,11 @@ struct bpf_tramp_link {
        u64 cookie;
 };
 
+struct bpf_shim_tramp_link {
+       struct bpf_tramp_link link;
+       struct bpf_trampoline *trampoline;
+};
+
 struct bpf_tracing_link {
        struct bpf_tramp_link link;
        enum bpf_attach_type attach_type;
@@ -1246,6 +1255,9 @@ struct bpf_dummy_ops {
 int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr,
                            union bpf_attr __user *uattr);
 #endif
+int bpf_trampoline_link_cgroup_shim(struct bpf_prog *prog,
+                                   int cgroup_atype);
+void bpf_trampoline_unlink_cgroup_shim(struct bpf_prog *prog);
 #else
 static inline const struct bpf_struct_ops *bpf_struct_ops_find(u32 type_id)
 {
@@ -1269,6 +1281,14 @@ static inline int bpf_struct_ops_map_sys_lookup_elem(struct bpf_map *map,
 {
        return -EINVAL;
 }
+static inline int bpf_trampoline_link_cgroup_shim(struct bpf_prog *prog,
+                                                 int cgroup_atype)
+{
+       return -EOPNOTSUPP;
+}
+static inline void bpf_trampoline_unlink_cgroup_shim(struct bpf_prog *prog)
+{
+}
 #endif
 
 struct bpf_array {
@@ -1286,6 +1306,9 @@ struct bpf_array {
 #define BPF_COMPLEXITY_LIMIT_INSNS      1000000 /* yes. 1M insns */
 #define MAX_TAIL_CALL_CNT 33
 
+/* Maximum number of loops for bpf_loop */
+#define BPF_MAX_LOOPS  BIT(23)
+
 #define BPF_F_ACCESS_MASK      (BPF_F_RDONLY |         \
                                 BPF_F_RDONLY_PROG |    \
                                 BPF_F_WRONLY |         \
@@ -2363,9 +2386,13 @@ extern const struct bpf_func_proto bpf_for_each_map_elem_proto;
 extern const struct bpf_func_proto bpf_btf_find_by_name_kind_proto;
 extern const struct bpf_func_proto bpf_sk_setsockopt_proto;
 extern const struct bpf_func_proto bpf_sk_getsockopt_proto;
+extern const struct bpf_func_proto bpf_unlocked_sk_setsockopt_proto;
+extern const struct bpf_func_proto bpf_unlocked_sk_getsockopt_proto;
 extern const struct bpf_func_proto bpf_find_vma_proto;
 extern const struct bpf_func_proto bpf_loop_proto;
 extern const struct bpf_func_proto bpf_copy_from_user_task_proto;
+extern const struct bpf_func_proto bpf_set_retval_proto;
+extern const struct bpf_func_proto bpf_get_retval_proto;
 
 const struct bpf_func_proto *tracing_prog_func_proto(
   enum bpf_func_id func_id, const struct bpf_prog *prog);
@@ -2519,4 +2546,12 @@ void bpf_dynptr_init(struct bpf_dynptr_kern *ptr, void *data,
 void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr);
 int bpf_dynptr_check_size(u32 size);
 
+#ifdef CONFIG_BPF_LSM
+void bpf_cgroup_atype_get(u32 attach_btf_id, int cgroup_atype);
+void bpf_cgroup_atype_put(int cgroup_atype);
+#else
+static inline void bpf_cgroup_atype_get(u32 attach_btf_id, int cgroup_atype) {}
+static inline void bpf_cgroup_atype_put(int cgroup_atype) {}
+#endif /* CONFIG_BPF_LSM */
+
 #endif /* _LINUX_BPF_H */
diff --git a/include/linux/bpf_lsm.h b/include/linux/bpf_lsm.h
index 479c101..4bcf76a 100644
@@ -42,6 +42,8 @@ extern const struct bpf_func_proto bpf_inode_storage_get_proto;
 extern const struct bpf_func_proto bpf_inode_storage_delete_proto;
 void bpf_inode_storage_free(struct inode *inode);
 
+void bpf_lsm_find_cgroup_shim(const struct bpf_prog *prog, bpf_func_t *bpf_func);
+
 #else /* !CONFIG_BPF_LSM */
 
 static inline bool bpf_lsm_is_sleepable_hook(u32 btf_id)
@@ -65,6 +67,11 @@ static inline void bpf_inode_storage_free(struct inode *inode)
 {
 }
 
+static inline void bpf_lsm_find_cgroup_shim(const struct bpf_prog *prog,
+                                          bpf_func_t *bpf_func)
+{
+}
+
 #endif /* CONFIG_BPF_LSM */
 
 #endif /* _LINUX_BPF_LSM_H */
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 3930c96..81b1966 100644
@@ -344,6 +344,14 @@ struct bpf_verifier_state_list {
        int miss_cnt, hit_cnt;
 };
 
+struct bpf_loop_inline_state {
+       int initialized:1; /* set to true upon first entry */
+       int fit_for_inline:1; /* true if callback function is the same
+                              * at each call and flags are always zero
+                              */
+       u32 callback_subprogno; /* valid when fit_for_inline is true */
+};
+
 /* Possible states for alu_state member. */
 #define BPF_ALU_SANITIZE_SRC           (1U << 0)
 #define BPF_ALU_SANITIZE_DST           (1U << 1)
@@ -373,6 +381,10 @@ struct bpf_insn_aux_data {
                                u32 mem_size;   /* mem_size for non-struct typed var */
                        };
                } btf_var;
+               /* if instruction is a call to bpf_loop this field tracks
+                * the state of the relevant registers to make decision about inlining
+                */
+               struct bpf_loop_inline_state loop_inline_state;
        };
        u64 map_key_state; /* constant (32 bit) key tracking for maps */
        int ctx_field_size; /* the ctx field size for load insn, maybe 0 */
diff --git a/include/linux/btf_ids.h b/include/linux/btf_ids.h
index 335a190..252a4be 100644
@@ -179,7 +179,8 @@ extern struct btf_id_set name;
        BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP, udp_sock)                      \
        BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP6, udp6_sock)                    \
        BTF_SOCK_TYPE(BTF_SOCK_TYPE_UNIX, unix_sock)                    \
-       BTF_SOCK_TYPE(BTF_SOCK_TYPE_MPTCP, mptcp_sock)
+       BTF_SOCK_TYPE(BTF_SOCK_TYPE_MPTCP, mptcp_sock)                  \
+       BTF_SOCK_TYPE(BTF_SOCK_TYPE_SOCKET, socket)
 
 enum {
 #define BTF_SOCK_TYPE(name, str) name,
diff --git a/include/linux/filter.h b/include/linux/filter.h
index d0cbb31..4c1a8b2 100644
@@ -914,6 +914,7 @@ u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
 struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog);
 void bpf_jit_compile(struct bpf_prog *prog);
 bool bpf_jit_needs_zext(void);
+bool bpf_jit_supports_subprog_tailcalls(void);
 bool bpf_jit_supports_kfunc_call(void);
 bool bpf_helper_changes_pkt_data(void *func);
 
diff --git a/include/linux/net.h b/include/linux/net.h
index 12093f4..a03485e 100644
@@ -152,6 +152,8 @@ struct module;
 struct sk_buff;
 typedef int (*sk_read_actor_t)(read_descriptor_t *, struct sk_buff *,
                               unsigned int, size_t);
+typedef int (*skb_read_actor_t)(struct sock *, struct sk_buff *);
+
 
 struct proto_ops {
        int             family;
@@ -214,6 +216,8 @@ struct proto_ops {
         */
        int             (*read_sock)(struct sock *sk, read_descriptor_t *desc,
                                     sk_read_actor_t recv_actor);
+       /* This is different from read_sock(), it reads an entire skb at a time. */
+       int             (*read_skb)(struct sock *sk, skb_read_actor_t recv_actor);
        int             (*sendpage_locked)(struct sock *sk, struct page *page,
                                           int offset, size_t size, int flags);
        int             (*sendmsg_locked)(struct sock *sk, struct msghdr *msg,
diff --git a/include/net/tcp.h b/include/net/tcp.h
index c21a9b5..8e48dc5 100644
@@ -672,6 +672,7 @@ void tcp_get_info(struct sock *, struct tcp_info *);
 /* Read 'sendfile()'-style from a TCP socket */
 int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
                  sk_read_actor_t recv_actor);
+int tcp_read_skb(struct sock *sk, skb_read_actor_t recv_actor);
 
 void tcp_initialize_rcv_mss(struct sock *sk);
 
diff --git a/include/net/udp.h b/include/net/udp.h
index b60eea2..987f7fc 100644
@@ -306,8 +306,7 @@ struct sock *__udp6_lib_lookup(struct net *net,
                               struct sk_buff *skb);
 struct sock *udp6_lib_lookup_skb(const struct sk_buff *skb,
                                 __be16 sport, __be16 dport);
-int udp_read_sock(struct sock *sk, read_descriptor_t *desc,
-                 sk_read_actor_t recv_actor);
+int udp_read_skb(struct sock *sk, skb_read_actor_t recv_actor);
 
 /* UDP uses skb->dev_scratch to cache as much information as possible and avoid
  * possibly multiple cache miss on dequeue()
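
Unlike read_sock(), which threads a read_descriptor_t through the callback,
the new read_skb() hands the actor one complete skb at a time; tcp_read_skb()
and udp_read_skb() are the TCP/UDP implementations behind it. A hypothetical
in-kernel caller might look like this (illustrative sketch only):

    #include <linux/net.h>
    #include <net/sock.h>

    /* called once per queued skb; returns consumed bytes or an error */
    static int demo_recv_actor(struct sock *sk, struct sk_buff *skb)
    {
            return 0;
    }

    static void demo_drain(struct socket *sock)
    {
            if (sock->ops->read_skb)
                    sock->ops->read_skb(sock->sk, demo_recv_actor);
    }
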
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index e813628..379e68f 100644
@@ -998,6 +998,7 @@ enum bpf_attach_type {
        BPF_SK_REUSEPORT_SELECT_OR_MIGRATE,
        BPF_PERF_EVENT,
        BPF_TRACE_KPROBE_MULTI,
+       BPF_LSM_CGROUP,
        __MAX_BPF_ATTACH_TYPE
 };
 
@@ -1431,6 +1432,7 @@ union bpf_attr {
                __u32           attach_flags;
                __aligned_u64   prog_ids;
                __u32           prog_cnt;
+               __aligned_u64   prog_attach_flags; /* output: per-program attach_flags */
        } query;
 
        struct { /* anonymous struct used by BPF_RAW_TRACEPOINT_OPEN command */
@@ -6075,6 +6077,8 @@ struct bpf_prog_info {
        __u64 run_cnt;
        __u64 recursion_misses;
        __u32 verified_insns;
+       __u32 attach_btf_obj_id;
+       __u32 attach_btf_id;
 } __attribute__((aligned(8)));
 
 struct bpf_map_info {
@@ -6782,6 +6786,7 @@ enum bpf_core_relo_kind {
        BPF_CORE_TYPE_SIZE = 9,              /* type size in bytes */
        BPF_CORE_ENUMVAL_EXISTS = 10,        /* enum value existence in target kernel */
        BPF_CORE_ENUMVAL_VALUE = 11,         /* enum value integer value */
+       BPF_CORE_TYPE_MATCHES = 12,          /* type match in target kernel */
 };
 
 /*
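
The new prog_attach_flags output array exists mainly for BPF_LSM_CGROUP,
where one attach type fans out to many attach points and a single
attach_flags value is no longer meaningful; as the cgroup.c hunk further
below shows, the kernel rejects prog_ids without prog_attach_flags for this
type. An illustrative userspace sketch against the updated uapi header
(raw syscall, hypothetical helper name):

    #include <stdint.h>
    #include <string.h>
    #include <unistd.h>
    #include <sys/syscall.h>
    #include <linux/bpf.h>

    static int query_lsm_cgroup(int cgroup_fd, uint32_t *ids,
                                uint32_t *attach_flags, uint32_t *cnt)
    {
            union bpf_attr attr;

            memset(&attr, 0, sizeof(attr));
            attr.query.target_fd = cgroup_fd;
            attr.query.attach_type = BPF_LSM_CGROUP;
            attr.query.prog_ids = (uint64_t)(uintptr_t)ids;
            /* must accompany prog_ids for BPF_LSM_CGROUP */
            attr.query.prog_attach_flags = (uint64_t)(uintptr_t)attach_flags;
            attr.query.prog_cnt = *cnt;     /* in: capacity, out: count */

            if (syscall(__NR_bpf, BPF_PROG_QUERY, &attr, sizeof(attr)))
                    return -1;

            *cnt = attr.query.prog_cnt;
            return 0;
    }
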
diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c
index d5d96ce..7e8fd49 100644
@@ -723,9 +723,6 @@ const struct bpf_func_proto bpf_for_each_map_elem_proto = {
        .arg4_type      = ARG_ANYTHING,
 };
 
-/* maximum number of loops */
-#define MAX_LOOPS      BIT(23)
-
 BPF_CALL_4(bpf_loop, u32, nr_loops, void *, callback_fn, void *, callback_ctx,
           u64, flags)
 {
@@ -733,9 +730,13 @@ BPF_CALL_4(bpf_loop, u32, nr_loops, void *, callback_fn, void *, callback_ctx,
        u64 ret;
        u32 i;
 
+       /* Note: these safety checks are also verified when bpf_loop
+        * is inlined, be careful to modify this code in sync. See
+        * function verifier.c:inline_bpf_loop.
+        */
        if (flags)
                return -EINVAL;
-       if (nr_loops > MAX_LOOPS)
+       if (nr_loops > BPF_MAX_LOOPS)
                return -E2BIG;
 
        for (i = 0; i < nr_loops; i++) {
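
A call to bpf_loop() can now be inlined by the verifier when the callback is
statically known and the flags argument is provably zero (tracked via the
bpf_loop_inline_state hunk in bpf_verifier.h above). An illustrative BPF C
sketch of a call shape that qualifies:

    #include <linux/bpf.h>
    #include <bpf/bpf_helpers.h>

    static int sum_cb(__u32 index, void *ctx)
    {
            *(__u64 *)ctx += index;
            return 0;       /* 0 = continue, 1 = break */
    }

    SEC("tc")
    int compute(struct __sk_buff *skb)
    {
            __u64 sum = 0;

            /* nr_loops is capped at BPF_MAX_LOOPS (1 << 23) */
            bpf_loop(1000, sum_cb, &sum, 0);
            return 0;
    }

    char _license[] SEC("license") = "GPL";
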
diff --git a/kernel/bpf/bpf_lsm.c b/kernel/bpf/bpf_lsm.c
index c1351df..d469b7f 100644
@@ -16,6 +16,7 @@
 #include <linux/bpf_local_storage.h>
 #include <linux/btf_ids.h>
 #include <linux/ima.h>
+#include <linux/bpf-cgroup.h>
 
 /* For every LSM hook that allows attachment of BPF programs, declare a nop
  * function where a BPF program can be attached.
@@ -35,6 +36,57 @@ BTF_SET_START(bpf_lsm_hooks)
 #undef LSM_HOOK
 BTF_SET_END(bpf_lsm_hooks)
 
+/* List of LSM hooks that should operate on 'current' cgroup regardless
+ * of function signature.
+ */
+BTF_SET_START(bpf_lsm_current_hooks)
+/* operate on freshly allocated sk without any cgroup association */
+BTF_ID(func, bpf_lsm_sk_alloc_security)
+BTF_ID(func, bpf_lsm_sk_free_security)
+BTF_SET_END(bpf_lsm_current_hooks)
+
+/* List of LSM hooks that trigger while the socket is properly locked.
+ */
+BTF_SET_START(bpf_lsm_locked_sockopt_hooks)
+BTF_ID(func, bpf_lsm_socket_sock_rcv_skb)
+BTF_ID(func, bpf_lsm_sock_graft)
+BTF_ID(func, bpf_lsm_inet_csk_clone)
+BTF_ID(func, bpf_lsm_inet_conn_established)
+BTF_SET_END(bpf_lsm_locked_sockopt_hooks)
+
+/* List of LSM hooks that trigger while the socket is _not_ locked,
+ * but it's ok to call bpf_{g,s}etsockopt because the socket is still
+ * in the early init phase.
+ */
+BTF_SET_START(bpf_lsm_unlocked_sockopt_hooks)
+BTF_ID(func, bpf_lsm_socket_post_create)
+BTF_ID(func, bpf_lsm_socket_socketpair)
+BTF_SET_END(bpf_lsm_unlocked_sockopt_hooks)
+
+void bpf_lsm_find_cgroup_shim(const struct bpf_prog *prog,
+                            bpf_func_t *bpf_func)
+{
+       const struct btf_param *args;
+
+       if (btf_type_vlen(prog->aux->attach_func_proto) < 1 ||
+           btf_id_set_contains(&bpf_lsm_current_hooks,
+                               prog->aux->attach_btf_id)) {
+               *bpf_func = __cgroup_bpf_run_lsm_current;
+               return;
+       }
+
+       args = btf_params(prog->aux->attach_func_proto);
+
+#ifdef CONFIG_NET
+       if (args[0].type == btf_sock_ids[BTF_SOCK_TYPE_SOCKET])
+               *bpf_func = __cgroup_bpf_run_lsm_socket;
+       else if (args[0].type == btf_sock_ids[BTF_SOCK_TYPE_SOCK])
+               *bpf_func = __cgroup_bpf_run_lsm_sock;
+       else
+#endif
+               *bpf_func = __cgroup_bpf_run_lsm_current;
+}
+
 int bpf_lsm_verify_prog(struct bpf_verifier_log *vlog,
                        const struct bpf_prog *prog)
 {
@@ -158,6 +210,35 @@ bpf_lsm_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
                return prog->aux->sleepable ? &bpf_ima_file_hash_proto : NULL;
        case BPF_FUNC_get_attach_cookie:
                return bpf_prog_has_trampoline(prog) ? &bpf_get_attach_cookie_proto : NULL;
+       case BPF_FUNC_get_local_storage:
+               return prog->expected_attach_type == BPF_LSM_CGROUP ?
+                       &bpf_get_local_storage_proto : NULL;
+       case BPF_FUNC_set_retval:
+               return prog->expected_attach_type == BPF_LSM_CGROUP ?
+                       &bpf_set_retval_proto : NULL;
+       case BPF_FUNC_get_retval:
+               return prog->expected_attach_type == BPF_LSM_CGROUP ?
+                       &bpf_get_retval_proto : NULL;
+       case BPF_FUNC_setsockopt:
+               if (prog->expected_attach_type != BPF_LSM_CGROUP)
+                       return NULL;
+               if (btf_id_set_contains(&bpf_lsm_locked_sockopt_hooks,
+                                       prog->aux->attach_btf_id))
+                       return &bpf_sk_setsockopt_proto;
+               if (btf_id_set_contains(&bpf_lsm_unlocked_sockopt_hooks,
+                                       prog->aux->attach_btf_id))
+                       return &bpf_unlocked_sk_setsockopt_proto;
+               return NULL;
+       case BPF_FUNC_getsockopt:
+               if (prog->expected_attach_type != BPF_LSM_CGROUP)
+                       return NULL;
+               if (btf_id_set_contains(&bpf_lsm_locked_sockopt_hooks,
+                                       prog->aux->attach_btf_id))
+                       return &bpf_sk_getsockopt_proto;
+               if (btf_id_set_contains(&bpf_lsm_unlocked_sockopt_hooks,
+                                       prog->aux->attach_btf_id))
+                       return &bpf_unlocked_sk_getsockopt_proto;
+               return NULL;
        default:
                return tracing_prog_func_proto(func_id, prog);
        }
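
Tying the above together, an illustrative BPF_LSM_CGROUP program modeled on
the selftests added in this series: socket_post_create is on the
bpf_lsm_unlocked_sockopt_hooks allowlist, returning 1 allows the operation,
and returning 0 rejects it with the errno installed via bpf_set_retval()
(-EPERM by default):

    #include "vmlinux.h"
    #include <bpf/bpf_helpers.h>
    #include <bpf/bpf_tracing.h>

    SEC("lsm_cgroup/socket_post_create")
    int BPF_PROG(deny_raw, struct socket *sock, int family, int type,
                 int protocol, int kern)
    {
            if (type == SOCK_RAW) {
                    bpf_set_retval(-1 /* -EPERM */);
                    return 0;       /* reject */
            }
            return 1;               /* allow */
    }

    char _license[] SEC("license") = "GPL";
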
diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c
index d9a3c92..7e0068c 100644
@@ -503,10 +503,9 @@ static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
                goto unlock;
        }
 
-       /* Error during st_ops->reg().  It is very unlikely since
-        * the above init_member() should have caught it earlier
-        * before reg().  The only possibility is if there was a race
-        * in registering the struct_ops (under the same name) to
+       /* Error during st_ops->reg(). Can happen if this struct_ops needs to be
+        * verified as a whole, after all init_member() calls. Can also happen if
+        * there was a race in registering the struct_ops (under the same name) to
         * a sub-system through different struct_ops's maps.
         */
        set_memory_nx((long)st_map->image, 1);
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index d003d4d..4423045 100644
@@ -5368,6 +5368,7 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
 
        if (arg == nr_args) {
                switch (prog->expected_attach_type) {
+               case BPF_LSM_CGROUP:
                case BPF_LSM_MAC:
                case BPF_TRACE_FEXIT:
                        /* When LSM programs are attached to void LSM hooks
@@ -7421,87 +7422,6 @@ EXPORT_SYMBOL_GPL(register_btf_id_dtor_kfuncs);
 
 #define MAX_TYPES_ARE_COMPAT_DEPTH 2
 
-static
-int __bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id,
-                               const struct btf *targ_btf, __u32 targ_id,
-                               int level)
-{
-       const struct btf_type *local_type, *targ_type;
-       int depth = 32; /* max recursion depth */
-
-       /* caller made sure that names match (ignoring flavor suffix) */
-       local_type = btf_type_by_id(local_btf, local_id);
-       targ_type = btf_type_by_id(targ_btf, targ_id);
-       if (btf_kind(local_type) != btf_kind(targ_type))
-               return 0;
-
-recur:
-       depth--;
-       if (depth < 0)
-               return -EINVAL;
-
-       local_type = btf_type_skip_modifiers(local_btf, local_id, &local_id);
-       targ_type = btf_type_skip_modifiers(targ_btf, targ_id, &targ_id);
-       if (!local_type || !targ_type)
-               return -EINVAL;
-
-       if (btf_kind(local_type) != btf_kind(targ_type))
-               return 0;
-
-       switch (btf_kind(local_type)) {
-       case BTF_KIND_UNKN:
-       case BTF_KIND_STRUCT:
-       case BTF_KIND_UNION:
-       case BTF_KIND_ENUM:
-       case BTF_KIND_FWD:
-       case BTF_KIND_ENUM64:
-               return 1;
-       case BTF_KIND_INT:
-               /* just reject deprecated bitfield-like integers; all other
-                * integers are by default compatible between each other
-                */
-               return btf_int_offset(local_type) == 0 && btf_int_offset(targ_type) == 0;
-       case BTF_KIND_PTR:
-               local_id = local_type->type;
-               targ_id = targ_type->type;
-               goto recur;
-       case BTF_KIND_ARRAY:
-               local_id = btf_array(local_type)->type;
-               targ_id = btf_array(targ_type)->type;
-               goto recur;
-       case BTF_KIND_FUNC_PROTO: {
-               struct btf_param *local_p = btf_params(local_type);
-               struct btf_param *targ_p = btf_params(targ_type);
-               __u16 local_vlen = btf_vlen(local_type);
-               __u16 targ_vlen = btf_vlen(targ_type);
-               int i, err;
-
-               if (local_vlen != targ_vlen)
-                       return 0;
-
-               for (i = 0; i < local_vlen; i++, local_p++, targ_p++) {
-                       if (level <= 0)
-                               return -EINVAL;
-
-                       btf_type_skip_modifiers(local_btf, local_p->type, &local_id);
-                       btf_type_skip_modifiers(targ_btf, targ_p->type, &targ_id);
-                       err = __bpf_core_types_are_compat(local_btf, local_id,
-                                                         targ_btf, targ_id,
-                                                         level - 1);
-                       if (err <= 0)
-                               return err;
-               }
-
-               /* tail recurse for return type check */
-               btf_type_skip_modifiers(local_btf, local_type->type, &local_id);
-               btf_type_skip_modifiers(targ_btf, targ_type->type, &targ_id);
-               goto recur;
-       }
-       default:
-               return 0;
-       }
-}
-
 /* Check local and target types for compatibility. This check is used for
  * type-based CO-RE relocations and follow slightly different rules than
  * field-based relocations. This function assumes that root types were already
@@ -7524,11 +7444,19 @@ recur:
 int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id,
                              const struct btf *targ_btf, __u32 targ_id)
 {
-       return __bpf_core_types_are_compat(local_btf, local_id,
-                                          targ_btf, targ_id,
+       return __bpf_core_types_are_compat(local_btf, local_id, targ_btf, targ_id,
                                           MAX_TYPES_ARE_COMPAT_DEPTH);
 }
 
+#define MAX_TYPES_MATCH_DEPTH 2
+
+int bpf_core_types_match(const struct btf *local_btf, u32 local_id,
+                        const struct btf *targ_btf, u32 targ_id)
+{
+       return __bpf_core_types_match(local_btf, local_id, targ_btf, targ_id, false,
+                                     MAX_TYPES_MATCH_DEPTH);
+}
+
 static bool bpf_core_is_flavor_sep(const char *s)
 {
        /* check X___Y name pattern, where X and Y are not underscores */
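
The kernel-side bpf_core_types_match() above backs the new
BPF_CORE_TYPE_MATCHES relocation; from BPF C it is reachable through the
bpf_core_type_matches() convenience macro that this series adds to libbpf's
bpf_core_read.h. An illustrative sketch, checking a local flavor of
task_struct against the running kernel:

    #include "vmlinux.h"
    #include <bpf/bpf_helpers.h>
    #include <bpf/bpf_core_read.h>

    /* local shape; the ___local flavor suffix is ignored for matching */
    struct task_struct___local {
            int pid;
    };

    SEC("raw_tp/sys_enter")
    int probe(void *ctx)
    {
            /* true if every member of the local type exists in the
             * kernel's task_struct with a compatible type
             */
            if (bpf_core_type_matches(struct task_struct___local))
                    bpf_printk("kernel task_struct matches local shape");
            return 0;
    }

    char _license[] SEC("license") = "GPL";
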
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index 7a394f7..59b7eb6 100644
@@ -14,6 +14,8 @@
 #include <linux/string.h>
 #include <linux/bpf.h>
 #include <linux/bpf-cgroup.h>
+#include <linux/bpf_lsm.h>
+#include <linux/bpf_verifier.h>
 #include <net/sock.h>
 #include <net/bpf_sk_storage.h>
 
@@ -61,6 +63,132 @@ bpf_prog_run_array_cg(const struct cgroup_bpf *cgrp,
        return run_ctx.retval;
 }
 
+unsigned int __cgroup_bpf_run_lsm_sock(const void *ctx,
+                                      const struct bpf_insn *insn)
+{
+       const struct bpf_prog *shim_prog;
+       struct sock *sk;
+       struct cgroup *cgrp;
+       int ret = 0;
+       u64 *args;
+
+       args = (u64 *)ctx;
+       sk = (void *)(unsigned long)args[0];
+       /*shim_prog = container_of(insn, struct bpf_prog, insnsi);*/
+       shim_prog = (const struct bpf_prog *)((void *)insn - offsetof(struct bpf_prog, insnsi));
+
+       cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
+       if (likely(cgrp))
+               ret = bpf_prog_run_array_cg(&cgrp->bpf,
+                                           shim_prog->aux->cgroup_atype,
+                                           ctx, bpf_prog_run, 0, NULL);
+       return ret;
+}
+
+unsigned int __cgroup_bpf_run_lsm_socket(const void *ctx,
+                                        const struct bpf_insn *insn)
+{
+       const struct bpf_prog *shim_prog;
+       struct socket *sock;
+       struct cgroup *cgrp;
+       int ret = 0;
+       u64 *args;
+
+       args = (u64 *)ctx;
+       sock = (void *)(unsigned long)args[0];
+       /*shim_prog = container_of(insn, struct bpf_prog, insnsi);*/
+       shim_prog = (const struct bpf_prog *)((void *)insn - offsetof(struct bpf_prog, insnsi));
+
+       cgrp = sock_cgroup_ptr(&sock->sk->sk_cgrp_data);
+       if (likely(cgrp))
+               ret = bpf_prog_run_array_cg(&cgrp->bpf,
+                                           shim_prog->aux->cgroup_atype,
+                                           ctx, bpf_prog_run, 0, NULL);
+       return ret;
+}
+
+unsigned int __cgroup_bpf_run_lsm_current(const void *ctx,
+                                         const struct bpf_insn *insn)
+{
+       const struct bpf_prog *shim_prog;
+       struct cgroup *cgrp;
+       int ret = 0;
+
+       /*shim_prog = container_of(insn, struct bpf_prog, insnsi);*/
+       shim_prog = (const struct bpf_prog *)((void *)insn - offsetof(struct bpf_prog, insnsi));
+
+       /* We rely on trampoline's __bpf_prog_enter_lsm_cgroup to grab RCU read lock. */
+       cgrp = task_dfl_cgroup(current);
+       if (likely(cgrp))
+               ret = bpf_prog_run_array_cg(&cgrp->bpf,
+                                           shim_prog->aux->cgroup_atype,
+                                           ctx, bpf_prog_run, 0, NULL);
+       return ret;
+}
+
+#ifdef CONFIG_BPF_LSM
+struct cgroup_lsm_atype {
+       u32 attach_btf_id;
+       int refcnt;
+};
+
+static struct cgroup_lsm_atype cgroup_lsm_atype[CGROUP_LSM_NUM];
+
+static enum cgroup_bpf_attach_type
+bpf_cgroup_atype_find(enum bpf_attach_type attach_type, u32 attach_btf_id)
+{
+       int i;
+
+       lockdep_assert_held(&cgroup_mutex);
+
+       if (attach_type != BPF_LSM_CGROUP)
+               return to_cgroup_bpf_attach_type(attach_type);
+
+       for (i = 0; i < ARRAY_SIZE(cgroup_lsm_atype); i++)
+               if (cgroup_lsm_atype[i].attach_btf_id == attach_btf_id)
+                       return CGROUP_LSM_START + i;
+
+       for (i = 0; i < ARRAY_SIZE(cgroup_lsm_atype); i++)
+               if (cgroup_lsm_atype[i].attach_btf_id == 0)
+                       return CGROUP_LSM_START + i;
+
+       return -E2BIG;
+
+}
+
+void bpf_cgroup_atype_get(u32 attach_btf_id, int cgroup_atype)
+{
+       int i = cgroup_atype - CGROUP_LSM_START;
+
+       lockdep_assert_held(&cgroup_mutex);
+
+       WARN_ON_ONCE(cgroup_lsm_atype[i].attach_btf_id &&
+                    cgroup_lsm_atype[i].attach_btf_id != attach_btf_id);
+
+       cgroup_lsm_atype[i].attach_btf_id = attach_btf_id;
+       cgroup_lsm_atype[i].refcnt++;
+}
+
+void bpf_cgroup_atype_put(int cgroup_atype)
+{
+       int i = cgroup_atype - CGROUP_LSM_START;
+
+       mutex_lock(&cgroup_mutex);
+       if (--cgroup_lsm_atype[i].refcnt <= 0)
+               cgroup_lsm_atype[i].attach_btf_id = 0;
+       WARN_ON_ONCE(cgroup_lsm_atype[i].refcnt < 0);
+       mutex_unlock(&cgroup_mutex);
+}
+#else
+static enum cgroup_bpf_attach_type
+bpf_cgroup_atype_find(enum bpf_attach_type attach_type, u32 attach_btf_id)
+{
+       if (attach_type != BPF_LSM_CGROUP)
+               return to_cgroup_bpf_attach_type(attach_type);
+       return -EOPNOTSUPP;
+}
+#endif /* CONFIG_BPF_LSM */
+
 void cgroup_bpf_offline(struct cgroup *cgrp)
 {
        cgroup_get(cgrp);
@@ -157,15 +285,22 @@ static void cgroup_bpf_release(struct work_struct *work)
        mutex_lock(&cgroup_mutex);
 
        for (atype = 0; atype < ARRAY_SIZE(cgrp->bpf.progs); atype++) {
-               struct list_head *progs = &cgrp->bpf.progs[atype];
-               struct bpf_prog_list *pl, *pltmp;
+               struct hlist_head *progs = &cgrp->bpf.progs[atype];
+               struct bpf_prog_list *pl;
+               struct hlist_node *pltmp;
 
-               list_for_each_entry_safe(pl, pltmp, progs, node) {
-                       list_del(&pl->node);
-                       if (pl->prog)
+               hlist_for_each_entry_safe(pl, pltmp, progs, node) {
+                       hlist_del(&pl->node);
+                       if (pl->prog) {
+                               if (pl->prog->expected_attach_type == BPF_LSM_CGROUP)
+                                       bpf_trampoline_unlink_cgroup_shim(pl->prog);
                                bpf_prog_put(pl->prog);
-                       if (pl->link)
+                       }
+                       if (pl->link) {
+                               if (pl->link->link.prog->expected_attach_type == BPF_LSM_CGROUP)
+                                       bpf_trampoline_unlink_cgroup_shim(pl->link->link.prog);
                                bpf_cgroup_link_auto_detach(pl->link);
+                       }
                        kfree(pl);
                        static_branch_dec(&cgroup_bpf_enabled_key[atype]);
                }
@@ -217,12 +352,12 @@ static struct bpf_prog *prog_list_prog(struct bpf_prog_list *pl)
 /* count number of elements in the list.
  * it's slow but the list cannot be long
  */
-static u32 prog_list_length(struct list_head *head)
+static u32 prog_list_length(struct hlist_head *head)
 {
        struct bpf_prog_list *pl;
        u32 cnt = 0;
 
-       list_for_each_entry(pl, head, node) {
+       hlist_for_each_entry(pl, head, node) {
                if (!prog_list_prog(pl))
                        continue;
                cnt++;
@@ -291,7 +426,7 @@ static int compute_effective_progs(struct cgroup *cgrp,
                if (cnt > 0 && !(p->bpf.flags[atype] & BPF_F_ALLOW_MULTI))
                        continue;
 
-               list_for_each_entry(pl, &p->bpf.progs[atype], node) {
+               hlist_for_each_entry(pl, &p->bpf.progs[atype], node) {
                        if (!prog_list_prog(pl))
                                continue;
 
@@ -342,7 +477,7 @@ int cgroup_bpf_inherit(struct cgroup *cgrp)
                cgroup_bpf_get(p);
 
        for (i = 0; i < NR; i++)
-               INIT_LIST_HEAD(&cgrp->bpf.progs[i]);
+               INIT_HLIST_HEAD(&cgrp->bpf.progs[i]);
 
        INIT_LIST_HEAD(&cgrp->bpf.storages);
 
@@ -418,7 +553,7 @@ cleanup:
 
 #define BPF_CGROUP_MAX_PROGS 64
 
-static struct bpf_prog_list *find_attach_entry(struct list_head *progs,
+static struct bpf_prog_list *find_attach_entry(struct hlist_head *progs,
                                               struct bpf_prog *prog,
                                               struct bpf_cgroup_link *link,
                                               struct bpf_prog *replace_prog,
@@ -428,12 +563,12 @@ static struct bpf_prog_list *find_attach_entry(struct list_head *progs,
 
        /* single-attach case */
        if (!allow_multi) {
-               if (list_empty(progs))
+               if (hlist_empty(progs))
                        return NULL;
-               return list_first_entry(progs, typeof(*pl), node);
+               return hlist_entry(progs->first, typeof(*pl), node);
        }
 
-       list_for_each_entry(pl, progs, node) {
+       hlist_for_each_entry(pl, progs, node) {
                if (prog && pl->prog == prog && prog != replace_prog)
                        /* disallow attaching the same prog twice */
                        return ERR_PTR(-EINVAL);
@@ -444,7 +579,7 @@ static struct bpf_prog_list *find_attach_entry(struct list_head *progs,
 
        /* direct prog multi-attach w/ replacement case */
        if (replace_prog) {
-               list_for_each_entry(pl, progs, node) {
+               hlist_for_each_entry(pl, progs, node) {
                        if (pl->prog == replace_prog)
                                /* a match found */
                                return pl;
@@ -478,9 +613,10 @@ static int __cgroup_bpf_attach(struct cgroup *cgrp,
        struct bpf_prog *old_prog = NULL;
        struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE] = {};
        struct bpf_cgroup_storage *new_storage[MAX_BPF_CGROUP_STORAGE_TYPE] = {};
+       struct bpf_prog *new_prog = prog ? : link->link.prog;
        enum cgroup_bpf_attach_type atype;
        struct bpf_prog_list *pl;
-       struct list_head *progs;
+       struct hlist_head *progs;
        int err;
 
        if (((flags & BPF_F_ALLOW_OVERRIDE) && (flags & BPF_F_ALLOW_MULTI)) ||
@@ -494,7 +630,7 @@ static int __cgroup_bpf_attach(struct cgroup *cgrp,
                /* replace_prog implies BPF_F_REPLACE, and vice versa */
                return -EINVAL;
 
-       atype = to_cgroup_bpf_attach_type(type);
+       atype = bpf_cgroup_atype_find(type, new_prog->aux->attach_btf_id);
        if (atype < 0)
                return -EINVAL;
 
@@ -503,7 +639,7 @@ static int __cgroup_bpf_attach(struct cgroup *cgrp,
        if (!hierarchy_allows_attach(cgrp, atype))
                return -EPERM;
 
-       if (!list_empty(progs) && cgrp->bpf.flags[atype] != saved_flags)
+       if (!hlist_empty(progs) && cgrp->bpf.flags[atype] != saved_flags)
                /* Disallow attaching non-overridable on top
                 * of existing overridable in this cgroup.
                 * Disallow attaching multi-prog if overridable or none
@@ -525,12 +661,22 @@ static int __cgroup_bpf_attach(struct cgroup *cgrp,
        if (pl) {
                old_prog = pl->prog;
        } else {
+               struct hlist_node *last = NULL;
+
                pl = kmalloc(sizeof(*pl), GFP_KERNEL);
                if (!pl) {
                        bpf_cgroup_storages_free(new_storage);
                        return -ENOMEM;
                }
-               list_add_tail(&pl->node, progs);
+               if (hlist_empty(progs))
+                       hlist_add_head(&pl->node, progs);
+               else
+                       hlist_for_each(last, progs) {
+                               if (last->next)
+                                       continue;
+                               hlist_add_behind(&pl->node, last);
+                               break;
+                       }
        }
 
        pl->prog = prog;
@@ -538,17 +684,30 @@ static int __cgroup_bpf_attach(struct cgroup *cgrp,
        bpf_cgroup_storages_assign(pl->storage, storage);
        cgrp->bpf.flags[atype] = saved_flags;
 
+       if (type == BPF_LSM_CGROUP) {
+               err = bpf_trampoline_link_cgroup_shim(new_prog, atype);
+               if (err)
+                       goto cleanup;
+       }
+
        err = update_effective_progs(cgrp, atype);
        if (err)
-               goto cleanup;
+               goto cleanup_trampoline;
 
-       if (old_prog)
+       if (old_prog) {
+               if (type == BPF_LSM_CGROUP)
+                       bpf_trampoline_unlink_cgroup_shim(old_prog);
                bpf_prog_put(old_prog);
-       else
+       } else {
                static_branch_inc(&cgroup_bpf_enabled_key[atype]);
+       }
        bpf_cgroup_storages_link(new_storage, cgrp, type);
        return 0;
 
+cleanup_trampoline:
+       if (type == BPF_LSM_CGROUP)
+               bpf_trampoline_unlink_cgroup_shim(new_prog);
+
 cleanup:
        if (old_prog) {
                pl->prog = old_prog;
@@ -556,7 +715,7 @@ cleanup:
        }
        bpf_cgroup_storages_free(new_storage);
        if (!old_prog) {
-               list_del(&pl->node);
+               hlist_del(&pl->node);
                kfree(pl);
        }
        return err;
@@ -587,7 +746,7 @@ static void replace_effective_prog(struct cgroup *cgrp,
        struct cgroup_subsys_state *css;
        struct bpf_prog_array *progs;
        struct bpf_prog_list *pl;
-       struct list_head *head;
+       struct hlist_head *head;
        struct cgroup *cg;
        int pos;
 
@@ -603,7 +762,7 @@ static void replace_effective_prog(struct cgroup *cgrp,
                                continue;
 
                        head = &cg->bpf.progs[atype];
-                       list_for_each_entry(pl, head, node) {
+                       hlist_for_each_entry(pl, head, node) {
                                if (!prog_list_prog(pl))
                                        continue;
                                if (pl->link == link)
@@ -637,10 +796,10 @@ static int __cgroup_bpf_replace(struct cgroup *cgrp,
        enum cgroup_bpf_attach_type atype;
        struct bpf_prog *old_prog;
        struct bpf_prog_list *pl;
-       struct list_head *progs;
+       struct hlist_head *progs;
        bool found = false;
 
-       atype = to_cgroup_bpf_attach_type(link->type);
+       atype = bpf_cgroup_atype_find(link->type, new_prog->aux->attach_btf_id);
        if (atype < 0)
                return -EINVAL;
 
@@ -649,7 +808,7 @@ static int __cgroup_bpf_replace(struct cgroup *cgrp,
        if (link->link.prog->type != new_prog->type)
                return -EINVAL;
 
-       list_for_each_entry(pl, progs, node) {
+       hlist_for_each_entry(pl, progs, node) {
                if (pl->link == link) {
                        found = true;
                        break;
@@ -688,7 +847,7 @@ out_unlock:
        return ret;
 }
 
-static struct bpf_prog_list *find_detach_entry(struct list_head *progs,
+static struct bpf_prog_list *find_detach_entry(struct hlist_head *progs,
                                               struct bpf_prog *prog,
                                               struct bpf_cgroup_link *link,
                                               bool allow_multi)
@@ -696,14 +855,14 @@ static struct bpf_prog_list *find_detach_entry(struct list_head *progs,
        struct bpf_prog_list *pl;
 
        if (!allow_multi) {
-               if (list_empty(progs))
+               if (hlist_empty(progs))
                        /* report error when trying to detach and nothing is attached */
                        return ERR_PTR(-ENOENT);
 
                /* to maintain backward compatibility NONE and OVERRIDE cgroups
                 * allow detaching with invalid FD (prog==NULL) in legacy mode
                 */
-               return list_first_entry(progs, typeof(*pl), node);
+               return hlist_entry(progs->first, typeof(*pl), node);
        }
 
        if (!prog && !link)
@@ -713,7 +872,7 @@ static struct bpf_prog_list *find_detach_entry(struct list_head *progs,
                return ERR_PTR(-EINVAL);
 
        /* find the prog or link and detach it */
-       list_for_each_entry(pl, progs, node) {
+       hlist_for_each_entry(pl, progs, node) {
                if (pl->prog == prog && pl->link == link)
                        return pl;
        }
@@ -737,7 +896,7 @@ static void purge_effective_progs(struct cgroup *cgrp, struct bpf_prog *prog,
        struct cgroup_subsys_state *css;
        struct bpf_prog_array *progs;
        struct bpf_prog_list *pl;
-       struct list_head *head;
+       struct hlist_head *head;
        struct cgroup *cg;
        int pos;
 
@@ -754,7 +913,7 @@ static void purge_effective_progs(struct cgroup *cgrp, struct bpf_prog *prog,
                                continue;
 
                        head = &cg->bpf.progs[atype];
-                       list_for_each_entry(pl, head, node) {
+                       hlist_for_each_entry(pl, head, node) {
                                if (!prog_list_prog(pl))
                                        continue;
                                if (pl->prog == prog && pl->link == link)
@@ -791,10 +950,16 @@ static int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
        enum cgroup_bpf_attach_type atype;
        struct bpf_prog *old_prog;
        struct bpf_prog_list *pl;
-       struct list_head *progs;
+       struct hlist_head *progs;
+       u32 attach_btf_id = 0;
        u32 flags;
 
-       atype = to_cgroup_bpf_attach_type(type);
+       if (prog)
+               attach_btf_id = prog->aux->attach_btf_id;
+       if (link)
+               attach_btf_id = link->link.prog->aux->attach_btf_id;
+
+       atype = bpf_cgroup_atype_find(type, attach_btf_id);
        if (atype < 0)
                return -EINVAL;
 
@@ -822,13 +987,17 @@ static int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
        }
 
        /* now can actually delete it from this cgroup list */
-       list_del(&pl->node);
+       hlist_del(&pl->node);
+
        kfree(pl);
-       if (list_empty(progs))
+       if (hlist_empty(progs))
                /* last program was detached, reset flags to zero */
                cgrp->bpf.flags[atype] = 0;
-       if (old_prog)
+       if (old_prog) {
+               if (type == BPF_LSM_CGROUP)
+                       bpf_trampoline_unlink_cgroup_shim(old_prog);
                bpf_prog_put(old_prog);
+       }
        static_branch_dec(&cgroup_bpf_enabled_key[atype]);
        return 0;
 }
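
Since __cgroup_bpf_detach() now recovers the atype from the detached program's attach_btf_id, the legacy fd-based detach path works unchanged for BPF_LSM_CGROUP. A minimal userspace sketch (libbpf; prog_fd and cgroup_fd are assumed valid descriptors, and uapi headers are assumed new enough to define BPF_LSM_CGROUP):

  #include <bpf/bpf.h>
  #include <stdio.h>

  static void detach_lsm_cgroup(int prog_fd, int cgroup_fd)
  {
          /* routed through __cgroup_bpf_detach() above */
          if (bpf_prog_detach2(prog_fd, cgroup_fd, BPF_LSM_CGROUP))
                  perror("bpf_prog_detach2");
  }
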
@@ -848,57 +1017,90 @@ static int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
 static int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
                              union bpf_attr __user *uattr)
 {
+       __u32 __user *prog_attach_flags = u64_to_user_ptr(attr->query.prog_attach_flags);
        __u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids);
        enum bpf_attach_type type = attr->query.attach_type;
+       enum cgroup_bpf_attach_type from_atype, to_atype;
        enum cgroup_bpf_attach_type atype;
        struct bpf_prog_array *effective;
-       struct list_head *progs;
-       struct bpf_prog *prog;
        int cnt, ret = 0, i;
+       int total_cnt = 0;
        u32 flags;
 
-       atype = to_cgroup_bpf_attach_type(type);
-       if (atype < 0)
-               return -EINVAL;
-
-       progs = &cgrp->bpf.progs[atype];
-       flags = cgrp->bpf.flags[atype];
+       if (type == BPF_LSM_CGROUP) {
+               if (attr->query.prog_cnt && prog_ids && !prog_attach_flags)
+                       return -EINVAL;
 
-       effective = rcu_dereference_protected(cgrp->bpf.effective[atype],
-                                             lockdep_is_held(&cgroup_mutex));
+               from_atype = CGROUP_LSM_START;
+               to_atype = CGROUP_LSM_END;
+               flags = 0;
+       } else {
+               from_atype = to_cgroup_bpf_attach_type(type);
+               if (from_atype < 0)
+                       return -EINVAL;
+               to_atype = from_atype;
+               flags = cgrp->bpf.flags[from_atype];
+       }
 
-       if (attr->query.query_flags & BPF_F_QUERY_EFFECTIVE)
-               cnt = bpf_prog_array_length(effective);
-       else
-               cnt = prog_list_length(progs);
+       for (atype = from_atype; atype <= to_atype; atype++) {
+               if (attr->query.query_flags & BPF_F_QUERY_EFFECTIVE) {
+                       effective = rcu_dereference_protected(cgrp->bpf.effective[atype],
+                                                             lockdep_is_held(&cgroup_mutex));
+                       total_cnt += bpf_prog_array_length(effective);
+               } else {
+                       total_cnt += prog_list_length(&cgrp->bpf.progs[atype]);
+               }
+       }
 
        if (copy_to_user(&uattr->query.attach_flags, &flags, sizeof(flags)))
                return -EFAULT;
-       if (copy_to_user(&uattr->query.prog_cnt, &cnt, sizeof(cnt)))
+       if (copy_to_user(&uattr->query.prog_cnt, &total_cnt, sizeof(total_cnt)))
                return -EFAULT;
-       if (attr->query.prog_cnt == 0 || !prog_ids || !cnt)
+       if (attr->query.prog_cnt == 0 || !prog_ids || !total_cnt)
                /* return early if user requested only program count + flags */
                return 0;
-       if (attr->query.prog_cnt < cnt) {
-               cnt = attr->query.prog_cnt;
+
+       if (attr->query.prog_cnt < total_cnt) {
+               total_cnt = attr->query.prog_cnt;
                ret = -ENOSPC;
        }
 
-       if (attr->query.query_flags & BPF_F_QUERY_EFFECTIVE) {
-               return bpf_prog_array_copy_to_user(effective, prog_ids, cnt);
-       } else {
-               struct bpf_prog_list *pl;
-               u32 id;
-
-               i = 0;
-               list_for_each_entry(pl, progs, node) {
-                       prog = prog_list_prog(pl);
-                       id = prog->aux->id;
-                       if (copy_to_user(prog_ids + i, &id, sizeof(id)))
-                               return -EFAULT;
-                       if (++i == cnt)
-                               break;
+       for (atype = from_atype; atype <= to_atype && total_cnt; atype++) {
+               if (attr->query.query_flags & BPF_F_QUERY_EFFECTIVE) {
+                       effective = rcu_dereference_protected(cgrp->bpf.effective[atype],
+                                                             lockdep_is_held(&cgroup_mutex));
+                       cnt = min_t(int, bpf_prog_array_length(effective), total_cnt);
+                       ret = bpf_prog_array_copy_to_user(effective, prog_ids, cnt);
+               } else {
+                       struct hlist_head *progs;
+                       struct bpf_prog_list *pl;
+                       struct bpf_prog *prog;
+                       u32 id;
+
+                       progs = &cgrp->bpf.progs[atype];
+                       cnt = min_t(int, prog_list_length(progs), total_cnt);
+                       i = 0;
+                       hlist_for_each_entry(pl, progs, node) {
+                               prog = prog_list_prog(pl);
+                               id = prog->aux->id;
+                               if (copy_to_user(prog_ids + i, &id, sizeof(id)))
+                                       return -EFAULT;
+                               if (++i == cnt)
+                                       break;
+                       }
                }
+
+               if (prog_attach_flags) {
+                       flags = cgrp->bpf.flags[atype];
+
+                       for (i = 0; i < cnt; i++)
+                               if (copy_to_user(prog_attach_flags + i, &flags, sizeof(flags)))
+                                       return -EFAULT;
+                       prog_attach_flags += cnt;
+               }
+
+               prog_ids += cnt;
+               total_cnt -= cnt;
        }
        return ret;
 }
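
With the per-atype loop above, one BPF_LSM_CGROUP query walks every CGROUP_LSM_START..CGROUP_LSM_END slot and returns a per-program attach-flags array instead of the single legacy flags word. A minimal raw-syscall sketch, assuming uapi headers that already carry query.prog_attach_flags and BPF_LSM_CGROUP:

  #include <linux/bpf.h>
  #include <string.h>
  #include <unistd.h>
  #include <sys/syscall.h>

  static int query_lsm_cgroup(int cgroup_fd, __u32 *ids, __u32 *attach_flags,
                              __u32 *cnt)
  {
          union bpf_attr attr;

          memset(&attr, 0, sizeof(attr));
          attr.query.target_fd = cgroup_fd;
          attr.query.attach_type = BPF_LSM_CGROUP;
          attr.query.prog_cnt = *cnt;
          attr.query.prog_ids = (__u64)(unsigned long)ids;
          /* required alongside prog_ids for BPF_LSM_CGROUP; see the
           * -EINVAL check at the top of __cgroup_bpf_query()
           */
          attr.query.prog_attach_flags = (__u64)(unsigned long)attach_flags;

          if (syscall(__NR_bpf, BPF_PROG_QUERY, &attr, sizeof(attr)) < 0)
                  return -1;
          *cnt = attr.query.prog_cnt;     /* kernel writes back the total */
          return 0;
  }
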
@@ -987,6 +1189,8 @@ static void bpf_cgroup_link_release(struct bpf_link *link)
 
        WARN_ON(__cgroup_bpf_detach(cg_link->cgroup, NULL, cg_link,
                                    cg_link->type));
+       if (cg_link->type == BPF_LSM_CGROUP)
+               bpf_trampoline_unlink_cgroup_shim(cg_link->link.prog);
 
        cg = cg_link->cgroup;
        cg_link->cgroup = NULL;
@@ -1331,7 +1535,7 @@ BPF_CALL_0(bpf_get_retval)
        return ctx->retval;
 }
 
-static const struct bpf_func_proto bpf_get_retval_proto = {
+const struct bpf_func_proto bpf_get_retval_proto = {
        .func           = bpf_get_retval,
        .gpl_only       = false,
        .ret_type       = RET_INTEGER,
@@ -1346,7 +1550,7 @@ BPF_CALL_1(bpf_set_retval, int, retval)
        return 0;
 }
 
-static const struct bpf_func_proto bpf_set_retval_proto = {
+const struct bpf_func_proto bpf_set_retval_proto = {
        .func           = bpf_set_retval,
        .gpl_only       = false,
        .ret_type       = RET_INTEGER,
index b5ffebc..805c2ad 100644 (file)
@@ -107,6 +107,9 @@ struct bpf_prog *bpf_prog_alloc_no_stats(unsigned int size, gfp_t gfp_extra_flag
        fp->aux->prog = fp;
        fp->jit_requested = ebpf_jit_enabled();
        fp->blinding_requested = bpf_jit_blinding_enabled(fp);
+#ifdef CONFIG_CGROUP_BPF
+       aux->cgroup_atype = CGROUP_BPF_ATTACH_TYPE_INVALID;
+#endif
 
        INIT_LIST_HEAD_RCU(&fp->aux->ksym.lnode);
        mutex_init(&fp->aux->used_maps_mutex);
@@ -2569,6 +2572,10 @@ static void bpf_prog_free_deferred(struct work_struct *work)
        aux = container_of(work, struct bpf_prog_aux, work);
 #ifdef CONFIG_BPF_SYSCALL
        bpf_free_kfunc_btf_tab(aux->kfunc_btf_tab);
+#endif
+#ifdef CONFIG_CGROUP_BPF
+       if (aux->cgroup_atype != CGROUP_BPF_ATTACH_TYPE_INVALID)
+               bpf_cgroup_atype_put(aux->cgroup_atype);
 #endif
        bpf_free_used_maps(aux);
        bpf_free_used_btfs(aux);
@@ -2666,6 +2673,8 @@ const struct bpf_func_proto bpf_get_local_storage_proto __weak;
 const struct bpf_func_proto bpf_get_ns_current_pid_tgid_proto __weak;
 const struct bpf_func_proto bpf_snprintf_btf_proto __weak;
 const struct bpf_func_proto bpf_seq_printf_btf_proto __weak;
+const struct bpf_func_proto bpf_set_retval_proto __weak;
+const struct bpf_func_proto bpf_get_retval_proto __weak;
 
 const struct bpf_func_proto * __weak bpf_get_trace_printk_proto(void)
 {
@@ -2729,6 +2738,12 @@ bool __weak bpf_jit_needs_zext(void)
        return false;
 }
 
+/* Return TRUE if the JIT backend supports mixing bpf2bpf and tailcalls. */
+bool __weak bpf_jit_supports_subprog_tailcalls(void)
+{
+       return false;
+}
+
 bool __weak bpf_jit_supports_kfunc_call(void)
 {
        return false;
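
The weak default keeps the conservative answer; a JIT that can emit tail calls from subprograms overrides it with a strong definition (x86, which already supports the mix, gains exactly this in the series, and the arm64 JIT follows suit). Arch-side sketch:

  /* in arch/<arch>/net/bpf_jit_comp.c; the strong definition takes
   * precedence over the __weak fallback above
   */
  bool bpf_jit_supports_subprog_tailcalls(void)
  {
          return true;
  }
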
index 7d5af5b..ab688d8 100644 (file)
@@ -3416,6 +3416,8 @@ attach_type_to_prog_type(enum bpf_attach_type attach_type)
                return BPF_PROG_TYPE_SK_LOOKUP;
        case BPF_XDP:
                return BPF_PROG_TYPE_XDP;
+       case BPF_LSM_CGROUP:
+               return BPF_PROG_TYPE_LSM;
        default:
                return BPF_PROG_TYPE_UNSPEC;
        }
@@ -3469,6 +3471,11 @@ static int bpf_prog_attach(const union bpf_attr *attr)
        case BPF_PROG_TYPE_CGROUP_SOCKOPT:
        case BPF_PROG_TYPE_CGROUP_SYSCTL:
        case BPF_PROG_TYPE_SOCK_OPS:
+       case BPF_PROG_TYPE_LSM:
+               if (ptype == BPF_PROG_TYPE_LSM &&
+                   prog->expected_attach_type != BPF_LSM_CGROUP)
+                       return -EINVAL;
+
                ret = cgroup_bpf_prog_attach(attr, ptype, prog);
                break;
        default:
@@ -3506,13 +3513,14 @@ static int bpf_prog_detach(const union bpf_attr *attr)
        case BPF_PROG_TYPE_CGROUP_SOCKOPT:
        case BPF_PROG_TYPE_CGROUP_SYSCTL:
        case BPF_PROG_TYPE_SOCK_OPS:
+       case BPF_PROG_TYPE_LSM:
                return cgroup_bpf_prog_detach(attr, ptype);
        default:
                return -EINVAL;
        }
 }
 
-#define BPF_PROG_QUERY_LAST_FIELD query.prog_cnt
+#define BPF_PROG_QUERY_LAST_FIELD query.prog_attach_flags
 
 static int bpf_prog_query(const union bpf_attr *attr,
                          union bpf_attr __user *uattr)
@@ -3548,6 +3556,7 @@ static int bpf_prog_query(const union bpf_attr *attr,
        case BPF_CGROUP_SYSCTL:
        case BPF_CGROUP_GETSOCKOPT:
        case BPF_CGROUP_SETSOCKOPT:
+       case BPF_LSM_CGROUP:
                return cgroup_bpf_prog_query(attr, uattr);
        case BPF_LIRC_MODE2:
                return lirc_prog_query(attr, uattr);
@@ -4058,6 +4067,11 @@ static int bpf_prog_get_info_by_fd(struct file *file,
 
        if (prog->aux->btf)
                info.btf_id = btf_obj_id(prog->aux->btf);
+       info.attach_btf_id = prog->aux->attach_btf_id;
+       if (prog->aux->attach_btf)
+               info.attach_btf_obj_id = btf_obj_id(prog->aux->attach_btf);
+       else if (prog->aux->dst_prog)
+               info.attach_btf_obj_id = btf_obj_id(prog->aux->dst_prog->aux->attach_btf);
 
        ulen = info.nr_func_info;
        info.nr_func_info = prog->aux->func_info_cnt;
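
bpf_prog_get_info_by_fd() now also tells userspace which BTF object the attach target lives in. A short libbpf sketch (prog_fd assumed valid; uapi headers assumed to carry attach_btf_obj_id):

  #include <bpf/bpf.h>
  #include <stdio.h>

  static void show_attach_target(int prog_fd)
  {
          struct bpf_prog_info info = {};
          __u32 len = sizeof(info);

          if (!bpf_obj_get_info_by_fd(prog_fd, &info, &len))
                  printf("attach_btf_obj_id=%u attach_btf_id=%u\n",
                         info.attach_btf_obj_id, info.attach_btf_id);
  }
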
@@ -4540,6 +4554,8 @@ static int link_create(union bpf_attr *attr, bpfptr_t uattr)
                        ret = bpf_raw_tp_link_attach(prog, NULL);
                else if (prog->expected_attach_type == BPF_TRACE_ITER)
                        ret = bpf_iter_link_attach(attr, uattr, prog);
+               else if (prog->expected_attach_type == BPF_LSM_CGROUP)
+                       ret = cgroup_bpf_link_attach(attr, prog);
                else
                        ret = bpf_tracing_prog_attach(prog,
                                                      attr->link_create.target_fd,
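
With this branch in place, BPF_LSM_CGROUP attachment becomes an ordinary cgroup link. Userspace sketch (libbpf low-level API; fds assumed valid):

  #include <bpf/bpf.h>

  static int attach_lsm_cgroup(int prog_fd, int cgroup_fd)
  {
          /* returns a link fd; the attachment lasts until it is closed */
          return bpf_link_create(prog_fd, cgroup_fd, BPF_LSM_CGROUP, NULL);
  }
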
index 93c7675..6cd2265 100644 (file)
@@ -11,6 +11,8 @@
 #include <linux/rcupdate_wait.h>
 #include <linux/module.h>
 #include <linux/static_call.h>
+#include <linux/bpf_verifier.h>
+#include <linux/bpf_lsm.h>
 
 /* dummy _ops. The verifier will operate on target program's ops. */
 const struct bpf_verifier_ops bpf_extension_verifier_ops = {
@@ -410,7 +412,7 @@ static enum bpf_tramp_prog_type bpf_attach_type_to_tramp(struct bpf_prog *prog)
        }
 }
 
-int bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr)
+static int __bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr)
 {
        enum bpf_tramp_prog_type kind;
        struct bpf_tramp_link *link_exiting;
@@ -418,44 +420,33 @@ int bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_trampoline
        int cnt = 0, i;
 
        kind = bpf_attach_type_to_tramp(link->link.prog);
-       mutex_lock(&tr->mutex);
-       if (tr->extension_prog) {
+       if (tr->extension_prog)
                /* cannot attach fentry/fexit if extension prog is attached.
                 * cannot overwrite extension prog either.
                 */
-               err = -EBUSY;
-               goto out;
-       }
+               return -EBUSY;
 
        for (i = 0; i < BPF_TRAMP_MAX; i++)
                cnt += tr->progs_cnt[i];
 
        if (kind == BPF_TRAMP_REPLACE) {
                /* Cannot attach extension if fentry/fexit are in use. */
-               if (cnt) {
-                       err = -EBUSY;
-                       goto out;
-               }
+               if (cnt)
+                       return -EBUSY;
                tr->extension_prog = link->link.prog;
-               err = bpf_arch_text_poke(tr->func.addr, BPF_MOD_JUMP, NULL,
-                                        link->link.prog->bpf_func);
-               goto out;
-       }
-       if (cnt >= BPF_MAX_TRAMP_LINKS) {
-               err = -E2BIG;
-               goto out;
+               return bpf_arch_text_poke(tr->func.addr, BPF_MOD_JUMP, NULL,
+                                         link->link.prog->bpf_func);
        }
-       if (!hlist_unhashed(&link->tramp_hlist)) {
+       if (cnt >= BPF_MAX_TRAMP_LINKS)
+               return -E2BIG;
+       if (!hlist_unhashed(&link->tramp_hlist))
                /* prog already linked */
-               err = -EBUSY;
-               goto out;
-       }
+               return -EBUSY;
        hlist_for_each_entry(link_exiting, &tr->progs_hlist[kind], tramp_hlist) {
                if (link_exiting->link.prog != link->link.prog)
                        continue;
                /* prog already linked */
-               err = -EBUSY;
-               goto out;
+               return -EBUSY;
        }
 
        hlist_add_head(&link->tramp_hlist, &tr->progs_hlist[kind]);
@@ -465,34 +456,220 @@ int bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_trampoline
                hlist_del_init(&link->tramp_hlist);
                tr->progs_cnt[kind]--;
        }
-out:
+       return err;
+}
+
+int bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr)
+{
+       int err;
+
+       mutex_lock(&tr->mutex);
+       err = __bpf_trampoline_link_prog(link, tr);
        mutex_unlock(&tr->mutex);
        return err;
 }
 
-/* bpf_trampoline_unlink_prog() should never fail. */
-int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr)
+static int __bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr)
 {
        enum bpf_tramp_prog_type kind;
        int err;
 
        kind = bpf_attach_type_to_tramp(link->link.prog);
-       mutex_lock(&tr->mutex);
        if (kind == BPF_TRAMP_REPLACE) {
                WARN_ON_ONCE(!tr->extension_prog);
                err = bpf_arch_text_poke(tr->func.addr, BPF_MOD_JUMP,
                                         tr->extension_prog->bpf_func, NULL);
                tr->extension_prog = NULL;
-               goto out;
+               return err;
        }
        hlist_del_init(&link->tramp_hlist);
        tr->progs_cnt[kind]--;
-       err = bpf_trampoline_update(tr);
-out:
+       return bpf_trampoline_update(tr);
+}
+
+/* bpf_trampoline_unlink_prog() should never fail. */
+int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr)
+{
+       int err;
+
+       mutex_lock(&tr->mutex);
+       err = __bpf_trampoline_unlink_prog(link, tr);
+       mutex_unlock(&tr->mutex);
+       return err;
+}
+
+#if defined(CONFIG_BPF_JIT) && defined(CONFIG_BPF_SYSCALL)
+static void bpf_shim_tramp_link_release(struct bpf_link *link)
+{
+       struct bpf_shim_tramp_link *shim_link =
+               container_of(link, struct bpf_shim_tramp_link, link.link);
+
+       /* paired with 'shim_link->trampoline = tr' in bpf_trampoline_link_cgroup_shim */
+       if (!shim_link->trampoline)
+               return;
+
+       WARN_ON_ONCE(bpf_trampoline_unlink_prog(&shim_link->link, shim_link->trampoline));
+       bpf_trampoline_put(shim_link->trampoline);
+}
+
+static void bpf_shim_tramp_link_dealloc(struct bpf_link *link)
+{
+       struct bpf_shim_tramp_link *shim_link =
+               container_of(link, struct bpf_shim_tramp_link, link.link);
+
+       kfree(shim_link);
+}
+
+static const struct bpf_link_ops bpf_shim_tramp_link_lops = {
+       .release = bpf_shim_tramp_link_release,
+       .dealloc = bpf_shim_tramp_link_dealloc,
+};
+
+static struct bpf_shim_tramp_link *cgroup_shim_alloc(const struct bpf_prog *prog,
+                                                    bpf_func_t bpf_func,
+                                                    int cgroup_atype)
+{
+       struct bpf_shim_tramp_link *shim_link = NULL;
+       struct bpf_prog *p;
+
+       shim_link = kzalloc(sizeof(*shim_link), GFP_USER);
+       if (!shim_link)
+               return NULL;
+
+       p = bpf_prog_alloc(1, 0);
+       if (!p) {
+               kfree(shim_link);
+               return NULL;
+       }
+
+       p->jited = false;
+       p->bpf_func = bpf_func;
+
+       p->aux->cgroup_atype = cgroup_atype;
+       p->aux->attach_func_proto = prog->aux->attach_func_proto;
+       p->aux->attach_btf_id = prog->aux->attach_btf_id;
+       p->aux->attach_btf = prog->aux->attach_btf;
+       btf_get(p->aux->attach_btf);
+       p->type = BPF_PROG_TYPE_LSM;
+       p->expected_attach_type = BPF_LSM_MAC;
+       bpf_prog_inc(p);
+       bpf_link_init(&shim_link->link.link, BPF_LINK_TYPE_UNSPEC,
+                     &bpf_shim_tramp_link_lops, p);
+       bpf_cgroup_atype_get(p->aux->attach_btf_id, cgroup_atype);
+
+       return shim_link;
+}
+
+static struct bpf_shim_tramp_link *cgroup_shim_find(struct bpf_trampoline *tr,
+                                                   bpf_func_t bpf_func)
+{
+       struct bpf_tramp_link *link;
+       int kind;
+
+       for (kind = 0; kind < BPF_TRAMP_MAX; kind++) {
+               hlist_for_each_entry(link, &tr->progs_hlist[kind], tramp_hlist) {
+                       struct bpf_prog *p = link->link.prog;
+
+                       if (p->bpf_func == bpf_func)
+                               return container_of(link, struct bpf_shim_tramp_link, link);
+               }
+       }
+
+       return NULL;
+}
+
+int bpf_trampoline_link_cgroup_shim(struct bpf_prog *prog,
+                                   int cgroup_atype)
+{
+       struct bpf_shim_tramp_link *shim_link = NULL;
+       struct bpf_attach_target_info tgt_info = {};
+       struct bpf_trampoline *tr;
+       bpf_func_t bpf_func;
+       u64 key;
+       int err;
+
+       err = bpf_check_attach_target(NULL, prog, NULL,
+                                     prog->aux->attach_btf_id,
+                                     &tgt_info);
+       if (err)
+               return err;
+
+       key = bpf_trampoline_compute_key(NULL, prog->aux->attach_btf,
+                                        prog->aux->attach_btf_id);
+
+       bpf_lsm_find_cgroup_shim(prog, &bpf_func);
+       tr = bpf_trampoline_get(key, &tgt_info);
+       if (!tr)
+               return -ENOMEM;
+
+       mutex_lock(&tr->mutex);
+
+       shim_link = cgroup_shim_find(tr, bpf_func);
+       if (shim_link) {
+               /* Reusing existing shim attached by the other program. */
+               bpf_link_inc(&shim_link->link.link);
+
+               mutex_unlock(&tr->mutex);
+               bpf_trampoline_put(tr); /* bpf_trampoline_get above */
+               return 0;
+       }
+
+       /* Allocate and install new shim. */
+
+       shim_link = cgroup_shim_alloc(prog, bpf_func, cgroup_atype);
+       if (!shim_link) {
+               err = -ENOMEM;
+               goto err;
+       }
+
+       err = __bpf_trampoline_link_prog(&shim_link->link, tr);
+       if (err)
+               goto err;
+
+       shim_link->trampoline = tr;
+       /* note, we're still holding tr refcnt from above */
+
        mutex_unlock(&tr->mutex);
+
+       return 0;
+err:
+       mutex_unlock(&tr->mutex);
+
+       if (shim_link)
+               bpf_link_put(&shim_link->link.link);
+
+       /* have to release tr while _not_ holding its mutex */
+       bpf_trampoline_put(tr); /* bpf_trampoline_get above */
+
        return err;
 }
 
+void bpf_trampoline_unlink_cgroup_shim(struct bpf_prog *prog)
+{
+       struct bpf_shim_tramp_link *shim_link = NULL;
+       struct bpf_trampoline *tr;
+       bpf_func_t bpf_func;
+       u64 key;
+
+       key = bpf_trampoline_compute_key(NULL, prog->aux->attach_btf,
+                                        prog->aux->attach_btf_id);
+
+       bpf_lsm_find_cgroup_shim(prog, &bpf_func);
+       tr = bpf_trampoline_lookup(key);
+       if (WARN_ON_ONCE(!tr))
+               return;
+
+       mutex_lock(&tr->mutex);
+       shim_link = cgroup_shim_find(tr, bpf_func);
+       mutex_unlock(&tr->mutex);
+
+       if (shim_link)
+               bpf_link_put(&shim_link->link.link);
+
+       bpf_trampoline_put(tr); /* bpf_trampoline_lookup above */
+}
+#endif
+
 struct bpf_trampoline *bpf_trampoline_get(u64 key,
                                          struct bpf_attach_target_info *tgt_info)
 {
@@ -625,6 +802,31 @@ void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start, struct bpf_tramp_
        rcu_read_unlock();
 }
 
+u64 notrace __bpf_prog_enter_lsm_cgroup(struct bpf_prog *prog,
+                                       struct bpf_tramp_run_ctx *run_ctx)
+       __acquires(RCU)
+{
+       /* Runtime stats are exported via actual BPF_LSM_CGROUP
+        * programs, not the shims.
+        */
+       rcu_read_lock();
+       migrate_disable();
+
+       run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx);
+
+       return NO_START_TIME;
+}
+
+void notrace __bpf_prog_exit_lsm_cgroup(struct bpf_prog *prog, u64 start,
+                                       struct bpf_tramp_run_ctx *run_ctx)
+       __releases(RCU)
+{
+       bpf_reset_run_ctx(run_ctx->saved_run_ctx);
+
+       migrate_enable();
+       rcu_read_unlock();
+}
+
 u64 notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog, struct bpf_tramp_run_ctx *run_ctx)
 {
        rcu_read_lock_trace();
index a4012b3..328cfab 100644 (file)
@@ -6153,7 +6153,8 @@ static bool may_update_sockmap(struct bpf_verifier_env *env, int func_id)
 
 static bool allow_tail_call_in_subprogs(struct bpf_verifier_env *env)
 {
-       return env->prog->jit_requested && IS_ENABLED(CONFIG_X86_64);
+       return env->prog->jit_requested &&
+              bpf_jit_supports_subprog_tailcalls();
 }
 
 static int check_map_func_compatibility(struct bpf_verifier_env *env,
@@ -7121,6 +7122,41 @@ static int check_get_func_ip(struct bpf_verifier_env *env)
        return -ENOTSUPP;
 }
 
+static struct bpf_insn_aux_data *cur_aux(struct bpf_verifier_env *env)
+{
+       return &env->insn_aux_data[env->insn_idx];
+}
+
+static bool loop_flag_is_zero(struct bpf_verifier_env *env)
+{
+       struct bpf_reg_state *regs = cur_regs(env);
+       struct bpf_reg_state *reg = &regs[BPF_REG_4];
+       bool reg_is_null = register_is_null(reg);
+
+       if (reg_is_null)
+               mark_chain_precision(env, BPF_REG_4);
+
+       return reg_is_null;
+}
+
+static void update_loop_inline_state(struct bpf_verifier_env *env, u32 subprogno)
+{
+       struct bpf_loop_inline_state *state = &cur_aux(env)->loop_inline_state;
+
+       if (!state->initialized) {
+               state->initialized = 1;
+               state->fit_for_inline = loop_flag_is_zero(env);
+               state->callback_subprogno = subprogno;
+               return;
+       }
+
+       if (!state->fit_for_inline)
+               return;
+
+       state->fit_for_inline = (loop_flag_is_zero(env) &&
+                                state->callback_subprogno == subprogno);
+}
+
 static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
                             int *insn_idx_p)
 {
@@ -7273,6 +7309,7 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
                err = check_bpf_snprintf_call(env, regs);
                break;
        case BPF_FUNC_loop:
+               update_loop_inline_state(env, meta.subprogno);
                err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
                                        set_loop_callback_state);
                break;
@@ -7282,6 +7319,18 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
                                reg_type_str(env, regs[BPF_REG_1].type));
                        return -EACCES;
                }
+               break;
+       case BPF_FUNC_set_retval:
+               if (env->prog->expected_attach_type == BPF_LSM_CGROUP) {
+                       if (!env->prog->aux->attach_func_proto->type) {
+                               /* Make sure programs that attach to void
+                                * hooks don't try to modify return value.
+                                */
+                               verbose(env, "BPF_LSM_CGROUP programs that attach to void LSM hooks can't modify return value!\n");
+                               return -EINVAL;
+                       }
+               }
+               break;
        }
 
        if (err)
@@ -7679,11 +7728,6 @@ static bool check_reg_sane_offset(struct bpf_verifier_env *env,
        return true;
 }
 
-static struct bpf_insn_aux_data *cur_aux(struct bpf_verifier_env *env)
-{
-       return &env->insn_aux_data[env->insn_idx];
-}
-
 enum {
        REASON_BOUNDS   = -1,
        REASON_TYPE     = -2,
@@ -9054,7 +9098,7 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
 
        if (opcode == BPF_END || opcode == BPF_NEG) {
                if (opcode == BPF_NEG) {
-                       if (BPF_SRC(insn->code) != 0 ||
+                       if (BPF_SRC(insn->code) != BPF_K ||
                            insn->src_reg != BPF_REG_0 ||
                            insn->off != 0 || insn->imm != 0) {
                                verbose(env, "BPF_NEG uses reserved fields\n");
@@ -10381,11 +10425,21 @@ static int check_return_code(struct bpf_verifier_env *env)
        const bool is_subprog = frame->subprogno;
 
        /* LSM and struct_ops func-ptr's return type could be "void" */
-       if (!is_subprog &&
-           (prog_type == BPF_PROG_TYPE_STRUCT_OPS ||
-            prog_type == BPF_PROG_TYPE_LSM) &&
-           !prog->aux->attach_func_proto->type)
-               return 0;
+       if (!is_subprog) {
+               switch (prog_type) {
+               case BPF_PROG_TYPE_LSM:
+                       if (prog->expected_attach_type == BPF_LSM_CGROUP)
+                               /* See below, can be 0 or 0-1 depending on hook. */
+                               break;
+                       fallthrough;
+               case BPF_PROG_TYPE_STRUCT_OPS:
+                       if (!prog->aux->attach_func_proto->type)
+                               return 0;
+                       break;
+               default:
+                       break;
+               }
+       }
 
        /* eBPF calling convention is such that R0 is used
         * to return the value from eBPF program.
@@ -10476,6 +10530,22 @@ static int check_return_code(struct bpf_verifier_env *env)
        case BPF_PROG_TYPE_SK_LOOKUP:
                range = tnum_range(SK_DROP, SK_PASS);
                break;
+
+       case BPF_PROG_TYPE_LSM:
+               if (env->prog->expected_attach_type != BPF_LSM_CGROUP) {
+                       /* Regular BPF_PROG_TYPE_LSM programs can return
+                        * any value.
+                        */
+                       return 0;
+               }
+               if (!env->prog->aux->attach_func_proto->type) {
+                       /* Make sure programs that attach to void
+                        * hooks don't try to modify return value.
+                        */
+                       range = tnum_range(1, 1);
+               }
+               break;
+
        case BPF_PROG_TYPE_EXT:
                /* freplace program can return anything as its return value
                 * depends on the to-be-replaced kernel func or bpf program.
@@ -10492,6 +10562,10 @@ static int check_return_code(struct bpf_verifier_env *env)
 
        if (!tnum_in(range, reg->var_off)) {
                verbose_invalid_scalar(env, reg, &range, "program exit", "R0");
+               if (prog->expected_attach_type == BPF_LSM_CGROUP &&
+                   prog_type == BPF_PROG_TYPE_LSM &&
+                   !prog->aux->attach_func_proto->type)
+                       verbose(env, "Note, BPF_LSM_CGROUP programs that attach to void LSM hooks can't modify return value!\n");
                return -EINVAL;
        }
 
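
Concretely: on an int-returning hook a BPF_LSM_CGROUP program must stay within [0, 1], and on a void hook it may only return 1. A BPF-side sketch, assuming the SEC("lsm_cgroup/...") support libbpf gains in this same series:

  #include "vmlinux.h"
  #include <bpf/bpf_helpers.h>
  #include <bpf/bpf_tracing.h>

  SEC("lsm_cgroup/socket_post_create")
  int BPF_PROG(sock_create_guard, struct socket *sock, int family,
               int type, int protocol, int kern)
  {
          if (family == 2 /* AF_INET, numeric to stay self-contained */ &&
              protocol == 1 /* IPPROTO_ICMP */)
                  return 0;       /* deny; the default retval is -EPERM */
          return 1;               /* allow */
  }

  char _license[] SEC("license") = "GPL";
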
@@ -14296,6 +14370,142 @@ patch_call_imm:
        return 0;
 }
 
+static struct bpf_prog *inline_bpf_loop(struct bpf_verifier_env *env,
+                                       int position,
+                                       s32 stack_base,
+                                       u32 callback_subprogno,
+                                       u32 *cnt)
+{
+       s32 r6_offset = stack_base + 0 * BPF_REG_SIZE;
+       s32 r7_offset = stack_base + 1 * BPF_REG_SIZE;
+       s32 r8_offset = stack_base + 2 * BPF_REG_SIZE;
+       int reg_loop_max = BPF_REG_6;
+       int reg_loop_cnt = BPF_REG_7;
+       int reg_loop_ctx = BPF_REG_8;
+
+       struct bpf_prog *new_prog;
+       u32 callback_start;
+       u32 call_insn_offset;
+       s32 callback_offset;
+
+       /* This represents an inlined version of bpf_iter.c:bpf_loop,
+        * take care to keep the two in sync when modifying either.
+        */
+       struct bpf_insn insn_buf[] = {
+               /* Return error and jump to the end of the patch if
+                * the expected number of iterations is too big.
+                */
+               BPF_JMP_IMM(BPF_JLE, BPF_REG_1, BPF_MAX_LOOPS, 2),
+               BPF_MOV32_IMM(BPF_REG_0, -E2BIG),
+               BPF_JMP_IMM(BPF_JA, 0, 0, 16),
+               /* spill R6, R7, R8 to use these as loop vars */
+               BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, r6_offset),
+               BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_7, r7_offset),
+               BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_8, r8_offset),
+               /* initialize loop vars */
+               BPF_MOV64_REG(reg_loop_max, BPF_REG_1),
+               BPF_MOV32_IMM(reg_loop_cnt, 0),
+               BPF_MOV64_REG(reg_loop_ctx, BPF_REG_3),
+               /* loop header,
+                * if reg_loop_cnt >= reg_loop_max skip the loop body
+                */
+               BPF_JMP_REG(BPF_JGE, reg_loop_cnt, reg_loop_max, 5),
+               /* callback call,
+                * correct callback offset would be set after patching
+                */
+               BPF_MOV64_REG(BPF_REG_1, reg_loop_cnt),
+               BPF_MOV64_REG(BPF_REG_2, reg_loop_ctx),
+               BPF_CALL_REL(0),
+               /* increment loop counter */
+               BPF_ALU64_IMM(BPF_ADD, reg_loop_cnt, 1),
+               /* jump to loop header if callback returned 0 */
+               BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, -6),
+               /* return value of bpf_loop,
+                * set R0 to the number of iterations
+                */
+               BPF_MOV64_REG(BPF_REG_0, reg_loop_cnt),
+               /* restore original values of R6, R7, R8 */
+               BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_10, r6_offset),
+               BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_10, r7_offset),
+               BPF_LDX_MEM(BPF_DW, BPF_REG_8, BPF_REG_10, r8_offset),
+       };
+
+       *cnt = ARRAY_SIZE(insn_buf);
+       new_prog = bpf_patch_insn_data(env, position, insn_buf, *cnt);
+       if (!new_prog)
+               return new_prog;
+
+       /* callback start is known only after patching */
+       callback_start = env->subprog_info[callback_subprogno].start;
+       /* Note: insn_buf[12] is the BPF_CALL_REL instruction from above */
+       call_insn_offset = position + 12;
+       callback_offset = callback_start - call_insn_offset - 1;
+       new_prog->insnsi[call_insn_offset].imm = callback_offset;
+
+       return new_prog;
+}
+
+static bool is_bpf_loop_call(struct bpf_insn *insn)
+{
+       return insn->code == (BPF_JMP | BPF_CALL) &&
+               insn->src_reg == 0 &&
+               insn->imm == BPF_FUNC_loop;
+}
+
+/* For all sub-programs in the program (including main), check
+ * insn_aux_data to see if there are any bpf_loop calls that require
+ * inlining. If such calls are found, they are replaced with the
+ * instruction sequence produced by inline_bpf_loop(), and the
+ * subprog stack_depth is increased by the size of three registers.
+ * This stack space is used to spill the values of R6, R7 and R8,
+ * which are used to store the loop bound, counter and context
+ * variables.
+ */
+static int optimize_bpf_loop(struct bpf_verifier_env *env)
+{
+       struct bpf_subprog_info *subprogs = env->subprog_info;
+       int i, cur_subprog = 0, cnt, delta = 0;
+       struct bpf_insn *insn = env->prog->insnsi;
+       int insn_cnt = env->prog->len;
+       u16 stack_depth = subprogs[cur_subprog].stack_depth;
+       u16 stack_depth_roundup = round_up(stack_depth, 8) - stack_depth;
+       u16 stack_depth_extra = 0;
+
+       for (i = 0; i < insn_cnt; i++, insn++) {
+               struct bpf_loop_inline_state *inline_state =
+                       &env->insn_aux_data[i + delta].loop_inline_state;
+
+               if (is_bpf_loop_call(insn) && inline_state->fit_for_inline) {
+                       struct bpf_prog *new_prog;
+
+                       stack_depth_extra = BPF_REG_SIZE * 3 + stack_depth_roundup;
+                       new_prog = inline_bpf_loop(env,
+                                                  i + delta,
+                                                  -(stack_depth + stack_depth_extra),
+                                                  inline_state->callback_subprogno,
+                                                  &cnt);
+                       if (!new_prog)
+                               return -ENOMEM;
+
+                       delta     += cnt - 1;
+                       env->prog  = new_prog;
+                       insn       = new_prog->insnsi + i + delta;
+               }
+
+               if (subprogs[cur_subprog + 1].start == i + delta + 1) {
+                       subprogs[cur_subprog].stack_depth += stack_depth_extra;
+                       cur_subprog++;
+                       stack_depth = subprogs[cur_subprog].stack_depth;
+                       stack_depth_roundup = round_up(stack_depth, 8) - stack_depth;
+                       stack_depth_extra = 0;
+               }
+       }
+
+       env->prog->aux->stack_depth = env->subprog_info[0].stack_depth;
+
+       return 0;
+}
+
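
The net effect: a bpf_loop() call whose flags argument is provably zero and whose callback is the same static subprog at every visit gets patched into the straight-line loop emitted by inline_bpf_loop(), eliminating the helper call per iteration. A BPF-side sketch of a call shape that qualifies:

  #include "vmlinux.h"
  #include <bpf/bpf_helpers.h>

  static long sum_cb(__u32 idx, void *ctx)
  {
          *(__u64 *)ctx += idx;
          return 0;       /* keep iterating */
  }

  SEC("xdp")
  int sum_indices(struct xdp_md *xdp)
  {
          __u64 sum = 0;

          /* static callback, flags == 0: marked fit_for_inline above */
          bpf_loop(10, sum_cb, &sum, 0);
          return XDP_PASS;
  }

  char _license[] SEC("license") = "GPL";
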
 static void free_states(struct bpf_verifier_env *env)
 {
        struct bpf_verifier_state_list *sl, *sln;
@@ -14715,6 +14925,7 @@ int bpf_check_attach_target(struct bpf_verifier_log *log,
                fallthrough;
        case BPF_MODIFY_RETURN:
        case BPF_LSM_MAC:
+       case BPF_LSM_CGROUP:
        case BPF_TRACE_FENTRY:
        case BPF_TRACE_FEXIT:
                if (!btf_type_is_func(t)) {
@@ -15033,6 +15244,9 @@ skip_full_check:
                ret = check_max_stack_depth(env);
 
        /* instruction rewrites happen after this point */
+       if (ret == 0)
+               ret = optimize_bpf_loop(env);
+
        if (is_priv) {
                if (ret == 0)
                        opt_hard_wire_dead_code_branches(env);
index 1f5351c..88ba5b4 100644 (file)
@@ -1343,6 +1343,7 @@ static void __uprobe_perf_func(struct trace_uprobe *tu,
        int size, esize;
        int rctx;
 
+#ifdef CONFIG_BPF_EVENTS
        if (bpf_prog_array_valid(call)) {
                u32 ret;
 
@@ -1350,6 +1351,7 @@ static void __uprobe_perf_func(struct trace_uprobe *tu,
                if (!ret)
                        return;
        }
+#endif /* CONFIG_BPF_EVENTS */
 
        esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu));
 
index 2a7836e..5820704 100644 (file)
@@ -14733,9 +14733,9 @@ static struct skb_segment_test skb_segment_tests[] __initconst = {
                .build_skb = build_test_skb_linear_no_head_frag,
                .features = NETIF_F_SG | NETIF_F_FRAGLIST |
                            NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_GSO |
-                           NETIF_F_LLTX_BIT | NETIF_F_GRO |
+                           NETIF_F_LLTX | NETIF_F_GRO |
                            NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM |
-                           NETIF_F_HW_VLAN_STAG_TX_BIT
+                           NETIF_F_HW_VLAN_STAG_TX
        }
 };
 
index 994d916..4ef77ec 100644 (file)
@@ -5012,8 +5012,8 @@ static const struct bpf_func_proto bpf_get_socket_uid_proto = {
        .arg1_type      = ARG_PTR_TO_CTX,
 };
 
-static int _bpf_setsockopt(struct sock *sk, int level, int optname,
-                          char *optval, int optlen)
+static int __bpf_setsockopt(struct sock *sk, int level, int optname,
+                           char *optval, int optlen)
 {
        char devname[IFNAMSIZ];
        int val, valbool;
@@ -5024,8 +5024,6 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname,
        if (!sk_fullsock(sk))
                return -EINVAL;
 
-       sock_owned_by_me(sk);
-
        if (level == SOL_SOCKET) {
                if (optlen != sizeof(int) && optname != SO_BINDTODEVICE)
                        return -EINVAL;
@@ -5258,14 +5256,20 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname,
        return ret;
 }
 
-static int _bpf_getsockopt(struct sock *sk, int level, int optname,
+static int _bpf_setsockopt(struct sock *sk, int level, int optname,
                           char *optval, int optlen)
+{
+       if (sk_fullsock(sk))
+               sock_owned_by_me(sk);
+       return __bpf_setsockopt(sk, level, optname, optval, optlen);
+}
+
+static int __bpf_getsockopt(struct sock *sk, int level, int optname,
+                           char *optval, int optlen)
 {
        if (!sk_fullsock(sk))
                goto err_clear;
 
-       sock_owned_by_me(sk);
-
        if (level == SOL_SOCKET) {
                if (optlen != sizeof(int))
                        goto err_clear;
@@ -5360,6 +5364,14 @@ err_clear:
        return -EINVAL;
 }
 
+static int _bpf_getsockopt(struct sock *sk, int level, int optname,
+                          char *optval, int optlen)
+{
+       if (sk_fullsock(sk))
+               sock_owned_by_me(sk);
+       return __bpf_getsockopt(sk, level, optname, optval, optlen);
+}
+
 BPF_CALL_5(bpf_sk_setsockopt, struct sock *, sk, int, level,
           int, optname, char *, optval, int, optlen)
 {
@@ -5400,6 +5412,40 @@ const struct bpf_func_proto bpf_sk_getsockopt_proto = {
        .arg5_type      = ARG_CONST_SIZE,
 };
 
+BPF_CALL_5(bpf_unlocked_sk_setsockopt, struct sock *, sk, int, level,
+          int, optname, char *, optval, int, optlen)
+{
+       return __bpf_setsockopt(sk, level, optname, optval, optlen);
+}
+
+const struct bpf_func_proto bpf_unlocked_sk_setsockopt_proto = {
+       .func           = bpf_unlocked_sk_setsockopt,
+       .gpl_only       = false,
+       .ret_type       = RET_INTEGER,
+       .arg1_type      = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
+       .arg2_type      = ARG_ANYTHING,
+       .arg3_type      = ARG_ANYTHING,
+       .arg4_type      = ARG_PTR_TO_MEM | MEM_RDONLY,
+       .arg5_type      = ARG_CONST_SIZE,
+};
+
+BPF_CALL_5(bpf_unlocked_sk_getsockopt, struct sock *, sk, int, level,
+          int, optname, char *, optval, int, optlen)
+{
+       return __bpf_getsockopt(sk, level, optname, optval, optlen);
+}
+
+const struct bpf_func_proto bpf_unlocked_sk_getsockopt_proto = {
+       .func           = bpf_unlocked_sk_getsockopt,
+       .gpl_only       = false,
+       .ret_type       = RET_INTEGER,
+       .arg1_type      = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
+       .arg2_type      = ARG_ANYTHING,
+       .arg3_type      = ARG_ANYTHING,
+       .arg4_type      = ARG_PTR_TO_UNINIT_MEM,
+       .arg5_type      = ARG_CONST_SIZE,
+};
+
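
These variants skip the sock_owned_by_me() assertion for callers running where the owner lock is not held. A hypothetical wiring sketch (kernel side; example_get_func_proto and its prog type are illustrative, not part of this series):

  static const struct bpf_func_proto *
  example_get_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
  {
          switch (func_id) {
          case BPF_FUNC_setsockopt:
                  return &bpf_unlocked_sk_setsockopt_proto;
          case BPF_FUNC_getsockopt:
                  return &bpf_unlocked_sk_getsockopt_proto;
          default:
                  return bpf_base_func_proto(func_id);
          }
  }
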
 BPF_CALL_5(bpf_sock_addr_setsockopt, struct bpf_sock_addr_kern *, ctx,
           int, level, int, optname, char *, optval, int, optlen)
 {
@@ -6470,8 +6516,8 @@ __bpf_skc_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
                 u64 flags)
 {
        struct sock *sk = NULL;
-       u8 family = AF_UNSPEC;
        struct net *net;
+       u8 family;
        int sdif;
 
        if (len == sizeof(tuple->ipv4))
@@ -6481,8 +6527,7 @@ __bpf_skc_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
        else
                return NULL;
 
-       if (unlikely(family == AF_UNSPEC || flags ||
-                    !((s32)netns_id < 0 || netns_id <= S32_MAX)))
+       if (unlikely(flags || !((s32)netns_id < 0 || netns_id <= S32_MAX)))
                goto out;
 
        if (family == AF_INET)
index fc69154..266d3b7 100644 (file)
@@ -497,23 +497,27 @@ bool sk_msg_is_readable(struct sock *sk)
 }
 EXPORT_SYMBOL_GPL(sk_msg_is_readable);
 
-static struct sk_msg *sk_psock_create_ingress_msg(struct sock *sk,
-                                                 struct sk_buff *skb)
+static struct sk_msg *alloc_sk_msg(void)
 {
        struct sk_msg *msg;
 
-       if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf)
+       msg = kzalloc(sizeof(*msg), __GFP_NOWARN | GFP_KERNEL);
+       if (unlikely(!msg))
                return NULL;
+       sg_init_marker(msg->sg.data, NR_MSG_FRAG_IDS);
+       return msg;
+}
 
-       if (!sk_rmem_schedule(sk, skb, skb->truesize))
+static struct sk_msg *sk_psock_create_ingress_msg(struct sock *sk,
+                                                 struct sk_buff *skb)
+{
+       if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf)
                return NULL;
 
-       msg = kzalloc(sizeof(*msg), __GFP_NOWARN | GFP_KERNEL);
-       if (unlikely(!msg))
+       if (!sk_rmem_schedule(sk, skb, skb->truesize))
                return NULL;
 
-       sk_msg_init(msg);
-       return msg;
+       return alloc_sk_msg();
 }
 
 static int sk_psock_skb_ingress_enqueue(struct sk_buff *skb,
@@ -590,13 +594,12 @@ static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb,
 static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb,
                                     u32 off, u32 len)
 {
-       struct sk_msg *msg = kzalloc(sizeof(*msg), __GFP_NOWARN | GFP_ATOMIC);
+       struct sk_msg *msg = alloc_sk_msg();
        struct sock *sk = psock->sk;
        int err;
 
        if (unlikely(!msg))
                return -EAGAIN;
-       sk_msg_init(msg);
        skb_set_owner_r(skb, sk);
        err = sk_psock_skb_ingress_enqueue(skb, off, len, psock, sk, msg);
        if (err < 0)
@@ -1165,21 +1168,14 @@ static void sk_psock_done_strp(struct sk_psock *psock)
 }
 #endif /* CONFIG_BPF_STREAM_PARSER */
 
-static int sk_psock_verdict_recv(read_descriptor_t *desc, struct sk_buff *skb,
-                                unsigned int offset, size_t orig_len)
+static int sk_psock_verdict_recv(struct sock *sk, struct sk_buff *skb)
 {
-       struct sock *sk = (struct sock *)desc->arg.data;
        struct sk_psock *psock;
        struct bpf_prog *prog;
        int ret = __SK_DROP;
-       int len = orig_len;
+       int len = skb->len;
 
-       /* clone here so sk_eat_skb() in tcp_read_sock does not drop our data */
-       skb = skb_clone(skb, GFP_ATOMIC);
-       if (!skb) {
-               desc->error = -ENOMEM;
-               return 0;
-       }
+       skb_get(skb);
 
        rcu_read_lock();
        psock = sk_psock(sk);
@@ -1192,12 +1188,10 @@ static int sk_psock_verdict_recv(read_descriptor_t *desc, struct sk_buff *skb,
        if (!prog)
                prog = READ_ONCE(psock->progs.skb_verdict);
        if (likely(prog)) {
-               skb->sk = sk;
                skb_dst_drop(skb);
                skb_bpf_redirect_clear(skb);
                ret = bpf_prog_run_pin_on_cpu(prog, skb);
                ret = sk_psock_map_verd(ret, skb_bpf_redirect_fetch(skb));
-               skb->sk = NULL;
        }
        if (sk_psock_verdict_apply(psock, skb, ret) < 0)
                len = 0;
@@ -1209,16 +1203,10 @@ out:
 static void sk_psock_verdict_data_ready(struct sock *sk)
 {
        struct socket *sock = sk->sk_socket;
-       read_descriptor_t desc;
 
-       if (unlikely(!sock || !sock->ops || !sock->ops->read_sock))
+       if (unlikely(!sock || !sock->ops || !sock->ops->read_skb))
                return;
-
-       desc.arg.data = sk;
-       desc.error = 0;
-       desc.count = 1;
-
-       sock->ops->read_sock(sk, &desc, sk_psock_verdict_recv);
+       sock->ops->read_skb(sk, sk_psock_verdict_recv);
 }
 
 void sk_psock_start_verdict(struct sock *sk, struct sk_psock *psock)
index 9f08ccf..028813d 100644 (file)
@@ -1578,7 +1578,7 @@ void sock_map_destroy(struct sock *sk)
        saved_destroy = psock->saved_destroy;
        sock_map_remove_links(sk, psock);
        rcu_read_unlock();
-       sk_psock_stop(psock, true);
+       sk_psock_stop(psock, false);
        sk_psock_put(sk, psock);
        saved_destroy(sk);
 }
index da81f56..7abd652 100644 (file)
@@ -1040,6 +1040,7 @@ const struct proto_ops inet_stream_ops = {
        .sendpage          = inet_sendpage,
        .splice_read       = tcp_splice_read,
        .read_sock         = tcp_read_sock,
+       .read_skb          = tcp_read_skb,
        .sendmsg_locked    = tcp_sendmsg_locked,
        .sendpage_locked   = tcp_sendpage_locked,
        .peek_len          = tcp_peek_len,
@@ -1067,7 +1068,7 @@ const struct proto_ops inet_dgram_ops = {
        .setsockopt        = sock_common_setsockopt,
        .getsockopt        = sock_common_getsockopt,
        .sendmsg           = inet_sendmsg,
-       .read_sock         = udp_read_sock,
+       .read_skb          = udp_read_skb,
        .recvmsg           = inet_recvmsg,
        .mmap              = sock_no_mmap,
        .sendpage          = inet_sendpage,
index f79ab94..7a18163 100644 (file)
 /* "extern" is to avoid sparse warning.  It is only used in bpf_struct_ops.c. */
 extern struct bpf_struct_ops bpf_tcp_congestion_ops;
 
-static u32 optional_ops[] = {
-       offsetof(struct tcp_congestion_ops, init),
-       offsetof(struct tcp_congestion_ops, release),
-       offsetof(struct tcp_congestion_ops, set_state),
-       offsetof(struct tcp_congestion_ops, cwnd_event),
-       offsetof(struct tcp_congestion_ops, in_ack_event),
-       offsetof(struct tcp_congestion_ops, pkts_acked),
-       offsetof(struct tcp_congestion_ops, min_tso_segs),
-       offsetof(struct tcp_congestion_ops, sndbuf_expand),
-       offsetof(struct tcp_congestion_ops, cong_control),
-};
-
 static u32 unsupported_ops[] = {
        offsetof(struct tcp_congestion_ops, get_info),
 };
@@ -51,18 +39,6 @@ static int bpf_tcp_ca_init(struct btf *btf)
        return 0;
 }
 
-static bool is_optional(u32 member_offset)
-{
-       unsigned int i;
-
-       for (i = 0; i < ARRAY_SIZE(optional_ops); i++) {
-               if (member_offset == optional_ops[i])
-                       return true;
-       }
-
-       return false;
-}
-
 static bool is_unsupported(u32 member_offset)
 {
        unsigned int i;
@@ -111,6 +87,12 @@ static int bpf_tcp_ca_btf_struct_access(struct bpf_verifier_log *log,
        }
 
        switch (off) {
+       case offsetof(struct sock, sk_pacing_rate):
+               end = offsetofend(struct sock, sk_pacing_rate);
+               break;
+       case offsetof(struct sock, sk_pacing_status):
+               end = offsetofend(struct sock, sk_pacing_status);
+               break;
        case bpf_ctx_range(struct inet_connection_sock, icsk_ca_priv):
                end = offsetofend(struct inet_connection_sock, icsk_ca_priv);
                break;
@@ -240,7 +222,6 @@ static int bpf_tcp_ca_init_member(const struct btf_type *t,
 {
        const struct tcp_congestion_ops *utcp_ca;
        struct tcp_congestion_ops *tcp_ca;
-       int prog_fd;
        u32 moff;
 
        utcp_ca = (const struct tcp_congestion_ops *)udata;
@@ -262,14 +243,6 @@ static int bpf_tcp_ca_init_member(const struct btf_type *t,
                return 1;
        }
 
-       if (!btf_type_resolve_func_ptr(btf_vmlinux, member->type, NULL))
-               return 0;
-
-       /* Ensure bpf_prog is provided for compulsory func ptr */
-       prog_fd = (int)(*(unsigned long *)(udata + moff));
-       if (!prog_fd && !is_optional(moff) && !is_unsupported(moff))
-               return -EINVAL;
-
        return 0;
 }
 
index d2ca56a..21bdee8 100644 (file)
@@ -1734,6 +1734,50 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
 }
 EXPORT_SYMBOL(tcp_read_sock);
 
+int tcp_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
+{
+       struct tcp_sock *tp = tcp_sk(sk);
+       u32 seq = tp->copied_seq;
+       struct sk_buff *skb;
+       int copied = 0;
+       u32 offset;
+
+       if (sk->sk_state == TCP_LISTEN)
+               return -ENOTCONN;
+
+       while ((skb = tcp_recv_skb(sk, seq, &offset)) != NULL) {
+               int used;
+
+               __skb_unlink(skb, &sk->sk_receive_queue);
+               used = recv_actor(sk, skb);
+               if (used <= 0) {
+                       if (!copied)
+                               copied = used;
+                       break;
+               }
+               seq += used;
+               copied += used;
+
+               if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) {
+                       consume_skb(skb);
+                       ++seq;
+                       break;
+               }
+               consume_skb(skb);
+               break;
+       }
+       WRITE_ONCE(tp->copied_seq, seq);
+
+       tcp_rcv_space_adjust(sk);
+
+       /* Clean up data we have read: This will do ACK frames. */
+       if (copied > 0)
+               tcp_cleanup_rbuf(sk, copied);
+
+       return copied;
+}
+EXPORT_SYMBOL(tcp_read_skb);
+
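
The new contract is deliberately small: the actor receives the socket and one skb and returns how many bytes it consumed, with <= 0 stopping the walk. Note the caller keeps its skb reference and frees it afterwards, so an actor that queues the data must take its own reference, as sk_psock_verdict_recv() does with skb_get() earlier in this series. Minimal sketch (names are illustrative):

  static int example_recv_actor(struct sock *sk, struct sk_buff *skb)
  {
          /* inspect skb->data / skb->len here; do not free the skb,
           * tcp_read_skb() above consume_skb()s it after we return
           */
          return skb->len;        /* consumed everything; <= 0 stops */
  }

  /* hooked up through the new proto_ops member:
   *      copied = sock->ops->read_skb(sk, example_recv_actor);
   */
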
 int tcp_peek_len(struct socket *sock)
 {
        return tcp_inq(sock->sk);
index 6172b47..2516078 100644 (file)
@@ -1797,8 +1797,7 @@ busy_check:
 }
 EXPORT_SYMBOL(__skb_recv_udp);
 
-int udp_read_sock(struct sock *sk, read_descriptor_t *desc,
-                 sk_read_actor_t recv_actor)
+int udp_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
 {
        int copied = 0;
 
@@ -1820,7 +1819,8 @@ int udp_read_sock(struct sock *sk, read_descriptor_t *desc,
                        continue;
                }
 
-               used = recv_actor(desc, skb, 0, skb->len);
+               WARN_ON(!skb_set_owner_sk_safe(skb, sk));
+               used = recv_actor(sk, skb);
                if (used <= 0) {
                        if (!copied)
                                copied = used;
@@ -1831,13 +1831,12 @@ int udp_read_sock(struct sock *sk, read_descriptor_t *desc,
                }
 
                kfree_skb(skb);
-               if (!desc->count)
-                       break;
+               break;
        }
 
        return copied;
 }
-EXPORT_SYMBOL(udp_read_sock);
+EXPORT_SYMBOL(udp_read_skb);
 
 /*
  *     This should be easy, if there is something there we
index 658823e..0ee0770 100644 (file)
@@ -702,6 +702,7 @@ const struct proto_ops inet6_stream_ops = {
        .sendpage_locked   = tcp_sendpage_locked,
        .splice_read       = tcp_splice_read,
        .read_sock         = tcp_read_sock,
+       .read_skb          = tcp_read_skb,
        .peek_len          = tcp_peek_len,
 #ifdef CONFIG_COMPAT
        .compat_ioctl      = inet6_compat_ioctl,
@@ -727,7 +728,7 @@ const struct proto_ops inet6_dgram_ops = {
        .getsockopt        = sock_common_getsockopt,    /* ok           */
        .sendmsg           = inet6_sendmsg,             /* retpoline's sake */
        .recvmsg           = inet6_recvmsg,             /* retpoline's sake */
-       .read_sock         = udp_read_sock,
+       .read_skb          = udp_read_skb,
        .mmap              = sock_no_mmap,
        .sendpage          = sock_no_sendpage,
        .set_peek_off      = sk_set_peek_off,
index 784b4b3..bf338b7 100644 (file)
@@ -763,10 +763,8 @@ static ssize_t unix_stream_splice_read(struct socket *,  loff_t *ppos,
                                       unsigned int flags);
 static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
 static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
-static int unix_read_sock(struct sock *sk, read_descriptor_t *desc,
-                         sk_read_actor_t recv_actor);
-static int unix_stream_read_sock(struct sock *sk, read_descriptor_t *desc,
-                                sk_read_actor_t recv_actor);
+static int unix_read_skb(struct sock *sk, skb_read_actor_t recv_actor);
+static int unix_stream_read_skb(struct sock *sk, skb_read_actor_t recv_actor);
 static int unix_dgram_connect(struct socket *, struct sockaddr *,
                              int, int);
 static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
@@ -820,7 +818,7 @@ static const struct proto_ops unix_stream_ops = {
        .shutdown =     unix_shutdown,
        .sendmsg =      unix_stream_sendmsg,
        .recvmsg =      unix_stream_recvmsg,
-       .read_sock =    unix_stream_read_sock,
+       .read_skb =     unix_stream_read_skb,
        .mmap =         sock_no_mmap,
        .sendpage =     unix_stream_sendpage,
        .splice_read =  unix_stream_splice_read,
@@ -845,7 +843,7 @@ static const struct proto_ops unix_dgram_ops = {
        .listen =       sock_no_listen,
        .shutdown =     unix_shutdown,
        .sendmsg =      unix_dgram_sendmsg,
-       .read_sock =    unix_read_sock,
+       .read_skb =     unix_read_skb,
        .recvmsg =      unix_dgram_recvmsg,
        .mmap =         sock_no_mmap,
        .sendpage =     sock_no_sendpage,
@@ -2506,8 +2504,7 @@ static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, size_t si
        return __unix_dgram_recvmsg(sk, msg, size, flags);
 }
 
-static int unix_read_sock(struct sock *sk, read_descriptor_t *desc,
-                         sk_read_actor_t recv_actor)
+static int unix_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
 {
        int copied = 0;
 
@@ -2522,7 +2519,7 @@ static int unix_read_sock(struct sock *sk, read_descriptor_t *desc,
                if (!skb)
                        return err;
 
-               used = recv_actor(desc, skb, 0, skb->len);
+               used = recv_actor(sk, skb);
                if (used <= 0) {
                        if (!copied)
                                copied = used;
@@ -2533,8 +2530,7 @@ static int unix_read_sock(struct sock *sk, read_descriptor_t *desc,
                }
 
                kfree_skb(skb);
-               if (!desc->count)
-                       break;
+               break;
        }
 
        return copied;
@@ -2669,13 +2665,12 @@ static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk,
 }
 #endif
 
-static int unix_stream_read_sock(struct sock *sk, read_descriptor_t *desc,
-                                sk_read_actor_t recv_actor)
+static int unix_stream_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
 {
        if (unlikely(sk->sk_state != TCP_ESTABLISHED))
                return -ENOTCONN;
 
-       return unix_read_sock(sk, desc, recv_actor);
+       return unix_read_skb(sk, recv_actor);
 }
 
 static int unix_stream_read_generic(struct unix_stream_read_state *state,
index 03e3d35..5002a5b 100644 (file)
@@ -45,9 +45,6 @@ tprogs-y += xdp_rxq_info
 tprogs-y += syscall_tp
 tprogs-y += cpustat
 tprogs-y += xdp_adjust_tail
-tprogs-y += xdpsock
-tprogs-y += xdpsock_ctrl_proc
-tprogs-y += xsk_fwd
 tprogs-y += xdp_fwd
 tprogs-y += task_fd_query
 tprogs-y += xdp_sample_pkts
@@ -109,9 +106,6 @@ xdp_rxq_info-objs := xdp_rxq_info_user.o
 syscall_tp-objs := syscall_tp_user.o
 cpustat-objs := cpustat_user.o
 xdp_adjust_tail-objs := xdp_adjust_tail_user.o
-xdpsock-objs := xdpsock_user.o
-xdpsock_ctrl_proc-objs := xdpsock_ctrl_proc.o
-xsk_fwd-objs := xsk_fwd.o
 xdp_fwd-objs := xdp_fwd_user.o
 task_fd_query-objs := task_fd_query_user.o $(TRACE_HELPERS)
 xdp_sample_pkts-objs := xdp_sample_pkts_user.o
@@ -179,7 +173,6 @@ always-y += xdp_sample_pkts_kern.o
 always-y += ibumad_kern.o
 always-y += hbm_out_kern.o
 always-y += hbm_edt_kern.o
-always-y += xdpsock_kern.o
 
 ifeq ($(ARCH), arm)
 # Strip all except -D__LINUX_ARM_ARCH__ option needed to handle linux
@@ -224,8 +217,6 @@ TPROGLDLIBS_tracex4         += -lrt
 TPROGLDLIBS_trace_output       += -lrt
 TPROGLDLIBS_map_perf_test      += -lrt
 TPROGLDLIBS_test_overhead      += -lrt
-TPROGLDLIBS_xdpsock            += -pthread -lcap
-TPROGLDLIBS_xsk_fwd            += -pthread
 
 # Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline:
 # make M=samples/bpf LLC=~/git/llvm-project/llvm/build/bin/llc CLANG=~/git/llvm-project/llvm/build/bin/clang
diff --git a/samples/bpf/xdp1_kern.c b/samples/bpf/xdp1_kern.c
index f0c5d95..0a5c704 100644 (file)
@@ -39,11 +39,13 @@ static int parse_ipv6(void *data, u64 nh_off, void *data_end)
        return ip6h->nexthdr;
 }
 
-SEC("xdp1")
+#define XDPBUFSIZE     64
+SEC("xdp.frags")
 int xdp_prog1(struct xdp_md *ctx)
 {
-       void *data_end = (void *)(long)ctx->data_end;
-       void *data = (void *)(long)ctx->data;
+       __u8 pkt[XDPBUFSIZE] = {};
+       void *data_end = &pkt[XDPBUFSIZE-1];
+       void *data = pkt;
        struct ethhdr *eth = data;
        int rc = XDP_DROP;
        long *value;
@@ -51,6 +53,9 @@ int xdp_prog1(struct xdp_md *ctx)
        u64 nh_off;
        u32 ipproto;
 
+       if (bpf_xdp_load_bytes(ctx, 0, pkt, sizeof(pkt)))
+               return rc;
+
        nh_off = sizeof(*eth);
        if (data + nh_off > data_end)
                return rc;
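
[Editor's note] The conversion above replaces direct ctx->data access with
SEC("xdp.frags") plus bpf_xdp_load_bytes(), which copies the leading bytes
into a stack buffer and therefore works even when a multi-buffer frame is
scattered across frags. Note that the hunk sets data_end to
&pkt[XDPBUFSIZE-1], the address of the last byte rather than one past it,
so its bounds checks are one byte stricter than the buffer requires. A
self-contained sketch of the same pattern with the conventional
one-past-the-end bound (the program name is hypothetical):

	#include <linux/bpf.h>
	#include <linux/if_ether.h>
	#include <bpf/bpf_helpers.h>
	#include <bpf/bpf_endian.h>

	#define XDPBUFSIZE 64

	SEC("xdp.frags")
	int xdp_parse_headers(struct xdp_md *ctx)
	{
		__u8 pkt[XDPBUFSIZE] = {};
		void *data = pkt;
		void *data_end = pkt + sizeof(pkt);	/* one past the end */
		struct ethhdr *eth = data;

		/* Linear copy of the headers; frames shorter than the
		 * buffer fail the copy and are dropped, as in the
		 * sample above. */
		if (bpf_xdp_load_bytes(ctx, 0, pkt, sizeof(pkt)))
			return XDP_DROP;

		if (data + sizeof(*eth) > data_end)
			return XDP_DROP;

		return eth->h_proto == bpf_htons(ETH_P_IP) ? XDP_PASS : XDP_DROP;
	}

	char _license[] SEC("license") = "GPL";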
diff --git a/samples/bpf/xdp2_kern.c b/samples/bpf/xdp2_kern.c
index d8a64ab..3332ba6 100644 (file)
@@ -55,11 +55,13 @@ static int parse_ipv6(void *data, u64 nh_off, void *data_end)
        return ip6h->nexthdr;
 }
 
-SEC("xdp1")
+#define XDPBUFSIZE     64
+SEC("xdp.frags")
 int xdp_prog1(struct xdp_md *ctx)
 {
-       void *data_end = (void *)(long)ctx->data_end;
-       void *data = (void *)(long)ctx->data;
+       __u8 pkt[XDPBUFSIZE] = {};
+       void *data_end = &pkt[XDPBUFSIZE-1];
+       void *data = pkt;
        struct ethhdr *eth = data;
        int rc = XDP_DROP;
        long *value;
@@ -67,6 +69,9 @@ int xdp_prog1(struct xdp_md *ctx)
        u64 nh_off;
        u32 ipproto;
 
+       if (bpf_xdp_load_bytes(ctx, 0, pkt, sizeof(pkt)))
+               return rc;
+
        nh_off = sizeof(*eth);
        if (data + nh_off > data_end)
                return rc;
diff --git a/samples/bpf/xdp_tx_iptunnel_kern.c b/samples/bpf/xdp_tx_iptunnel_kern.c
index 575d57e..0e2bca3 100644 (file)
@@ -212,7 +212,7 @@ static __always_inline int handle_ipv6(struct xdp_md *xdp)
        return XDP_TX;
 }
 
-SEC("xdp_tx_iptunnel")
+SEC("xdp.frags")
 int _xdp_tx_iptunnel(struct xdp_md *xdp)
 {
        void *data_end = (void *)(long)xdp->data_end;
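
[Editor's note] Renaming the section to "xdp.frags" is what opts the sample
into multi-buffer support: libbpf translates that section name into the
BPF_F_XDP_HAS_FRAGS program flag at load time. Parsing via ctx->data remains
valid here because the headers sit in the first buffer. A sketch of the
userspace load path (the file name and helper are illustrative):

	#include <bpf/libbpf.h>

	static struct bpf_object *load_frags_obj(const char *path)
	{
		/* NULL-on-error assumes libbpf 1.0 error reporting */
		struct bpf_object *obj = bpf_object__open_file(path, NULL);

		if (!obj)
			return NULL;
		/* SEC("xdp.frags") makes libbpf set BPF_F_XDP_HAS_FRAGS,
		 * marking the program as multi-buffer aware. */
		if (bpf_object__load(obj)) {
			bpf_object__close(obj);
			return NULL;
		}
		return obj;
	}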
diff --git a/samples/bpf/xdpsock.h b/samples/bpf/xdpsock.h
deleted file mode 100644 (file)
index fd70cce..0000000
+++ /dev/null
@@ -1,19 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0
- *
- * Copyright(c) 2019 Intel Corporation.
- */
-
-#ifndef XDPSOCK_H_
-#define XDPSOCK_H_
-
-#define MAX_SOCKS 4
-
-#define SOCKET_NAME "sock_cal_bpf_fd"
-#define MAX_NUM_OF_CLIENTS 10
-
-#define CLOSE_CONN  1
-
-typedef __u64 u64;
-typedef __u32 u32;
-
-#endif /* XDPSOCK_H */
diff --git a/samples/bpf/xdpsock_ctrl_proc.c b/samples/bpf/xdpsock_ctrl_proc.c
deleted file mode 100644 (file)
index 28b5f2a..0000000
+++ /dev/null
@@ -1,190 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/* Copyright(c) 2017 - 2018 Intel Corporation. */
-
-#include <errno.h>
-#include <getopt.h>
-#include <libgen.h>
-#include <net/if.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <sys/socket.h>
-#include <sys/un.h>
-#include <unistd.h>
-
-#include <bpf/bpf.h>
-#include <bpf/xsk.h>
-#include "xdpsock.h"
-
-/* libbpf APIs for AF_XDP are deprecated starting from v0.7 */
-#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
-
-static const char *opt_if = "";
-
-static struct option long_options[] = {
-       {"interface", required_argument, 0, 'i'},
-       {0, 0, 0, 0}
-};
-
-static void usage(const char *prog)
-{
-       const char *str =
-               "  Usage: %s [OPTIONS]\n"
-               "  Options:\n"
-               "  -i, --interface=n    Run on interface n\n"
-               "\n";
-       fprintf(stderr, "%s\n", str);
-
-       exit(0);
-}
-
-static void parse_command_line(int argc, char **argv)
-{
-       int option_index, c;
-
-       opterr = 0;
-
-       for (;;) {
-               c = getopt_long(argc, argv, "i:",
-                               long_options, &option_index);
-               if (c == -1)
-                       break;
-
-               switch (c) {
-               case 'i':
-                       opt_if = optarg;
-                       break;
-               default:
-                       usage(basename(argv[0]));
-               }
-       }
-}
-
-static int send_xsks_map_fd(int sock, int fd)
-{
-       char cmsgbuf[CMSG_SPACE(sizeof(int))];
-       struct msghdr msg;
-       struct iovec iov;
-       int value = 0;
-
-       if (fd == -1) {
-               fprintf(stderr, "Incorrect fd = %d\n", fd);
-               return -1;
-       }
-       iov.iov_base = &value;
-       iov.iov_len = sizeof(int);
-
-       msg.msg_name = NULL;
-       msg.msg_namelen = 0;
-       msg.msg_iov = &iov;
-       msg.msg_iovlen = 1;
-       msg.msg_flags = 0;
-       msg.msg_control = cmsgbuf;
-       msg.msg_controllen = CMSG_LEN(sizeof(int));
-
-       struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);
-
-       cmsg->cmsg_level = SOL_SOCKET;
-       cmsg->cmsg_type = SCM_RIGHTS;
-       cmsg->cmsg_len = CMSG_LEN(sizeof(int));
-
-       *(int *)CMSG_DATA(cmsg) = fd;
-       int ret = sendmsg(sock, &msg, 0);
-
-       if (ret == -1) {
-               fprintf(stderr, "Sendmsg failed with %s", strerror(errno));
-               return -errno;
-       }
-
-       return ret;
-}
-
-int
-main(int argc, char **argv)
-{
-       struct sockaddr_un server;
-       int listening = 1;
-       int rval, msgsock;
-       int ifindex = 0;
-       int flag = 1;
-       int cmd = 0;
-       int sock;
-       int err;
-       int xsks_map_fd;
-
-       parse_command_line(argc, argv);
-
-       ifindex = if_nametoindex(opt_if);
-       if (ifindex == 0) {
-               fprintf(stderr, "Unable to get ifindex for Interface %s. Reason:%s",
-                       opt_if, strerror(errno));
-               return -errno;
-       }
-
-       sock = socket(AF_UNIX, SOCK_STREAM, 0);
-       if (sock < 0) {
-               fprintf(stderr, "Opening socket stream failed: %s", strerror(errno));
-               return -errno;
-       }
-
-       server.sun_family = AF_UNIX;
-       strcpy(server.sun_path, SOCKET_NAME);
-
-       setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, &flag, sizeof(int));
-
-       if (bind(sock, (struct sockaddr *)&server, sizeof(struct sockaddr_un))) {
-               fprintf(stderr, "Binding to socket stream failed: %s", strerror(errno));
-               return -errno;
-       }
-
-       listen(sock, MAX_NUM_OF_CLIENTS);
-
-       err = xsk_setup_xdp_prog(ifindex, &xsks_map_fd);
-       if (err) {
-               fprintf(stderr, "Setup of xdp program failed\n");
-               goto close_sock;
-       }
-
-       while (listening) {
-               msgsock = accept(sock, 0, 0);
-               if (msgsock == -1) {
-                       fprintf(stderr, "Error accepting connection: %s", strerror(errno));
-                       err = -errno;
-                       goto close_sock;
-               }
-               err = send_xsks_map_fd(msgsock, xsks_map_fd);
-               if (err <= 0) {
-                       fprintf(stderr, "Error %d sending xsks_map_fd\n", err);
-                       goto cleanup;
-               }
-               do {
-                       rval = read(msgsock, &cmd, sizeof(int));
-                       if (rval < 0) {
-                               fprintf(stderr, "Error reading stream message");
-                       } else {
-                               if (cmd != CLOSE_CONN)
-                                       fprintf(stderr, "Recv unknown cmd = %d\n", cmd);
-                               listening = 0;
-                               break;
-                       }
-               } while (rval > 0);
-       }
-       close(msgsock);
-       close(sock);
-       unlink(SOCKET_NAME);
-
-       /* Unset fd for given ifindex */
-       err = bpf_xdp_detach(ifindex, 0, NULL);
-       if (err) {
-               fprintf(stderr, "Error when unsetting bpf prog_fd for ifindex(%d)\n", ifindex);
-               return err;
-       }
-
-       return 0;
-
-cleanup:
-       close(msgsock);
-close_sock:
-       close(sock);
-       unlink(SOCKET_NAME);
-       return err;
-}
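
[Editor's note] The control process removed above existed to hand the
xsks_map file descriptor to less privileged clients over a unix socket via
SCM_RIGHTS (see send_xsks_map_fd() in the deletion). For reference, the
receive side of that handshake is the usual recvmsg()/CMSG dance; a sketch
(the helper name is hypothetical; the matching in-tree consumer is part of
the xdpsock_user.c removal below):

	#include <errno.h>
	#include <sys/socket.h>
	#include <sys/uio.h>

	static int recv_xsks_map_fd(int sock)
	{
		char cmsgbuf[CMSG_SPACE(sizeof(int))];
		int value = 0;
		struct iovec iov = { .iov_base = &value, .iov_len = sizeof(int) };
		struct msghdr msg = {
			.msg_iov = &iov,
			.msg_iovlen = 1,
			.msg_control = cmsgbuf,
			.msg_controllen = sizeof(cmsgbuf),
		};
		struct cmsghdr *cmsg;

		if (recvmsg(sock, &msg, 0) < 0)
			return -errno;

		cmsg = CMSG_FIRSTHDR(&msg);
		if (!cmsg || cmsg->cmsg_level != SOL_SOCKET ||
		    cmsg->cmsg_type != SCM_RIGHTS)
			return -EINVAL;

		/* The kernel installs a fresh fd referring to the
		 * sender's map; hand it to the caller. */
		return *(int *)CMSG_DATA(cmsg);
	}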
diff --git a/samples/bpf/xdpsock_kern.c b/samples/bpf/xdpsock_kern.c
deleted file mode 100644 (file)
index 0543048..0000000
+++ /dev/null
@@ -1,24 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <linux/bpf.h>
-#include <bpf/bpf_helpers.h>
-#include "xdpsock.h"
-
-/* This XDP program is only needed for the XDP_SHARED_UMEM mode.
- * If you do not use this mode, libbpf can supply an XDP program for you.
- */
-
-struct {
-       __uint(type, BPF_MAP_TYPE_XSKMAP);
-       __uint(max_entries, MAX_SOCKS);
-       __uint(key_size, sizeof(int));
-       __uint(value_size, sizeof(int));
-} xsks_map SEC(".maps");
-
-static unsigned int rr;
-
-SEC("xdp_sock") int xdp_sock_prog(struct xdp_md *ctx)
-{
-       rr = (rr + 1) & (MAX_SOCKS - 1);
-
-       return bpf_redirect_map(&xsks_map, rr, XDP_DROP);
-}
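
[Editor's note] The deleted program round-robins frames across AF_XDP
sockets with rr = (rr + 1) & (MAX_SOCKS - 1), a bitmask that is only a valid
modulo because MAX_SOCKS (4) is a power of two. A size-agnostic sketch of
the same idea (all names are illustrative):

	#include <linux/bpf.h>
	#include <bpf/bpf_helpers.h>

	#define NUM_SOCKS 3	/* need not be a power of two */

	struct {
		__uint(type, BPF_MAP_TYPE_XSKMAP);
		__uint(max_entries, NUM_SOCKS);
		__uint(key_size, sizeof(int));
		__uint(value_size, sizeof(int));
	} xsks_map SEC(".maps");

	static unsigned int rr;

	SEC("xdp")
	int xdp_sock_rr(struct xdp_md *ctx)
	{
		rr = (rr + 1) % NUM_SOCKS;
		/* Fall back to XDP_DROP if no socket is bound at rr */
		return bpf_redirect_map(&xsks_map, rr, XDP_DROP);
	}

	char _license[] SEC("license") = "GPL";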
diff --git a/samples/bpf/xdpsock_user.c b/samples/bpf/xdpsock_user.c
deleted file mode 100644 (file)
index be7d257..0000000
+++ /dev/null
@@ -1,2019 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/* Copyright(c) 2017 - 2018 Intel Corporation. */
-
-#include <errno.h>
-#include <getopt.h>
-#include <libgen.h>
-#include <linux/bpf.h>
-#include <linux/if_link.h>
-#include <linux/if_xdp.h>
-#include <linux/if_ether.h>
-#include <linux/ip.h>
-#include <linux/limits.h>
-#include <linux/udp.h>
-#include <arpa/inet.h>
-#include <locale.h>
-#include <net/ethernet.h>
-#include <netinet/ether.h>
-#include <net/if.h>
-#include <poll.h>
-#include <pthread.h>
-#include <signal.h>
-#include <stdbool.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/capability.h>
-#include <sys/mman.h>
-#include <sys/socket.h>
-#include <sys/types.h>
-#include <sys/un.h>
-#include <time.h>
-#include <unistd.h>
-#include <sched.h>
-
-#include <bpf/libbpf.h>
-#include <bpf/xsk.h>
-#include <bpf/bpf.h>
-#include "xdpsock.h"
-
-/* libbpf APIs for AF_XDP are deprecated starting from v0.7 */
-#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
-
-#ifndef SOL_XDP
-#define SOL_XDP 283
-#endif
-
-#ifndef AF_XDP
-#define AF_XDP 44
-#endif
-
-#ifndef PF_XDP
-#define PF_XDP AF_XDP
-#endif
-
-#define NUM_FRAMES (4 * 1024)
-#define MIN_PKT_SIZE 64
-
-#define DEBUG_HEXDUMP 0
-
-#define VLAN_PRIO_MASK         0xe000 /* Priority Code Point */
-#define VLAN_PRIO_SHIFT                13
-#define VLAN_VID_MASK          0x0fff /* VLAN Identifier */
-#define VLAN_VID__DEFAULT      1
-#define VLAN_PRI__DEFAULT      0
-
-#define NSEC_PER_SEC           1000000000UL
-#define NSEC_PER_USEC          1000
-
-#define SCHED_PRI__DEFAULT     0
-
-typedef __u64 u64;
-typedef __u32 u32;
-typedef __u16 u16;
-typedef __u8  u8;
-
-static unsigned long prev_time;
-static long tx_cycle_diff_min;
-static long tx_cycle_diff_max;
-static double tx_cycle_diff_ave;
-static long tx_cycle_cnt;
-
-enum benchmark_type {
-       BENCH_RXDROP = 0,
-       BENCH_TXONLY = 1,
-       BENCH_L2FWD = 2,
-};
-
-static enum benchmark_type opt_bench = BENCH_RXDROP;
-static u32 opt_xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
-static const char *opt_if = "";
-static int opt_ifindex;
-static int opt_queue;
-static unsigned long opt_duration;
-static unsigned long start_time;
-static bool benchmark_done;
-static u32 opt_batch_size = 64;
-static int opt_pkt_count;
-static u16 opt_pkt_size = MIN_PKT_SIZE;
-static u32 opt_pkt_fill_pattern = 0x12345678;
-static bool opt_vlan_tag;
-static u16 opt_pkt_vlan_id = VLAN_VID__DEFAULT;
-static u16 opt_pkt_vlan_pri = VLAN_PRI__DEFAULT;
-static struct ether_addr opt_txdmac = {{ 0x3c, 0xfd, 0xfe,
-                                        0x9e, 0x7f, 0x71 }};
-static struct ether_addr opt_txsmac = {{ 0xec, 0xb1, 0xd7,
-                                        0x98, 0x3a, 0xc0 }};
-static bool opt_extra_stats;
-static bool opt_quiet;
-static bool opt_app_stats;
-static const char *opt_irq_str = "";
-static u32 irq_no;
-static int irqs_at_init = -1;
-static u32 sequence;
-static int opt_poll;
-static int opt_interval = 1;
-static int opt_retries = 3;
-static u32 opt_xdp_bind_flags = XDP_USE_NEED_WAKEUP;
-static u32 opt_umem_flags;
-static int opt_unaligned_chunks;
-static int opt_mmap_flags;
-static int opt_xsk_frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE;
-static int opt_timeout = 1000;
-static bool opt_need_wakeup = true;
-static u32 opt_num_xsks = 1;
-static u32 prog_id;
-static bool opt_busy_poll;
-static bool opt_reduced_cap;
-static clockid_t opt_clock = CLOCK_MONOTONIC;
-static unsigned long opt_tx_cycle_ns;
-static int opt_schpolicy = SCHED_OTHER;
-static int opt_schprio = SCHED_PRI__DEFAULT;
-static bool opt_tstamp;
-
-struct vlan_ethhdr {
-       unsigned char h_dest[6];
-       unsigned char h_source[6];
-       __be16 h_vlan_proto;
-       __be16 h_vlan_TCI;
-       __be16 h_vlan_encapsulated_proto;
-};
-
-#define PKTGEN_MAGIC 0xbe9be955
-struct pktgen_hdr {
-       __be32 pgh_magic;
-       __be32 seq_num;
-       __be32 tv_sec;
-       __be32 tv_usec;
-};
-
-struct xsk_ring_stats {
-       unsigned long rx_npkts;
-       unsigned long tx_npkts;
-       unsigned long rx_dropped_npkts;
-       unsigned long rx_invalid_npkts;
-       unsigned long tx_invalid_npkts;
-       unsigned long rx_full_npkts;
-       unsigned long rx_fill_empty_npkts;
-       unsigned long tx_empty_npkts;
-       unsigned long prev_rx_npkts;
-       unsigned long prev_tx_npkts;
-       unsigned long prev_rx_dropped_npkts;
-       unsigned long prev_rx_invalid_npkts;
-       unsigned long prev_tx_invalid_npkts;
-       unsigned long prev_rx_full_npkts;
-       unsigned long prev_rx_fill_empty_npkts;
-       unsigned long prev_tx_empty_npkts;
-};
-
-struct xsk_driver_stats {
-       unsigned long intrs;
-       unsigned long prev_intrs;
-};
-
-struct xsk_app_stats {
-       unsigned long rx_empty_polls;
-       unsigned long fill_fail_polls;
-       unsigned long copy_tx_sendtos;
-       unsigned long tx_wakeup_sendtos;
-       unsigned long opt_polls;
-       unsigned long prev_rx_empty_polls;
-       unsigned long prev_fill_fail_polls;
-       unsigned long prev_copy_tx_sendtos;
-       unsigned long prev_tx_wakeup_sendtos;
-       unsigned long prev_opt_polls;
-};
-
-struct xsk_umem_info {
-       struct xsk_ring_prod fq;
-       struct xsk_ring_cons cq;
-       struct xsk_umem *umem;
-       void *buffer;
-};
-
-struct xsk_socket_info {
-       struct xsk_ring_cons rx;
-       struct xsk_ring_prod tx;
-       struct xsk_umem_info *umem;
-       struct xsk_socket *xsk;
-       struct xsk_ring_stats ring_stats;
-       struct xsk_app_stats app_stats;
-       struct xsk_driver_stats drv_stats;
-       u32 outstanding_tx;
-};
-
-static const struct clockid_map {
-       const char *name;
-       clockid_t clockid;
-} clockids_map[] = {
-       { "REALTIME", CLOCK_REALTIME },
-       { "TAI", CLOCK_TAI },
-       { "BOOTTIME", CLOCK_BOOTTIME },
-       { "MONOTONIC", CLOCK_MONOTONIC },
-       { NULL }
-};
-
-static const struct sched_map {
-       const char *name;
-       int policy;
-} schmap[] = {
-       { "OTHER", SCHED_OTHER },
-       { "FIFO", SCHED_FIFO },
-       { NULL }
-};
-
-static int num_socks;
-struct xsk_socket_info *xsks[MAX_SOCKS];
-int sock;
-
-static int get_clockid(clockid_t *id, const char *name)
-{
-       const struct clockid_map *clk;
-
-       for (clk = clockids_map; clk->name; clk++) {
-               if (strcasecmp(clk->name, name) == 0) {
-                       *id = clk->clockid;
-                       return 0;
-               }
-       }
-
-       return -1;
-}
-
-static int get_schpolicy(int *policy, const char *name)
-{
-       const struct sched_map *sch;
-
-       for (sch = schmap; sch->name; sch++) {
-               if (strcasecmp(sch->name, name) == 0) {
-                       *policy = sch->policy;
-                       return 0;
-               }
-       }
-
-       return -1;
-}
-
-static unsigned long get_nsecs(void)
-{
-       struct timespec ts;
-
-       clock_gettime(opt_clock, &ts);
-       return ts.tv_sec * 1000000000UL + ts.tv_nsec;
-}
-
-static void print_benchmark(bool running)
-{
-       const char *bench_str = "INVALID";
-
-       if (opt_bench == BENCH_RXDROP)
-               bench_str = "rxdrop";
-       else if (opt_bench == BENCH_TXONLY)
-               bench_str = "txonly";
-       else if (opt_bench == BENCH_L2FWD)
-               bench_str = "l2fwd";
-
-       printf("%s:%d %s ", opt_if, opt_queue, bench_str);
-       if (opt_xdp_flags & XDP_FLAGS_SKB_MODE)
-               printf("xdp-skb ");
-       else if (opt_xdp_flags & XDP_FLAGS_DRV_MODE)
-               printf("xdp-drv ");
-       else
-               printf("        ");
-
-       if (opt_poll)
-               printf("poll() ");
-
-       if (running) {
-               printf("running...");
-               fflush(stdout);
-       }
-}
-
-static int xsk_get_xdp_stats(int fd, struct xsk_socket_info *xsk)
-{
-       struct xdp_statistics stats;
-       socklen_t optlen;
-       int err;
-
-       optlen = sizeof(stats);
-       err = getsockopt(fd, SOL_XDP, XDP_STATISTICS, &stats, &optlen);
-       if (err)
-               return err;
-
-       if (optlen == sizeof(struct xdp_statistics)) {
-               xsk->ring_stats.rx_dropped_npkts = stats.rx_dropped;
-               xsk->ring_stats.rx_invalid_npkts = stats.rx_invalid_descs;
-               xsk->ring_stats.tx_invalid_npkts = stats.tx_invalid_descs;
-               xsk->ring_stats.rx_full_npkts = stats.rx_ring_full;
-               xsk->ring_stats.rx_fill_empty_npkts = stats.rx_fill_ring_empty_descs;
-               xsk->ring_stats.tx_empty_npkts = stats.tx_ring_empty_descs;
-               return 0;
-       }
-
-       return -EINVAL;
-}
-
-static void dump_app_stats(long dt)
-{
-       int i;
-
-       for (i = 0; i < num_socks && xsks[i]; i++) {
-               char *fmt = "%-18s %'-14.0f %'-14lu\n";
-               double rx_empty_polls_ps, fill_fail_polls_ps, copy_tx_sendtos_ps,
-                               tx_wakeup_sendtos_ps, opt_polls_ps;
-
-               rx_empty_polls_ps = (xsks[i]->app_stats.rx_empty_polls -
-                                       xsks[i]->app_stats.prev_rx_empty_polls) * 1000000000. / dt;
-               fill_fail_polls_ps = (xsks[i]->app_stats.fill_fail_polls -
-                                       xsks[i]->app_stats.prev_fill_fail_polls) * 1000000000. / dt;
-               copy_tx_sendtos_ps = (xsks[i]->app_stats.copy_tx_sendtos -
-                                       xsks[i]->app_stats.prev_copy_tx_sendtos) * 1000000000. / dt;
-               tx_wakeup_sendtos_ps = (xsks[i]->app_stats.tx_wakeup_sendtos -
-                                       xsks[i]->app_stats.prev_tx_wakeup_sendtos)
-                                                                               * 1000000000. / dt;
-               opt_polls_ps = (xsks[i]->app_stats.opt_polls -
-                                       xsks[i]->app_stats.prev_opt_polls) * 1000000000. / dt;
-
-               printf("\n%-18s %-14s %-14s\n", "", "calls/s", "count");
-               printf(fmt, "rx empty polls", rx_empty_polls_ps, xsks[i]->app_stats.rx_empty_polls);
-               printf(fmt, "fill fail polls", fill_fail_polls_ps,
-                                                       xsks[i]->app_stats.fill_fail_polls);
-               printf(fmt, "copy tx sendtos", copy_tx_sendtos_ps,
-                                                       xsks[i]->app_stats.copy_tx_sendtos);
-               printf(fmt, "tx wakeup sendtos", tx_wakeup_sendtos_ps,
-                                                       xsks[i]->app_stats.tx_wakeup_sendtos);
-               printf(fmt, "opt polls", opt_polls_ps, xsks[i]->app_stats.opt_polls);
-
-               xsks[i]->app_stats.prev_rx_empty_polls = xsks[i]->app_stats.rx_empty_polls;
-               xsks[i]->app_stats.prev_fill_fail_polls = xsks[i]->app_stats.fill_fail_polls;
-               xsks[i]->app_stats.prev_copy_tx_sendtos = xsks[i]->app_stats.copy_tx_sendtos;
-               xsks[i]->app_stats.prev_tx_wakeup_sendtos = xsks[i]->app_stats.tx_wakeup_sendtos;
-               xsks[i]->app_stats.prev_opt_polls = xsks[i]->app_stats.opt_polls;
-       }
-
-       if (opt_tx_cycle_ns) {
-               printf("\n%-18s %-10s %-10s %-10s %-10s %-10s\n",
-                      "", "period", "min", "ave", "max", "cycle");
-               printf("%-18s %-10lu %-10lu %-10lu %-10lu %-10lu\n",
-                      "Cyclic TX", opt_tx_cycle_ns, tx_cycle_diff_min,
-                      (long)(tx_cycle_diff_ave / tx_cycle_cnt),
-                      tx_cycle_diff_max, tx_cycle_cnt);
-       }
-}
-
-static bool get_interrupt_number(void)
-{
-       FILE *f_int_proc;
-       char line[4096];
-       bool found = false;
-
-       f_int_proc = fopen("/proc/interrupts", "r");
-       if (f_int_proc == NULL) {
-               printf("Failed to open /proc/interrupts.\n");
-               return found;
-       }
-
-       while (!feof(f_int_proc) && !found) {
-               /* Make sure to read a full line at a time */
-               if (fgets(line, sizeof(line), f_int_proc) == NULL ||
-                               line[strlen(line) - 1] != '\n') {
-                       printf("Error reading from interrupts file\n");
-                       break;
-               }
-
-               /* Extract interrupt number from line */
-               if (strstr(line, opt_irq_str) != NULL) {
-                       irq_no = atoi(line);
-                       found = true;
-                       break;
-               }
-       }
-
-       fclose(f_int_proc);
-
-       return found;
-}
-
-static int get_irqs(void)
-{
-       char count_path[PATH_MAX];
-       int total_intrs = -1;
-       FILE *f_count_proc;
-       char line[4096];
-
-       snprintf(count_path, sizeof(count_path),
-               "/sys/kernel/irq/%i/per_cpu_count", irq_no);
-       f_count_proc = fopen(count_path, "r");
-       if (f_count_proc == NULL) {
-               printf("Failed to open %s\n", count_path);
-               return total_intrs;
-       }
-
-       if (fgets(line, sizeof(line), f_count_proc) == NULL ||
-                       line[strlen(line) - 1] != '\n') {
-               printf("Error reading from %s\n", count_path);
-       } else {
-               static const char com[2] = ",";
-               char *token;
-
-               total_intrs = 0;
-               token = strtok(line, com);
-               while (token != NULL) {
-                       /* sum up interrupts across all cores */
-                       total_intrs += atoi(token);
-                       token = strtok(NULL, com);
-               }
-       }
-
-       fclose(f_count_proc);
-
-       return total_intrs;
-}
-
-static void dump_driver_stats(long dt)
-{
-       int i;
-
-       for (i = 0; i < num_socks && xsks[i]; i++) {
-               char *fmt = "%-18s %'-14.0f %'-14lu\n";
-               double intrs_ps;
-               int n_ints = get_irqs();
-
-               if (n_ints < 0) {
-                       printf("error getting intr info for intr %i\n", irq_no);
-                       return;
-               }
-               xsks[i]->drv_stats.intrs = n_ints - irqs_at_init;
-
-               intrs_ps = (xsks[i]->drv_stats.intrs - xsks[i]->drv_stats.prev_intrs) *
-                        1000000000. / dt;
-
-               printf("\n%-18s %-14s %-14s\n", "", "intrs/s", "count");
-               printf(fmt, "irqs", intrs_ps, xsks[i]->drv_stats.intrs);
-
-               xsks[i]->drv_stats.prev_intrs = xsks[i]->drv_stats.intrs;
-       }
-}
-
-static void dump_stats(void)
-{
-       unsigned long now = get_nsecs();
-       long dt = now - prev_time;
-       int i;
-
-       prev_time = now;
-
-       for (i = 0; i < num_socks && xsks[i]; i++) {
-               char *fmt = "%-18s %'-14.0f %'-14lu\n";
-               double rx_pps, tx_pps, dropped_pps, rx_invalid_pps, full_pps, fill_empty_pps,
-                       tx_invalid_pps, tx_empty_pps;
-
-               rx_pps = (xsks[i]->ring_stats.rx_npkts - xsks[i]->ring_stats.prev_rx_npkts) *
-                        1000000000. / dt;
-               tx_pps = (xsks[i]->ring_stats.tx_npkts - xsks[i]->ring_stats.prev_tx_npkts) *
-                        1000000000. / dt;
-
-               printf("\n sock%d@", i);
-               print_benchmark(false);
-               printf("\n");
-
-               printf("%-18s %-14s %-14s %-14.2f\n", "", "pps", "pkts",
-                      dt / 1000000000.);
-               printf(fmt, "rx", rx_pps, xsks[i]->ring_stats.rx_npkts);
-               printf(fmt, "tx", tx_pps, xsks[i]->ring_stats.tx_npkts);
-
-               xsks[i]->ring_stats.prev_rx_npkts = xsks[i]->ring_stats.rx_npkts;
-               xsks[i]->ring_stats.prev_tx_npkts = xsks[i]->ring_stats.tx_npkts;
-
-               if (opt_extra_stats) {
-                       if (!xsk_get_xdp_stats(xsk_socket__fd(xsks[i]->xsk), xsks[i])) {
-                               dropped_pps = (xsks[i]->ring_stats.rx_dropped_npkts -
-                                               xsks[i]->ring_stats.prev_rx_dropped_npkts) *
-                                                       1000000000. / dt;
-                               rx_invalid_pps = (xsks[i]->ring_stats.rx_invalid_npkts -
-                                               xsks[i]->ring_stats.prev_rx_invalid_npkts) *
-                                                       1000000000. / dt;
-                               tx_invalid_pps = (xsks[i]->ring_stats.tx_invalid_npkts -
-                                               xsks[i]->ring_stats.prev_tx_invalid_npkts) *
-                                                       1000000000. / dt;
-                               full_pps = (xsks[i]->ring_stats.rx_full_npkts -
-                                               xsks[i]->ring_stats.prev_rx_full_npkts) *
-                                                       1000000000. / dt;
-                               fill_empty_pps = (xsks[i]->ring_stats.rx_fill_empty_npkts -
-                                               xsks[i]->ring_stats.prev_rx_fill_empty_npkts) *
-                                                       1000000000. / dt;
-                               tx_empty_pps = (xsks[i]->ring_stats.tx_empty_npkts -
-                                               xsks[i]->ring_stats.prev_tx_empty_npkts) *
-                                                       1000000000. / dt;
-
-                               printf(fmt, "rx dropped", dropped_pps,
-                                      xsks[i]->ring_stats.rx_dropped_npkts);
-                               printf(fmt, "rx invalid", rx_invalid_pps,
-                                      xsks[i]->ring_stats.rx_invalid_npkts);
-                               printf(fmt, "tx invalid", tx_invalid_pps,
-                                      xsks[i]->ring_stats.tx_invalid_npkts);
-                               printf(fmt, "rx queue full", full_pps,
-                                      xsks[i]->ring_stats.rx_full_npkts);
-                               printf(fmt, "fill ring empty", fill_empty_pps,
-                                      xsks[i]->ring_stats.rx_fill_empty_npkts);
-                               printf(fmt, "tx ring empty", tx_empty_pps,
-                                      xsks[i]->ring_stats.tx_empty_npkts);
-
-                               xsks[i]->ring_stats.prev_rx_dropped_npkts =
-                                       xsks[i]->ring_stats.rx_dropped_npkts;
-                               xsks[i]->ring_stats.prev_rx_invalid_npkts =
-                                       xsks[i]->ring_stats.rx_invalid_npkts;
-                               xsks[i]->ring_stats.prev_tx_invalid_npkts =
-                                       xsks[i]->ring_stats.tx_invalid_npkts;
-                               xsks[i]->ring_stats.prev_rx_full_npkts =
-                                       xsks[i]->ring_stats.rx_full_npkts;
-                               xsks[i]->ring_stats.prev_rx_fill_empty_npkts =
-                                       xsks[i]->ring_stats.rx_fill_empty_npkts;
-                               xsks[i]->ring_stats.prev_tx_empty_npkts =
-                                       xsks[i]->ring_stats.tx_empty_npkts;
-                       } else {
-                               printf("%-15s\n", "Error retrieving extra stats");
-                       }
-               }
-       }
-
-       if (opt_app_stats)
-               dump_app_stats(dt);
-       if (irq_no)
-               dump_driver_stats(dt);
-}
-
-static bool is_benchmark_done(void)
-{
-       if (opt_duration > 0) {
-               unsigned long dt = (get_nsecs() - start_time);
-
-               if (dt >= opt_duration)
-                       benchmark_done = true;
-       }
-       return benchmark_done;
-}
-
-static void *poller(void *arg)
-{
-       (void)arg;
-       while (!is_benchmark_done()) {
-               sleep(opt_interval);
-               dump_stats();
-       }
-
-       return NULL;
-}
-
-static void remove_xdp_program(void)
-{
-       u32 curr_prog_id = 0;
-
-       if (bpf_xdp_query_id(opt_ifindex, opt_xdp_flags, &curr_prog_id)) {
-               printf("bpf_xdp_query_id failed\n");
-               exit(EXIT_FAILURE);
-       }
-
-       if (prog_id == curr_prog_id)
-               bpf_xdp_detach(opt_ifindex, opt_xdp_flags, NULL);
-       else if (!curr_prog_id)
-               printf("couldn't find a prog id on a given interface\n");
-       else
-               printf("program on interface changed, not removing\n");
-}
-
-static void int_exit(int sig)
-{
-       benchmark_done = true;
-}
-
-static void __exit_with_error(int error, const char *file, const char *func,
-                             int line)
-{
-       fprintf(stderr, "%s:%s:%i: errno: %d/\"%s\"\n", file, func,
-               line, error, strerror(error));
-
-       if (opt_num_xsks > 1)
-               remove_xdp_program();
-       exit(EXIT_FAILURE);
-}
-
-#define exit_with_error(error) __exit_with_error(error, __FILE__, __func__, __LINE__)
-
-static void xdpsock_cleanup(void)
-{
-       struct xsk_umem *umem = xsks[0]->umem->umem;
-       int i, cmd = CLOSE_CONN;
-
-       dump_stats();
-       for (i = 0; i < num_socks; i++)
-               xsk_socket__delete(xsks[i]->xsk);
-       (void)xsk_umem__delete(umem);
-
-       if (opt_reduced_cap) {
-               if (write(sock, &cmd, sizeof(int)) < 0)
-                       exit_with_error(errno);
-       }
-
-       if (opt_num_xsks > 1)
-               remove_xdp_program();
-}
-
-static void swap_mac_addresses(void *data)
-{
-       struct ether_header *eth = (struct ether_header *)data;
-       struct ether_addr *src_addr = (struct ether_addr *)&eth->ether_shost;
-       struct ether_addr *dst_addr = (struct ether_addr *)&eth->ether_dhost;
-       struct ether_addr tmp;
-
-       tmp = *src_addr;
-       *src_addr = *dst_addr;
-       *dst_addr = tmp;
-}
-
-static void hex_dump(void *pkt, size_t length, u64 addr)
-{
-       const unsigned char *address = (unsigned char *)pkt;
-       const unsigned char *line = address;
-       size_t line_size = 32;
-       unsigned char c;
-       char buf[32];
-       int i = 0;
-
-       if (!DEBUG_HEXDUMP)
-               return;
-
-       sprintf(buf, "addr=%llu", addr);
-       printf("length = %zu\n", length);
-       printf("%s | ", buf);
-       while (length-- > 0) {
-               printf("%02X ", *address++);
-               if (!(++i % line_size) || (length == 0 && i % line_size)) {
-                       if (length == 0) {
-                               while (i++ % line_size)
-                                       printf("__ ");
-                       }
-                       printf(" | ");  /* right close */
-                       while (line < address) {
-                               c = *line++;
-                               printf("%c", (c < 33 || c == 255) ? 0x2E : c);
-                       }
-                       printf("\n");
-                       if (length > 0)
-                               printf("%s | ", buf);
-               }
-       }
-       printf("\n");
-}
-
-static void *memset32_htonl(void *dest, u32 val, u32 size)
-{
-       u32 *ptr = (u32 *)dest;
-       int i;
-
-       val = htonl(val);
-
-       for (i = 0; i < (size & (~0x3)); i += 4)
-               ptr[i >> 2] = val;
-
-       for (; i < size; i++)
-               ((char *)dest)[i] = ((char *)&val)[i & 3];
-
-       return dest;
-}
-
-/*
- * This function code has been taken from
- * Linux kernel lib/checksum.c
- */
-static inline unsigned short from32to16(unsigned int x)
-{
-       /* add up 16-bit and 16-bit for 16+c bit */
-       x = (x & 0xffff) + (x >> 16);
-       /* add up carry.. */
-       x = (x & 0xffff) + (x >> 16);
-       return x;
-}
-
-/*
- * This function code has been taken from
- * Linux kernel lib/checksum.c
- */
-static unsigned int do_csum(const unsigned char *buff, int len)
-{
-       unsigned int result = 0;
-       int odd;
-
-       if (len <= 0)
-               goto out;
-       odd = 1 & (unsigned long)buff;
-       if (odd) {
-#ifdef __LITTLE_ENDIAN
-               result += (*buff << 8);
-#else
-               result = *buff;
-#endif
-               len--;
-               buff++;
-       }
-       if (len >= 2) {
-               if (2 & (unsigned long)buff) {
-                       result += *(unsigned short *)buff;
-                       len -= 2;
-                       buff += 2;
-               }
-               if (len >= 4) {
-                       const unsigned char *end = buff +
-                                                  ((unsigned int)len & ~3);
-                       unsigned int carry = 0;
-
-                       do {
-                               unsigned int w = *(unsigned int *)buff;
-
-                               buff += 4;
-                               result += carry;
-                               result += w;
-                               carry = (w > result);
-                       } while (buff < end);
-                       result += carry;
-                       result = (result & 0xffff) + (result >> 16);
-               }
-               if (len & 2) {
-                       result += *(unsigned short *)buff;
-                       buff += 2;
-               }
-       }
-       if (len & 1)
-#ifdef __LITTLE_ENDIAN
-               result += *buff;
-#else
-               result += (*buff << 8);
-#endif
-       result = from32to16(result);
-       if (odd)
-               result = ((result >> 8) & 0xff) | ((result & 0xff) << 8);
-out:
-       return result;
-}
-
-/*
- *     This is a version of ip_compute_csum() optimized for IP headers,
- *     which always checksum on 4 octet boundaries.
- *     This function code has been taken from
- *     Linux kernel lib/checksum.c
- */
-static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl)
-{
-       return (__sum16)~do_csum(iph, ihl * 4);
-}
-
-/*
- * Fold a partial checksum
- * This function code has been taken from
- * Linux kernel include/asm-generic/checksum.h
- */
-static inline __sum16 csum_fold(__wsum csum)
-{
-       u32 sum = (u32)csum;
-
-       sum = (sum & 0xffff) + (sum >> 16);
-       sum = (sum & 0xffff) + (sum >> 16);
-       return (__sum16)~sum;
-}
-
-/*
- * This function code has been taken from
- * Linux kernel lib/checksum.c
- */
-static inline u32 from64to32(u64 x)
-{
-       /* add up 32-bit and 32-bit for 32+c bit */
-       x = (x & 0xffffffff) + (x >> 32);
-       /* add up carry.. */
-       x = (x & 0xffffffff) + (x >> 32);
-       return (u32)x;
-}
-
-__wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr,
-                         __u32 len, __u8 proto, __wsum sum);
-
-/*
- * This function code has been taken from
- * Linux kernel lib/checksum.c
- */
-__wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr,
-                         __u32 len, __u8 proto, __wsum sum)
-{
-       unsigned long long s = (u32)sum;
-
-       s += (u32)saddr;
-       s += (u32)daddr;
-#ifdef __BIG_ENDIAN__
-       s += proto + len;
-#else
-       s += (proto + len) << 8;
-#endif
-       return (__wsum)from64to32(s);
-}
-
-/*
- * This function has been taken from
- * Linux kernel include/asm-generic/checksum.h
- */
-static inline __sum16
-csum_tcpudp_magic(__be32 saddr, __be32 daddr, __u32 len,
-                 __u8 proto, __wsum sum)
-{
-       return csum_fold(csum_tcpudp_nofold(saddr, daddr, len, proto, sum));
-}
-
-static inline u16 udp_csum(u32 saddr, u32 daddr, u32 len,
-                          u8 proto, u16 *udp_pkt)
-{
-       u32 csum = 0;
-       u32 cnt = 0;
-
-       /* udp hdr and data */
-       for (; cnt < len; cnt += 2)
-               csum += udp_pkt[cnt >> 1];
-
-       return csum_tcpudp_magic(saddr, daddr, len, proto, csum);
-}
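
[Editor's note] The checksum helpers above, copied from lib/checksum.c and
include/asm-generic/checksum.h, rely on the end-around-carry property of
the one's complement sum. A quick sanity check of that property, as a
hypothetical function that is not part of the sample: a header whose check
field was filled in by ip_fast_csum() folds back to zero when re-summed.

	static int ip_csum_ok(struct iphdr *iph)
	{
		iph->check = 0;
		iph->check = ip_fast_csum((const void *)iph, iph->ihl);
		/* Re-summing now includes the stored complement, so the
		 * fold yields 0xffff and the final inversion yields 0. */
		return ip_fast_csum((const void *)iph, iph->ihl) == 0;
	}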
-
-#define ETH_FCS_SIZE 4
-
-#define ETH_HDR_SIZE (opt_vlan_tag ? sizeof(struct vlan_ethhdr) : \
-                     sizeof(struct ethhdr))
-#define PKTGEN_HDR_SIZE (opt_tstamp ? sizeof(struct pktgen_hdr) : 0)
-#define PKT_HDR_SIZE (ETH_HDR_SIZE + sizeof(struct iphdr) + \
-                     sizeof(struct udphdr) + PKTGEN_HDR_SIZE)
-#define PKTGEN_HDR_OFFSET (ETH_HDR_SIZE + sizeof(struct iphdr) + \
-                          sizeof(struct udphdr))
-#define PKTGEN_SIZE_MIN (PKTGEN_HDR_OFFSET + sizeof(struct pktgen_hdr) + \
-                        ETH_FCS_SIZE)
-
-#define PKT_SIZE               (opt_pkt_size - ETH_FCS_SIZE)
-#define IP_PKT_SIZE            (PKT_SIZE - ETH_HDR_SIZE)
-#define UDP_PKT_SIZE           (IP_PKT_SIZE - sizeof(struct iphdr))
-#define UDP_PKT_DATA_SIZE      (UDP_PKT_SIZE - \
-                                (sizeof(struct udphdr) + PKTGEN_HDR_SIZE))
-
-static u8 pkt_data[XSK_UMEM__DEFAULT_FRAME_SIZE];
-
-static void gen_eth_hdr_data(void)
-{
-       struct pktgen_hdr *pktgen_hdr;
-       struct udphdr *udp_hdr;
-       struct iphdr *ip_hdr;
-
-       if (opt_vlan_tag) {
-               struct vlan_ethhdr *veth_hdr = (struct vlan_ethhdr *)pkt_data;
-               u16 vlan_tci = 0;
-
-               udp_hdr = (struct udphdr *)(pkt_data +
-                                           sizeof(struct vlan_ethhdr) +
-                                           sizeof(struct iphdr));
-               ip_hdr = (struct iphdr *)(pkt_data +
-                                         sizeof(struct vlan_ethhdr));
-               pktgen_hdr = (struct pktgen_hdr *)(pkt_data +
-                                                  sizeof(struct vlan_ethhdr) +
-                                                  sizeof(struct iphdr) +
-                                                  sizeof(struct udphdr));
-               /* ethernet & VLAN header */
-               memcpy(veth_hdr->h_dest, &opt_txdmac, ETH_ALEN);
-               memcpy(veth_hdr->h_source, &opt_txsmac, ETH_ALEN);
-               veth_hdr->h_vlan_proto = htons(ETH_P_8021Q);
-               vlan_tci = opt_pkt_vlan_id & VLAN_VID_MASK;
-               vlan_tci |= (opt_pkt_vlan_pri << VLAN_PRIO_SHIFT) & VLAN_PRIO_MASK;
-               veth_hdr->h_vlan_TCI = htons(vlan_tci);
-               veth_hdr->h_vlan_encapsulated_proto = htons(ETH_P_IP);
-       } else {
-               struct ethhdr *eth_hdr = (struct ethhdr *)pkt_data;
-
-               udp_hdr = (struct udphdr *)(pkt_data +
-                                           sizeof(struct ethhdr) +
-                                           sizeof(struct iphdr));
-               ip_hdr = (struct iphdr *)(pkt_data +
-                                         sizeof(struct ethhdr));
-               pktgen_hdr = (struct pktgen_hdr *)(pkt_data +
-                                                  sizeof(struct ethhdr) +
-                                                  sizeof(struct iphdr) +
-                                                  sizeof(struct udphdr));
-               /* ethernet header */
-               memcpy(eth_hdr->h_dest, &opt_txdmac, ETH_ALEN);
-               memcpy(eth_hdr->h_source, &opt_txsmac, ETH_ALEN);
-               eth_hdr->h_proto = htons(ETH_P_IP);
-       }
-
-
-       /* IP header */
-       ip_hdr->version = IPVERSION;
-       ip_hdr->ihl = 0x5; /* 20 byte header */
-       ip_hdr->tos = 0x0;
-       ip_hdr->tot_len = htons(IP_PKT_SIZE);
-       ip_hdr->id = 0;
-       ip_hdr->frag_off = 0;
-       ip_hdr->ttl = IPDEFTTL;
-       ip_hdr->protocol = IPPROTO_UDP;
-       ip_hdr->saddr = htonl(0x0a0a0a10);
-       ip_hdr->daddr = htonl(0x0a0a0a20);
-
-       /* IP header checksum */
-       ip_hdr->check = 0;
-       ip_hdr->check = ip_fast_csum((const void *)ip_hdr, ip_hdr->ihl);
-
-       /* UDP header */
-       udp_hdr->source = htons(0x1000);
-       udp_hdr->dest = htons(0x1000);
-       udp_hdr->len = htons(UDP_PKT_SIZE);
-
-       if (opt_tstamp)
-               pktgen_hdr->pgh_magic = htonl(PKTGEN_MAGIC);
-
-       /* UDP data */
-       memset32_htonl(pkt_data + PKT_HDR_SIZE, opt_pkt_fill_pattern,
-                      UDP_PKT_DATA_SIZE);
-
-       /* UDP header checksum */
-       udp_hdr->check = 0;
-       udp_hdr->check = udp_csum(ip_hdr->saddr, ip_hdr->daddr, UDP_PKT_SIZE,
-                                 IPPROTO_UDP, (u16 *)udp_hdr);
-}
-
-static void gen_eth_frame(struct xsk_umem_info *umem, u64 addr)
-{
-       memcpy(xsk_umem__get_data(umem->buffer, addr), pkt_data,
-              PKT_SIZE);
-}
-
-static struct xsk_umem_info *xsk_configure_umem(void *buffer, u64 size)
-{
-       struct xsk_umem_info *umem;
-       struct xsk_umem_config cfg = {
-               /* We recommend that you set the fill ring size >= HW RX ring size +
-                * AF_XDP RX ring size. Make sure you fill up the fill ring
-                * with buffers at regular intervals, and you will with this setting
-                * avoid allocation failures in the driver. These are usually quite
-                * expensive since drivers have not been written to assume that
-                * allocation failures are common. For regular sockets, kernel
-                * allocated memory is used that only runs out in OOM situations
-                * that should be rare.
-                */
-               .fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS * 2,
-               .comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS,
-               .frame_size = opt_xsk_frame_size,
-               .frame_headroom = XSK_UMEM__DEFAULT_FRAME_HEADROOM,
-               .flags = opt_umem_flags
-       };
-       int ret;
-
-       umem = calloc(1, sizeof(*umem));
-       if (!umem)
-               exit_with_error(errno);
-
-       ret = xsk_umem__create(&umem->umem, buffer, size, &umem->fq, &umem->cq,
-                              &cfg);
-       if (ret)
-               exit_with_error(-ret);
-
-       umem->buffer = buffer;
-       return umem;
-}
-
-static void xsk_populate_fill_ring(struct xsk_umem_info *umem)
-{
-       int ret, i;
-       u32 idx;
-
-       ret = xsk_ring_prod__reserve(&umem->fq,
-                                    XSK_RING_PROD__DEFAULT_NUM_DESCS * 2, &idx);
-       if (ret != XSK_RING_PROD__DEFAULT_NUM_DESCS * 2)
-               exit_with_error(-ret);
-       for (i = 0; i < XSK_RING_PROD__DEFAULT_NUM_DESCS * 2; i++)
-               *xsk_ring_prod__fill_addr(&umem->fq, idx++) =
-                       i * opt_xsk_frame_size;
-       xsk_ring_prod__submit(&umem->fq, XSK_RING_PROD__DEFAULT_NUM_DESCS * 2);
-}
-
-static struct xsk_socket_info *xsk_configure_socket(struct xsk_umem_info *umem,
-                                                   bool rx, bool tx)
-{
-       struct xsk_socket_config cfg;
-       struct xsk_socket_info *xsk;
-       struct xsk_ring_cons *rxr;
-       struct xsk_ring_prod *txr;
-       int ret;
-
-       xsk = calloc(1, sizeof(*xsk));
-       if (!xsk)
-               exit_with_error(errno);
-
-       xsk->umem = umem;
-       cfg.rx_size = XSK_RING_CONS__DEFAULT_NUM_DESCS;
-       cfg.tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS;
-       if (opt_num_xsks > 1 || opt_reduced_cap)
-               cfg.libbpf_flags = XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD;
-       else
-               cfg.libbpf_flags = 0;
-       cfg.xdp_flags = opt_xdp_flags;
-       cfg.bind_flags = opt_xdp_bind_flags;
-
-       rxr = rx ? &xsk->rx : NULL;
-       txr = tx ? &xsk->tx : NULL;
-       ret = xsk_socket__create(&xsk->xsk, opt_if, opt_queue, umem->umem,
-                                rxr, txr, &cfg);
-       if (ret)
-               exit_with_error(-ret);
-
-       ret = bpf_xdp_query_id(opt_ifindex, opt_xdp_flags, &prog_id);
-       if (ret)
-               exit_with_error(-ret);
-
-       xsk->app_stats.rx_empty_polls = 0;
-       xsk->app_stats.fill_fail_polls = 0;
-       xsk->app_stats.copy_tx_sendtos = 0;
-       xsk->app_stats.tx_wakeup_sendtos = 0;
-       xsk->app_stats.opt_polls = 0;
-       xsk->app_stats.prev_rx_empty_polls = 0;
-       xsk->app_stats.prev_fill_fail_polls = 0;
-       xsk->app_stats.prev_copy_tx_sendtos = 0;
-       xsk->app_stats.prev_tx_wakeup_sendtos = 0;
-       xsk->app_stats.prev_opt_polls = 0;
-
-       return xsk;
-}
-
-static struct option long_options[] = {
-       {"rxdrop", no_argument, 0, 'r'},
-       {"txonly", no_argument, 0, 't'},
-       {"l2fwd", no_argument, 0, 'l'},
-       {"interface", required_argument, 0, 'i'},
-       {"queue", required_argument, 0, 'q'},
-       {"poll", no_argument, 0, 'p'},
-       {"xdp-skb", no_argument, 0, 'S'},
-       {"xdp-native", no_argument, 0, 'N'},
-       {"interval", required_argument, 0, 'n'},
-       {"retries", required_argument, 0, 'O'},
-       {"zero-copy", no_argument, 0, 'z'},
-       {"copy", no_argument, 0, 'c'},
-       {"frame-size", required_argument, 0, 'f'},
-       {"no-need-wakeup", no_argument, 0, 'm'},
-       {"unaligned", no_argument, 0, 'u'},
-       {"shared-umem", no_argument, 0, 'M'},
-       {"force", no_argument, 0, 'F'},
-       {"duration", required_argument, 0, 'd'},
-       {"clock", required_argument, 0, 'w'},
-       {"batch-size", required_argument, 0, 'b'},
-       {"tx-pkt-count", required_argument, 0, 'C'},
-       {"tx-pkt-size", required_argument, 0, 's'},
-       {"tx-pkt-pattern", required_argument, 0, 'P'},
-       {"tx-vlan", no_argument, 0, 'V'},
-       {"tx-vlan-id", required_argument, 0, 'J'},
-       {"tx-vlan-pri", required_argument, 0, 'K'},
-       {"tx-dmac", required_argument, 0, 'G'},
-       {"tx-smac", required_argument, 0, 'H'},
-       {"tx-cycle", required_argument, 0, 'T'},
-       {"tstamp", no_argument, 0, 'y'},
-       {"policy", required_argument, 0, 'W'},
-       {"schpri", required_argument, 0, 'U'},
-       {"extra-stats", no_argument, 0, 'x'},
-       {"quiet", no_argument, 0, 'Q'},
-       {"app-stats", no_argument, 0, 'a'},
-       {"irq-string", no_argument, 0, 'I'},
-       {"busy-poll", no_argument, 0, 'B'},
-       {"reduce-cap", no_argument, 0, 'R'},
-       {0, 0, 0, 0}
-};
-
-static void usage(const char *prog)
-{
-       const char *str =
-               "  Usage: %s [OPTIONS]\n"
-               "  Options:\n"
-               "  -r, --rxdrop         Discard all incoming packets (default)\n"
-               "  -t, --txonly         Only send packets\n"
-               "  -l, --l2fwd          MAC swap L2 forwarding\n"
-               "  -i, --interface=n    Run on interface n\n"
-               "  -q, --queue=n        Use queue n (default 0)\n"
-               "  -p, --poll           Use poll syscall\n"
-               "  -S, --xdp-skb=n      Use XDP skb-mod\n"
-               "  -N, --xdp-native=n   Enforce XDP native mode\n"
-               "  -n, --interval=n     Specify statistics update interval (default 1 sec).\n"
-               "  -O, --retries=n      Specify time-out retries (1s interval) attempt (default 3).\n"
-               "  -z, --zero-copy      Force zero-copy mode.\n"
-               "  -c, --copy           Force copy mode.\n"
-               "  -m, --no-need-wakeup Turn off use of driver need wakeup flag.\n"
-               "  -f, --frame-size=n   Set the frame size (must be a power of two in aligned mode, default is %d).\n"
-               "  -u, --unaligned      Enable unaligned chunk placement\n"
-               "  -M, --shared-umem    Enable XDP_SHARED_UMEM (cannot be used with -R)\n"
-               "  -F, --force          Force loading the XDP prog\n"
-               "  -d, --duration=n     Duration in secs to run command.\n"
-               "                       Default: forever.\n"
-               "  -w, --clock=CLOCK    Clock NAME (default MONOTONIC).\n"
-               "  -b, --batch-size=n   Batch size for sending or receiving\n"
-               "                       packets. Default: %d\n"
-               "  -C, --tx-pkt-count=n Number of packets to send.\n"
-               "                       Default: Continuous packets.\n"
-               "  -s, --tx-pkt-size=n  Transmit packet size.\n"
-               "                       (Default: %d bytes)\n"
-               "                       Min size: %d, Max size %d.\n"
-               "  -P, --tx-pkt-pattern=nPacket fill pattern. Default: 0x%x\n"
-               "  -V, --tx-vlan        Send VLAN tagged  packets (For -t|--txonly)\n"
-               "  -J, --tx-vlan-id=n   Tx VLAN ID [1-4095]. Default: %d (For -V|--tx-vlan)\n"
-               "  -K, --tx-vlan-pri=n  Tx VLAN Priority [0-7]. Default: %d (For -V|--tx-vlan)\n"
-               "  -G, --tx-dmac=<MAC>  Dest MAC addr of TX frame in aa:bb:cc:dd:ee:ff format (For -V|--tx-vlan)\n"
-               "  -H, --tx-smac=<MAC>  Src MAC addr of TX frame in aa:bb:cc:dd:ee:ff format (For -V|--tx-vlan)\n"
-               "  -T, --tx-cycle=n     Tx cycle time in micro-seconds (For -t|--txonly).\n"
-               "  -y, --tstamp         Add time-stamp to packet (For -t|--txonly).\n"
-               "  -W, --policy=POLICY  Schedule policy. Default: SCHED_OTHER\n"
-               "  -U, --schpri=n       Schedule priority. Default: %d\n"
-               "  -x, --extra-stats    Display extra statistics.\n"
-               "  -Q, --quiet          Do not display any stats.\n"
-               "  -a, --app-stats      Display application (syscall) statistics.\n"
-               "  -I, --irq-string     Display driver interrupt statistics for interface associated with irq-string.\n"
-               "  -B, --busy-poll      Busy poll.\n"
-               "  -R, --reduce-cap     Use reduced capabilities (cannot be used with -M)\n"
-               "\n";
-       fprintf(stderr, str, prog, XSK_UMEM__DEFAULT_FRAME_SIZE,
-               opt_batch_size, MIN_PKT_SIZE, MIN_PKT_SIZE,
-               XSK_UMEM__DEFAULT_FRAME_SIZE, opt_pkt_fill_pattern,
-               VLAN_VID__DEFAULT, VLAN_PRI__DEFAULT,
-               SCHED_PRI__DEFAULT);
-
-       exit(EXIT_FAILURE);
-}
-
-static void parse_command_line(int argc, char **argv)
-{
-       int option_index, c;
-
-       opterr = 0;
-
-       for (;;) {
-               c = getopt_long(argc, argv,
-                               "Frtli:q:pSNn:w:O:czf:muMd:b:C:s:P:VJ:K:G:H:T:yW:U:xQaI:BR",
-                               long_options, &option_index);
-               if (c == -1)
-                       break;
-
-               switch (c) {
-               case 'r':
-                       opt_bench = BENCH_RXDROP;
-                       break;
-               case 't':
-                       opt_bench = BENCH_TXONLY;
-                       break;
-               case 'l':
-                       opt_bench = BENCH_L2FWD;
-                       break;
-               case 'i':
-                       opt_if = optarg;
-                       break;
-               case 'q':
-                       opt_queue = atoi(optarg);
-                       break;
-               case 'p':
-                       opt_poll = 1;
-                       break;
-               case 'S':
-                       opt_xdp_flags |= XDP_FLAGS_SKB_MODE;
-                       opt_xdp_bind_flags |= XDP_COPY;
-                       break;
-               case 'N':
-                       /* default, set below */
-                       break;
-               case 'n':
-                       opt_interval = atoi(optarg);
-                       break;
-               case 'w':
-                       if (get_clockid(&opt_clock, optarg)) {
-                               fprintf(stderr,
-                                       "ERROR: Invalid clock %s. Default to CLOCK_MONOTONIC.\n",
-                                       optarg);
-                               opt_clock = CLOCK_MONOTONIC;
-                       }
-                       break;
-               case 'O':
-                       opt_retries = atoi(optarg);
-                       break;
-               case 'z':
-                       opt_xdp_bind_flags |= XDP_ZEROCOPY;
-                       break;
-               case 'c':
-                       opt_xdp_bind_flags |= XDP_COPY;
-                       break;
-               case 'u':
-                       opt_umem_flags |= XDP_UMEM_UNALIGNED_CHUNK_FLAG;
-                       opt_unaligned_chunks = 1;
-                       opt_mmap_flags = MAP_HUGETLB;
-                       break;
-               case 'F':
-                       opt_xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST;
-                       break;
-               case 'f':
-                       opt_xsk_frame_size = atoi(optarg);
-                       break;
-               case 'm':
-                       opt_need_wakeup = false;
-                       opt_xdp_bind_flags &= ~XDP_USE_NEED_WAKEUP;
-                       break;
-               case 'M':
-                       opt_num_xsks = MAX_SOCKS;
-                       break;
-               case 'd':
-                       opt_duration = atoi(optarg);
-                       opt_duration *= 1000000000;
-                       break;
-               case 'b':
-                       opt_batch_size = atoi(optarg);
-                       break;
-               case 'C':
-                       opt_pkt_count = atoi(optarg);
-                       break;
-               case 's':
-                       opt_pkt_size = atoi(optarg);
-                       if (opt_pkt_size > (XSK_UMEM__DEFAULT_FRAME_SIZE) ||
-                           opt_pkt_size < MIN_PKT_SIZE) {
-                               fprintf(stderr,
-                                       "ERROR: Invalid frame size %d\n",
-                                       opt_pkt_size);
-                               usage(basename(argv[0]));
-                       }
-                       break;
-               case 'P':
-                       opt_pkt_fill_pattern = strtol(optarg, NULL, 16);
-                       break;
-               case 'V':
-                       opt_vlan_tag = true;
-                       break;
-               case 'J':
-                       opt_pkt_vlan_id = atoi(optarg);
-                       break;
-               case 'K':
-                       opt_pkt_vlan_pri = atoi(optarg);
-                       break;
-               case 'G':
-                       if (!ether_aton_r(optarg,
-                                         (struct ether_addr *)&opt_txdmac)) {
-                               fprintf(stderr, "Invalid dmac address:%s\n",
-                                       optarg);
-                               usage(basename(argv[0]));
-                       }
-                       break;
-               case 'H':
-                       if (!ether_aton_r(optarg,
-                                         (struct ether_addr *)&opt_txsmac)) {
-                               fprintf(stderr, "Invalid smac address:%s\n",
-                                       optarg);
-                               usage(basename(argv[0]));
-                       }
-                       break;
-               case 'T':
-                       opt_tx_cycle_ns = atoi(optarg);
-                       opt_tx_cycle_ns *= NSEC_PER_USEC;
-                       break;
-               case 'y':
-                       opt_tstamp = 1;
-                       break;
-               case 'W':
-                       if (get_schpolicy(&opt_schpolicy, optarg)) {
-                               fprintf(stderr,
-                                       "ERROR: Invalid policy %s. Default to SCHED_OTHER.\n",
-                                       optarg);
-                               opt_schpolicy = SCHED_OTHER;
-                       }
-                       break;
-               case 'U':
-                       opt_schprio = atoi(optarg);
-                       break;
-               case 'x':
-                       opt_extra_stats = 1;
-                       break;
-               case 'Q':
-                       opt_quiet = 1;
-                       break;
-               case 'a':
-                       opt_app_stats = 1;
-                       break;
-               case 'I':
-                       opt_irq_str = optarg;
-                       if (get_interrupt_number())
-                               irqs_at_init = get_irqs();
-                       if (irqs_at_init < 0) {
-                               fprintf(stderr, "ERROR: Failed to get irqs for %s\n", opt_irq_str);
-                               usage(basename(argv[0]));
-                       }
-                       break;
-               case 'B':
-                       opt_busy_poll = 1;
-                       break;
-               case 'R':
-                       opt_reduced_cap = true;
-                       break;
-               default:
-                       usage(basename(argv[0]));
-               }
-       }
-
-       if (!(opt_xdp_flags & XDP_FLAGS_SKB_MODE))
-               opt_xdp_flags |= XDP_FLAGS_DRV_MODE;
-
-       opt_ifindex = if_nametoindex(opt_if);
-       if (!opt_ifindex) {
-               fprintf(stderr, "ERROR: interface \"%s\" does not exist\n",
-                       opt_if);
-               usage(basename(argv[0]));
-       }
-
-       if ((opt_xsk_frame_size & (opt_xsk_frame_size - 1)) &&
-           !opt_unaligned_chunks) {
-               fprintf(stderr, "--frame-size=%d is not a power of two\n",
-                       opt_xsk_frame_size);
-               usage(basename(argv[0]));
-       }
-
-       if (opt_reduced_cap && opt_num_xsks > 1) {
-               fprintf(stderr, "ERROR: -M and -R cannot be used together\n");
-               usage(basename(argv[0]));
-       }
-}
-
-static void kick_tx(struct xsk_socket_info *xsk)
-{
-       int ret;
-
-       ret = sendto(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, 0);
-       if (ret >= 0 || errno == ENOBUFS || errno == EAGAIN ||
-           errno == EBUSY || errno == ENETDOWN)
-               return;
-       exit_with_error(errno);
-}
-
-static inline void complete_tx_l2fwd(struct xsk_socket_info *xsk)
-{
-       struct xsk_umem_info *umem = xsk->umem;
-       u32 idx_cq = 0, idx_fq = 0;
-       unsigned int rcvd;
-       size_t ndescs;
-
-       if (!xsk->outstanding_tx)
-               return;
-
-       /* In copy mode, Tx is driven by a syscall, so we need e.g. sendto() to
-        * actually send the packets. In zero-copy mode, Tx is driven by the NAPI
-        * loop, so no syscall is strictly required; as an optimization, we skip
-        * the sendto() call in zero-copy mode for l2fwd.
-        */
-       if (opt_xdp_bind_flags & XDP_COPY) {
-               xsk->app_stats.copy_tx_sendtos++;
-               kick_tx(xsk);
-       }
-
-       ndescs = (xsk->outstanding_tx > opt_batch_size) ? opt_batch_size :
-               xsk->outstanding_tx;
-
-       /* re-add completed Tx buffers */
-       rcvd = xsk_ring_cons__peek(&umem->cq, ndescs, &idx_cq);
-       if (rcvd > 0) {
-               unsigned int i;
-               int ret;
-
-               ret = xsk_ring_prod__reserve(&umem->fq, rcvd, &idx_fq);
-               while (ret != rcvd) {
-                       if (ret < 0)
-                               exit_with_error(-ret);
-                       if (opt_busy_poll || xsk_ring_prod__needs_wakeup(&umem->fq)) {
-                               xsk->app_stats.fill_fail_polls++;
-                               recvfrom(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL,
-                                        NULL);
-                       }
-                       ret = xsk_ring_prod__reserve(&umem->fq, rcvd, &idx_fq);
-               }
-
-               for (i = 0; i < rcvd; i++)
-                       *xsk_ring_prod__fill_addr(&umem->fq, idx_fq++) =
-                               *xsk_ring_cons__comp_addr(&umem->cq, idx_cq++);
-
-               xsk_ring_prod__submit(&xsk->umem->fq, rcvd);
-               xsk_ring_cons__release(&xsk->umem->cq, rcvd);
-               xsk->outstanding_tx -= rcvd;
-       }
-}
-
-static inline void complete_tx_only(struct xsk_socket_info *xsk,
-                                   int batch_size)
-{
-       unsigned int rcvd;
-       u32 idx;
-
-       if (!xsk->outstanding_tx)
-               return;
-
-       if (!opt_need_wakeup || xsk_ring_prod__needs_wakeup(&xsk->tx)) {
-               xsk->app_stats.tx_wakeup_sendtos++;
-               kick_tx(xsk);
-       }
-
-       rcvd = xsk_ring_cons__peek(&xsk->umem->cq, batch_size, &idx);
-       if (rcvd > 0) {
-               xsk_ring_cons__release(&xsk->umem->cq, rcvd);
-               xsk->outstanding_tx -= rcvd;
-       }
-}
-
-static void rx_drop(struct xsk_socket_info *xsk)
-{
-       unsigned int rcvd, i;
-       u32 idx_rx = 0, idx_fq = 0;
-       int ret;
-
-       rcvd = xsk_ring_cons__peek(&xsk->rx, opt_batch_size, &idx_rx);
-       if (!rcvd) {
-               if (opt_busy_poll || xsk_ring_prod__needs_wakeup(&xsk->umem->fq)) {
-                       xsk->app_stats.rx_empty_polls++;
-                       recvfrom(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, NULL);
-               }
-               return;
-       }
-
-       ret = xsk_ring_prod__reserve(&xsk->umem->fq, rcvd, &idx_fq);
-       while (ret != rcvd) {
-               if (ret < 0)
-                       exit_with_error(-ret);
-               if (opt_busy_poll || xsk_ring_prod__needs_wakeup(&xsk->umem->fq)) {
-                       xsk->app_stats.fill_fail_polls++;
-                       recvfrom(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, NULL);
-               }
-               ret = xsk_ring_prod__reserve(&xsk->umem->fq, rcvd, &idx_fq);
-       }
-
-       for (i = 0; i < rcvd; i++) {
-               u64 addr = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx)->addr;
-               u32 len = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx++)->len;
-               u64 orig = xsk_umem__extract_addr(addr);
-
-               addr = xsk_umem__add_offset_to_addr(addr);
-               char *pkt = xsk_umem__get_data(xsk->umem->buffer, addr);
-
-               hex_dump(pkt, len, addr);
-               *xsk_ring_prod__fill_addr(&xsk->umem->fq, idx_fq++) = orig;
-       }
-
-       xsk_ring_prod__submit(&xsk->umem->fq, rcvd);
-       xsk_ring_cons__release(&xsk->rx, rcvd);
-       xsk->ring_stats.rx_npkts += rcvd;
-}
-
-static void rx_drop_all(void)
-{
-       struct pollfd fds[MAX_SOCKS] = {};
-       int i, ret;
-
-       for (i = 0; i < num_socks; i++) {
-               fds[i].fd = xsk_socket__fd(xsks[i]->xsk);
-               fds[i].events = POLLIN;
-       }
-
-       for (;;) {
-               if (opt_poll) {
-                       for (i = 0; i < num_socks; i++)
-                               xsks[i]->app_stats.opt_polls++;
-                       ret = poll(fds, num_socks, opt_timeout);
-                       if (ret <= 0)
-                               continue;
-               }
-
-               for (i = 0; i < num_socks; i++)
-                       rx_drop(xsks[i]);
-
-               if (benchmark_done)
-                       break;
-       }
-}
-
-static int tx_only(struct xsk_socket_info *xsk, u32 *frame_nb,
-                  int batch_size, unsigned long tx_ns)
-{
-       u32 idx, tv_sec, tv_usec;
-       unsigned int i;
-
-       while (xsk_ring_prod__reserve(&xsk->tx, batch_size, &idx) <
-                                     batch_size) {
-               complete_tx_only(xsk, batch_size);
-               if (benchmark_done)
-                       return 0;
-       }
-
-       if (opt_tstamp) {
-               tv_sec = (u32)(tx_ns / NSEC_PER_SEC);
-               tv_usec = (u32)((tx_ns % NSEC_PER_SEC) / 1000);
-       }
-
-       for (i = 0; i < batch_size; i++) {
-               struct xdp_desc *tx_desc = xsk_ring_prod__tx_desc(&xsk->tx,
-                                                                 idx + i);
-               tx_desc->addr = (*frame_nb + i) * opt_xsk_frame_size;
-               tx_desc->len = PKT_SIZE;
-
-               if (opt_tstamp) {
-                       struct pktgen_hdr *pktgen_hdr;
-                       u64 addr = tx_desc->addr;
-                       char *pkt;
-
-                       pkt = xsk_umem__get_data(xsk->umem->buffer, addr);
-                       pktgen_hdr = (struct pktgen_hdr *)(pkt + PKTGEN_HDR_OFFSET);
-
-                       pktgen_hdr->seq_num = htonl(sequence++);
-                       pktgen_hdr->tv_sec = htonl(tv_sec);
-                       pktgen_hdr->tv_usec = htonl(tv_usec);
-
-                       hex_dump(pkt, PKT_SIZE, addr);
-               }
-       }
-
-       xsk_ring_prod__submit(&xsk->tx, batch_size);
-       xsk->ring_stats.tx_npkts += batch_size;
-       xsk->outstanding_tx += batch_size;
-       *frame_nb += batch_size;
-       *frame_nb %= NUM_FRAMES;
-       complete_tx_only(xsk, batch_size);
-
-       return batch_size;
-}
-
-static inline int get_batch_size(int pkt_cnt)
-{
-       if (!opt_pkt_count)
-               return opt_batch_size;
-
-       if (pkt_cnt + opt_batch_size <= opt_pkt_count)
-               return opt_batch_size;
-
-       return opt_pkt_count - pkt_cnt;
-}
-
-static void complete_tx_only_all(void)
-{
-       bool pending;
-       int i;
-
-       do {
-               pending = false;
-               for (i = 0; i < num_socks; i++) {
-                       if (xsks[i]->outstanding_tx) {
-                               complete_tx_only(xsks[i], opt_batch_size);
-                               pending = !!xsks[i]->outstanding_tx;
-                       }
-               }
-               sleep(1);
-       } while (pending && opt_retries-- > 0);
-}
-
-static void tx_only_all(void)
-{
-       struct pollfd fds[MAX_SOCKS] = {};
-       u32 frame_nb[MAX_SOCKS] = {};
-       unsigned long next_tx_ns = 0;
-       int pkt_cnt = 0;
-       int i, ret;
-
-       if (opt_poll && opt_tx_cycle_ns) {
-               fprintf(stderr,
-                       "Error: --poll and --tx-cycles are both set\n");
-               return;
-       }
-
-       for (i = 0; i < num_socks; i++) {
-               fds[i].fd = xsk_socket__fd(xsks[i]->xsk);
-               fds[i].events = POLLOUT;
-       }
-
-       if (opt_tx_cycle_ns) {
-               /* Align Tx time to micro-second boundary */
-               next_tx_ns = (get_nsecs() / NSEC_PER_USEC + 1) *
-                            NSEC_PER_USEC;
-               next_tx_ns += opt_tx_cycle_ns;
-
-               /* Initialize periodic Tx scheduling variance */
-               tx_cycle_diff_min = 1000000000;
-               tx_cycle_diff_max = 0;
-               tx_cycle_diff_ave = 0.0;
-       }
-
-       while ((opt_pkt_count && pkt_cnt < opt_pkt_count) || !opt_pkt_count) {
-               int batch_size = get_batch_size(pkt_cnt);
-               unsigned long tx_ns = 0;
-               struct timespec next;
-               int tx_cnt = 0;
-               long diff;
-               int err;
-
-               if (opt_poll) {
-                       for (i = 0; i < num_socks; i++)
-                               xsks[i]->app_stats.opt_polls++;
-                       ret = poll(fds, num_socks, opt_timeout);
-                       if (ret <= 0)
-                               continue;
-
-                       if (!(fds[0].revents & POLLOUT))
-                               continue;
-               }
-
-               if (opt_tx_cycle_ns) {
-                       next.tv_sec = next_tx_ns / NSEC_PER_SEC;
-                       next.tv_nsec = next_tx_ns % NSEC_PER_SEC;
-                       err = clock_nanosleep(opt_clock, TIMER_ABSTIME, &next, NULL);
-                       if (err) {
-                               if (err != EINTR)
-                                       fprintf(stderr,
-                                               "clock_nanosleep failed. Err:%d errno:%d\n",
-                                               err, errno);
-                               break;
-                       }
-
-                       /* Measure periodic Tx scheduling variance */
-                       tx_ns = get_nsecs();
-                       diff = tx_ns - next_tx_ns;
-                       if (diff < tx_cycle_diff_min)
-                               tx_cycle_diff_min = diff;
-
-                       if (diff > tx_cycle_diff_max)
-                               tx_cycle_diff_max = diff;
-
-                       tx_cycle_diff_ave += (double)diff;
-                       tx_cycle_cnt++;
-               } else if (opt_tstamp) {
-                       tx_ns = get_nsecs();
-               }
-
-               for (i = 0; i < num_socks; i++)
-                       tx_cnt += tx_only(xsks[i], &frame_nb[i], batch_size, tx_ns);
-
-               pkt_cnt += tx_cnt;
-
-               if (benchmark_done)
-                       break;
-
-               if (opt_tx_cycle_ns)
-                       next_tx_ns += opt_tx_cycle_ns;
-       }
-
-       if (opt_pkt_count)
-               complete_tx_only_all();
-}
-
-static void l2fwd(struct xsk_socket_info *xsk)
-{
-       unsigned int rcvd, i;
-       u32 idx_rx = 0, idx_tx = 0;
-       int ret;
-
-       complete_tx_l2fwd(xsk);
-
-       rcvd = xsk_ring_cons__peek(&xsk->rx, opt_batch_size, &idx_rx);
-       if (!rcvd) {
-               if (opt_busy_poll || xsk_ring_prod__needs_wakeup(&xsk->umem->fq)) {
-                       xsk->app_stats.rx_empty_polls++;
-                       recvfrom(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, NULL);
-               }
-               return;
-       }
-       xsk->ring_stats.rx_npkts += rcvd;
-
-       ret = xsk_ring_prod__reserve(&xsk->tx, rcvd, &idx_tx);
-       while (ret != rcvd) {
-               if (ret < 0)
-                       exit_with_error(-ret);
-               complete_tx_l2fwd(xsk);
-               if (opt_busy_poll || xsk_ring_prod__needs_wakeup(&xsk->tx)) {
-                       xsk->app_stats.tx_wakeup_sendtos++;
-                       kick_tx(xsk);
-               }
-               ret = xsk_ring_prod__reserve(&xsk->tx, rcvd, &idx_tx);
-       }
-
-       for (i = 0; i < rcvd; i++) {
-               u64 addr = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx)->addr;
-               u32 len = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx++)->len;
-               u64 orig = addr;
-
-               addr = xsk_umem__add_offset_to_addr(addr);
-               char *pkt = xsk_umem__get_data(xsk->umem->buffer, addr);
-
-               swap_mac_addresses(pkt);
-
-               hex_dump(pkt, len, addr);
-               xsk_ring_prod__tx_desc(&xsk->tx, idx_tx)->addr = orig;
-               xsk_ring_prod__tx_desc(&xsk->tx, idx_tx++)->len = len;
-       }
-
-       xsk_ring_prod__submit(&xsk->tx, rcvd);
-       xsk_ring_cons__release(&xsk->rx, rcvd);
-
-       xsk->ring_stats.tx_npkts += rcvd;
-       xsk->outstanding_tx += rcvd;
-}
-
-static void l2fwd_all(void)
-{
-       struct pollfd fds[MAX_SOCKS] = {};
-       int i, ret;
-
-       for (;;) {
-               if (opt_poll) {
-                       for (i = 0; i < num_socks; i++) {
-                               fds[i].fd = xsk_socket__fd(xsks[i]->xsk);
-                               fds[i].events = POLLOUT | POLLIN;
-                               xsks[i]->app_stats.opt_polls++;
-                       }
-                       ret = poll(fds, num_socks, opt_timeout);
-                       if (ret <= 0)
-                               continue;
-               }
-
-               for (i = 0; i < num_socks; i++)
-                       l2fwd(xsks[i]);
-
-               if (benchmark_done)
-                       break;
-       }
-}
-
-static void load_xdp_program(char **argv, struct bpf_object **obj)
-{
-       struct bpf_prog_load_attr prog_load_attr = {
-               .prog_type      = BPF_PROG_TYPE_XDP,
-       };
-       char xdp_filename[256];
-       int prog_fd;
-
-       snprintf(xdp_filename, sizeof(xdp_filename), "%s_kern.o", argv[0]);
-       prog_load_attr.file = xdp_filename;
-
-       if (bpf_prog_load_xattr(&prog_load_attr, obj, &prog_fd))
-               exit(EXIT_FAILURE);
-       if (prog_fd < 0) {
-               fprintf(stderr, "ERROR: no program found: %s\n",
-                       strerror(-prog_fd));
-               exit(EXIT_FAILURE);
-       }
-
-       if (bpf_xdp_attach(opt_ifindex, prog_fd, opt_xdp_flags, NULL) < 0) {
-               fprintf(stderr, "ERROR: link set xdp fd failed\n");
-               exit(EXIT_FAILURE);
-       }
-}
-
-static void enter_xsks_into_map(struct bpf_object *obj)
-{
-       struct bpf_map *map;
-       int i, xsks_map;
-
-       map = bpf_object__find_map_by_name(obj, "xsks_map");
-       xsks_map = bpf_map__fd(map);
-       if (xsks_map < 0) {
-               fprintf(stderr, "ERROR: no xsks map found: %s\n",
-                       strerror(xsks_map));
-                       exit(EXIT_FAILURE);
-       }
-
-       for (i = 0; i < num_socks; i++) {
-               int fd = xsk_socket__fd(xsks[i]->xsk);
-               int key, ret;
-
-               key = i;
-               ret = bpf_map_update_elem(xsks_map, &key, &fd, 0);
-               if (ret) {
-                       fprintf(stderr, "ERROR: bpf_map_update_elem %d\n", i);
-                       exit(EXIT_FAILURE);
-               }
-       }
-}
-
-static void apply_setsockopt(struct xsk_socket_info *xsk)
-{
-       int sock_opt;
-
-       if (!opt_busy_poll)
-               return;
-
-       sock_opt = 1;
-       if (setsockopt(xsk_socket__fd(xsk->xsk), SOL_SOCKET, SO_PREFER_BUSY_POLL,
-                      (void *)&sock_opt, sizeof(sock_opt)) < 0)
-               exit_with_error(errno);
-
-       sock_opt = 20;
-       if (setsockopt(xsk_socket__fd(xsk->xsk), SOL_SOCKET, SO_BUSY_POLL,
-                      (void *)&sock_opt, sizeof(sock_opt)) < 0)
-               exit_with_error(errno);
-
-       sock_opt = opt_batch_size;
-       if (setsockopt(xsk_socket__fd(xsk->xsk), SOL_SOCKET, SO_BUSY_POLL_BUDGET,
-                      (void *)&sock_opt, sizeof(sock_opt)) < 0)
-               exit_with_error(errno);
-}
-
-static int recv_xsks_map_fd_from_ctrl_node(int sock, int *_fd)
-{
-       char cms[CMSG_SPACE(sizeof(int))];
-       struct cmsghdr *cmsg;
-       struct msghdr msg;
-       struct iovec iov;
-       int value;
-       int len;
-
-       iov.iov_base = &value;
-       iov.iov_len = sizeof(int);
-
-       msg.msg_name = 0;
-       msg.msg_namelen = 0;
-       msg.msg_iov = &iov;
-       msg.msg_iovlen = 1;
-       msg.msg_flags = 0;
-       msg.msg_control = (caddr_t)cms;
-       msg.msg_controllen = sizeof(cms);
-
-       len = recvmsg(sock, &msg, 0);
-
-       if (len < 0) {
-               fprintf(stderr, "Recvmsg failed length incorrect.\n");
-               return -EINVAL;
-       }
-
-       if (len == 0) {
-               fprintf(stderr, "Recvmsg failed no data\n");
-               return -EINVAL;
-       }
-
-       cmsg = CMSG_FIRSTHDR(&msg);
-       *_fd = *(int *)CMSG_DATA(cmsg);
-
-       return 0;
-}
-
-static int
-recv_xsks_map_fd(int *xsks_map_fd)
-{
-       struct sockaddr_un server;
-       int err;
-
-       sock = socket(AF_UNIX, SOCK_STREAM, 0);
-       if (sock < 0) {
-               fprintf(stderr, "Error opening socket stream: %s", strerror(errno));
-               return errno;
-       }
-
-       server.sun_family = AF_UNIX;
-       strcpy(server.sun_path, SOCKET_NAME);
-
-       if (connect(sock, (struct sockaddr *)&server, sizeof(struct sockaddr_un)) < 0) {
-               close(sock);
-               fprintf(stderr, "Error connecting stream socket: %s", strerror(errno));
-               return errno;
-       }
-
-       err = recv_xsks_map_fd_from_ctrl_node(sock, xsks_map_fd);
-       if (err) {
-               fprintf(stderr, "Error %d receiving fd\n", err);
-               return err;
-       }
-       return 0;
-}
-
-int main(int argc, char **argv)
-{
-       struct __user_cap_header_struct hdr = { _LINUX_CAPABILITY_VERSION_3, 0 };
-       struct __user_cap_data_struct data[2] = { { 0 } };
-       bool rx = false, tx = false;
-       struct sched_param schparam;
-       struct xsk_umem_info *umem;
-       struct bpf_object *obj;
-       int xsks_map_fd = 0;
-       pthread_t pt;
-       int i, ret;
-       void *bufs;
-
-       parse_command_line(argc, argv);
-
-       if (opt_reduced_cap) {
-               if (capget(&hdr, data)  < 0)
-                       fprintf(stderr, "Error getting capabilities\n");
-
-               data->effective &= CAP_TO_MASK(CAP_NET_RAW);
-               data->permitted &= CAP_TO_MASK(CAP_NET_RAW);
-
-               if (capset(&hdr, data) < 0)
-                       fprintf(stderr, "Setting capabilities failed\n");
-
-               if (capget(&hdr, data)  < 0) {
-                       fprintf(stderr, "Error getting capabilities\n");
-               } else {
-                       fprintf(stderr, "Capabilities EFF %x Caps INH %x Caps Per %x\n",
-                               data[0].effective, data[0].inheritable, data[0].permitted);
-                       fprintf(stderr, "Capabilities EFF %x Caps INH %x Caps Per %x\n",
-                               data[1].effective, data[1].inheritable, data[1].permitted);
-               }
-       } else {
-               /* Use libbpf 1.0 API mode */
-               libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
-
-               if (opt_num_xsks > 1)
-                       load_xdp_program(argv, &obj);
-       }
-
-       /* Reserve memory for the umem. Use hugepages if unaligned chunk mode */
-       bufs = mmap(NULL, NUM_FRAMES * opt_xsk_frame_size,
-                   PROT_READ | PROT_WRITE,
-                   MAP_PRIVATE | MAP_ANONYMOUS | opt_mmap_flags, -1, 0);
-       if (bufs == MAP_FAILED) {
-               printf("ERROR: mmap failed\n");
-               exit(EXIT_FAILURE);
-       }
-
-       /* Create sockets... */
-       umem = xsk_configure_umem(bufs, NUM_FRAMES * opt_xsk_frame_size);
-       if (opt_bench == BENCH_RXDROP || opt_bench == BENCH_L2FWD) {
-               rx = true;
-               xsk_populate_fill_ring(umem);
-       }
-       if (opt_bench == BENCH_L2FWD || opt_bench == BENCH_TXONLY)
-               tx = true;
-       for (i = 0; i < opt_num_xsks; i++)
-               xsks[num_socks++] = xsk_configure_socket(umem, rx, tx);
-
-       for (i = 0; i < opt_num_xsks; i++)
-               apply_setsockopt(xsks[i]);
-
-       if (opt_bench == BENCH_TXONLY) {
-               if (opt_tstamp && opt_pkt_size < PKTGEN_SIZE_MIN)
-                       opt_pkt_size = PKTGEN_SIZE_MIN;
-
-               gen_eth_hdr_data();
-
-               for (i = 0; i < NUM_FRAMES; i++)
-                       gen_eth_frame(umem, i * opt_xsk_frame_size);
-       }
-
-       if (opt_num_xsks > 1 && opt_bench != BENCH_TXONLY)
-               enter_xsks_into_map(obj);
-
-       if (opt_reduced_cap) {
-               ret = recv_xsks_map_fd(&xsks_map_fd);
-               if (ret) {
-                       fprintf(stderr, "Error %d receiving xsks_map_fd\n", ret);
-                       exit_with_error(ret);
-               }
-               if (xsks[0]->xsk) {
-                       ret = xsk_socket__update_xskmap(xsks[0]->xsk, xsks_map_fd);
-                       if (ret) {
-                               fprintf(stderr, "Update of BPF map failed(%d)\n", ret);
-                               exit_with_error(ret);
-                       }
-               }
-       }
-
-       signal(SIGINT, int_exit);
-       signal(SIGTERM, int_exit);
-       signal(SIGABRT, int_exit);
-
-       setlocale(LC_ALL, "");
-
-       prev_time = get_nsecs();
-       start_time = prev_time;
-
-       if (!opt_quiet) {
-               ret = pthread_create(&pt, NULL, poller, NULL);
-               if (ret)
-                       exit_with_error(ret);
-       }
-
-       /* Configure sched priority for better wake-up accuracy */
-       memset(&schparam, 0, sizeof(schparam));
-       schparam.sched_priority = opt_schprio;
-       ret = sched_setscheduler(0, opt_schpolicy, &schparam);
-       if (ret) {
-               fprintf(stderr, "Error(%d) in setting priority(%d): %s\n",
-                       errno, opt_schprio, strerror(errno));
-               goto out;
-       }
-
-       if (opt_bench == BENCH_RXDROP)
-               rx_drop_all();
-       else if (opt_bench == BENCH_TXONLY)
-               tx_only_all();
-       else
-               l2fwd_all();
-
-out:
-       benchmark_done = true;
-
-       if (!opt_quiet)
-               pthread_join(pt, NULL);
-
-       xdpsock_cleanup();
-
-       munmap(bufs, NUM_FRAMES * opt_xsk_frame_size);
-
-       return 0;
-}
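A note on the fd-passing handshake above: recv_xsks_map_fd() expects a control-node peer that sends the xsks_map file descriptor over the AF_UNIX stream socket as SCM_RIGHTS ancillary data. Below is a minimal sketch of such a sender, assuming the same one-int payload as recv_xsks_map_fd_from_ctrl_node(); the function name send_xsks_map_fd is hypothetical and not part of the removed sample.

	#include <errno.h>
	#include <string.h>
	#include <sys/socket.h>

	/* Hypothetical sender-side counterpart to recv_xsks_map_fd_from_ctrl_node().
	 * One file descriptor is passed as SCM_RIGHTS ancillary data; a stream
	 * socket needs at least one byte of regular data to carry the cmsg.
	 */
	static int send_xsks_map_fd(int sock, int fd)
	{
		char cms[CMSG_SPACE(sizeof(int))];
		struct cmsghdr *cmsg;
		struct msghdr msg;
		struct iovec iov;
		int value = 0;

		memset(&msg, 0, sizeof(msg));
		iov.iov_base = &value;
		iov.iov_len = sizeof(int);
		msg.msg_iov = &iov;
		msg.msg_iovlen = 1;
		msg.msg_control = cms;
		msg.msg_controllen = sizeof(cms);

		cmsg = CMSG_FIRSTHDR(&msg);
		cmsg->cmsg_level = SOL_SOCKET;
		cmsg->cmsg_type = SCM_RIGHTS;
		cmsg->cmsg_len = CMSG_LEN(sizeof(int));
		memcpy(CMSG_DATA(cmsg), &fd, sizeof(int));

		/* The kernel installs a duplicate of fd in the receiver. */
		return sendmsg(sock, &msg, 0) < 0 ? -errno : 0;
	}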
diff --git a/samples/bpf/xsk_fwd.c b/samples/bpf/xsk_fwd.c
deleted file mode 100644 (file)
index 2324e18..0000000
+++ /dev/null
@@ -1,1085 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/* Copyright(c) 2020 Intel Corporation. */
-
-#define _GNU_SOURCE
-#include <poll.h>
-#include <pthread.h>
-#include <signal.h>
-#include <sched.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/mman.h>
-#include <sys/socket.h>
-#include <sys/types.h>
-#include <time.h>
-#include <unistd.h>
-#include <getopt.h>
-#include <netinet/ether.h>
-#include <net/if.h>
-
-#include <linux/bpf.h>
-#include <linux/if_link.h>
-#include <linux/if_xdp.h>
-
-#include <bpf/libbpf.h>
-#include <bpf/xsk.h>
-#include <bpf/bpf.h>
-
-/* libbpf APIs for AF_XDP are deprecated starting from v0.7 */
-#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
-
-#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
-
-typedef __u64 u64;
-typedef __u32 u32;
-typedef __u16 u16;
-typedef __u8  u8;
-
-/* This program illustrates packet forwarding between multiple AF_XDP
- * sockets in a multi-threaded environment. All threads share a common
- * buffer pool, with each socket having its own private buffer cache.
- *
- * Example 1: Single thread handling two sockets. The packets received by socket
- * A (interface IFA, queue QA) are forwarded to socket B (interface IFB, queue
- * QB), while the packets received by socket B are forwarded to socket A. The
- * thread is running on CPU core X:
- *
- *         ./xsk_fwd -i IFA -q QA -i IFB -q QB -c X
- *
- * Example 2: Two threads, each handling two sockets. The thread running on CPU
- * core X forwards all the packets received by socket A to socket B, and all the
- * packets received by socket B to socket A. The thread running on CPU core Y is
- * performing the same packet forwarding between sockets C and D:
- *
- *         ./xsk_fwd -i IFA -q QA -i IFB -q QB -i IFC -q QC -i IFD -q QD
- *         -c X -c Y
- */
-
-/*
- * Buffer pool and buffer cache
- *
- * For packet forwarding, the packet buffers are typically allocated from the
- * pool for packet reception and freed back to the pool for further reuse once
- * the packet transmission is completed.
- *
- * The buffer pool is shared between multiple threads. In order to minimize the
- * access latency to the shared buffer pool, each thread creates one (or
- * several) buffer caches, which, unlike the buffer pool, are private to the
- * thread that creates them and therefore cannot be shared with other threads.
- * The access to the shared pool is only needed either (A) when the cache gets
- * empty due to repeated buffer allocations and it needs to be replenished from
- * the pool, or (B) when the cache gets full due to repeated buffer frees and it
- * needs to be flushed back to the pool.
- *
- * In a packet forwarding system, a packet received on any input port can
- * potentially be transmitted on any output port, depending on the forwarding
- * configuration. For AF_XDP sockets, for this to work with zero-copy of the
- * packet buffers, it is required that the buffer pool memory fits into the
- * UMEM area shared by all the sockets.
- */
-
-struct bpool_params {
-       u32 n_buffers;
-       u32 buffer_size;
-       int mmap_flags;
-
-       u32 n_users_max;
-       u32 n_buffers_per_slab;
-};
-
-/* This buffer pool implementation organizes the buffers into equally sized
- * slabs of *n_buffers_per_slab*. Initially, there are *n_slabs* slabs in the
- * pool that are completely filled with buffer pointers (full slabs).
- *
- * Each buffer cache has a slab for buffer allocation and a slab for buffer
- * free, with both of these slabs initially empty. When the cache's allocation
- * slab goes empty, it is swapped with one of the available full slabs from the
- * pool, if any is available. When the cache's free slab goes full, it is
- * swapped for one of the empty slabs from the pool, which is guaranteed to
- * succeed.
- *
- * Partially filled slabs never get traded between the cache and the pool
- * (except when the cache itself is destroyed), which enables fast operation
- * through pointer swapping.
- */
-struct bpool {
-       struct bpool_params params;
-       pthread_mutex_t lock;
-       void *addr;
-
-       u64 **slabs;
-       u64 **slabs_reserved;
-       u64 *buffers;
-       u64 *buffers_reserved;
-
-       u64 n_slabs;
-       u64 n_slabs_reserved;
-       u64 n_buffers;
-
-       u64 n_slabs_available;
-       u64 n_slabs_reserved_available;
-
-       struct xsk_umem_config umem_cfg;
-       struct xsk_ring_prod umem_fq;
-       struct xsk_ring_cons umem_cq;
-       struct xsk_umem *umem;
-};
-
-static struct bpool *
-bpool_init(struct bpool_params *params,
-          struct xsk_umem_config *umem_cfg)
-{
-       u64 n_slabs, n_slabs_reserved, n_buffers, n_buffers_reserved;
-       u64 slabs_size, slabs_reserved_size;
-       u64 buffers_size, buffers_reserved_size;
-       u64 total_size, i;
-       struct bpool *bp;
-       u8 *p;
-       int status;
-
-       /* Use libbpf 1.0 API mode */
-       libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
-
-       /* bpool internals dimensioning. */
-       n_slabs = (params->n_buffers + params->n_buffers_per_slab - 1) /
-               params->n_buffers_per_slab;
-       n_slabs_reserved = params->n_users_max * 2;
-       n_buffers = n_slabs * params->n_buffers_per_slab;
-       n_buffers_reserved = n_slabs_reserved * params->n_buffers_per_slab;
-
-       slabs_size = n_slabs * sizeof(u64 *);
-       slabs_reserved_size = n_slabs_reserved * sizeof(u64 *);
-       buffers_size = n_buffers * sizeof(u64);
-       buffers_reserved_size = n_buffers_reserved * sizeof(u64);
-
-       total_size = sizeof(struct bpool) +
-               slabs_size + slabs_reserved_size +
-               buffers_size + buffers_reserved_size;
-
-       /* bpool memory allocation. */
-       p = calloc(total_size, sizeof(u8));
-       if (!p)
-               return NULL;
-
-       /* bpool memory initialization. */
-       bp = (struct bpool *)p;
-       memcpy(&bp->params, params, sizeof(*params));
-       bp->params.n_buffers = n_buffers;
-
-       bp->slabs = (u64 **)&p[sizeof(struct bpool)];
-       bp->slabs_reserved = (u64 **)&p[sizeof(struct bpool) +
-               slabs_size];
-       bp->buffers = (u64 *)&p[sizeof(struct bpool) +
-               slabs_size + slabs_reserved_size];
-       bp->buffers_reserved = (u64 *)&p[sizeof(struct bpool) +
-               slabs_size + slabs_reserved_size + buffers_size];
-
-       bp->n_slabs = n_slabs;
-       bp->n_slabs_reserved = n_slabs_reserved;
-       bp->n_buffers = n_buffers;
-
-       for (i = 0; i < n_slabs; i++)
-               bp->slabs[i] = &bp->buffers[i * params->n_buffers_per_slab];
-       bp->n_slabs_available = n_slabs;
-
-       for (i = 0; i < n_slabs_reserved; i++)
-               bp->slabs_reserved[i] = &bp->buffers_reserved[i *
-                       params->n_buffers_per_slab];
-       bp->n_slabs_reserved_available = n_slabs_reserved;
-
-       for (i = 0; i < n_buffers; i++)
-               bp->buffers[i] = i * params->buffer_size;
-
-       /* lock. */
-       status = pthread_mutex_init(&bp->lock, NULL);
-       if (status) {
-               free(p);
-               return NULL;
-       }
-
-       /* mmap. */
-       bp->addr = mmap(NULL,
-                       n_buffers * params->buffer_size,
-                       PROT_READ | PROT_WRITE,
-                       MAP_PRIVATE | MAP_ANONYMOUS | params->mmap_flags,
-                       -1,
-                       0);
-       if (bp->addr == MAP_FAILED) {
-               pthread_mutex_destroy(&bp->lock);
-               free(p);
-               return NULL;
-       }
-
-       /* umem. */
-       status = xsk_umem__create(&bp->umem,
-                                 bp->addr,
-                                 bp->params.n_buffers * bp->params.buffer_size,
-                                 &bp->umem_fq,
-                                 &bp->umem_cq,
-                                 umem_cfg);
-       if (status) {
-               munmap(bp->addr, bp->params.n_buffers * bp->params.buffer_size);
-               pthread_mutex_destroy(&bp->lock);
-               free(p);
-               return NULL;
-       }
-       memcpy(&bp->umem_cfg, umem_cfg, sizeof(*umem_cfg));
-
-       return bp;
-}
-
-static void
-bpool_free(struct bpool *bp)
-{
-       if (!bp)
-               return;
-
-       xsk_umem__delete(bp->umem);
-       munmap(bp->addr, bp->params.n_buffers * bp->params.buffer_size);
-       pthread_mutex_destroy(&bp->lock);
-       free(bp);
-}
-
-struct bcache {
-       struct bpool *bp;
-
-       u64 *slab_cons;
-       u64 *slab_prod;
-
-       u64 n_buffers_cons;
-       u64 n_buffers_prod;
-};
-
-static u32
-bcache_slab_size(struct bcache *bc)
-{
-       struct bpool *bp = bc->bp;
-
-       return bp->params.n_buffers_per_slab;
-}
-
-static struct bcache *
-bcache_init(struct bpool *bp)
-{
-       struct bcache *bc;
-
-       bc = calloc(1, sizeof(struct bcache));
-       if (!bc)
-               return NULL;
-
-       bc->bp = bp;
-       bc->n_buffers_cons = 0;
-       bc->n_buffers_prod = 0;
-
-       pthread_mutex_lock(&bp->lock);
-       if (bp->n_slabs_reserved_available == 0) {
-               pthread_mutex_unlock(&bp->lock);
-               free(bc);
-               return NULL;
-       }
-
-       bc->slab_cons = bp->slabs_reserved[bp->n_slabs_reserved_available - 1];
-       bc->slab_prod = bp->slabs_reserved[bp->n_slabs_reserved_available - 2];
-       bp->n_slabs_reserved_available -= 2;
-       pthread_mutex_unlock(&bp->lock);
-
-       return bc;
-}
-
-static void
-bcache_free(struct bcache *bc)
-{
-       struct bpool *bp;
-
-       if (!bc)
-               return;
-
-       /* In order to keep this example simple, the case of freeing any
-        * existing buffers from the cache back to the pool is ignored.
-        */
-
-       bp = bc->bp;
-       pthread_mutex_lock(&bp->lock);
-       bp->slabs_reserved[bp->n_slabs_reserved_available] = bc->slab_prod;
-       bp->slabs_reserved[bp->n_slabs_reserved_available + 1] = bc->slab_cons;
-       bp->n_slabs_reserved_available += 2;
-       pthread_mutex_unlock(&bp->lock);
-
-       free(bc);
-}
-
-/* To work correctly, the implementation requires that the *n_buffers* input
- * argument is never greater than the buffer pool's *n_buffers_per_slab*. This
- * is typically the case, with one exception taking place when a large number of
- * buffers are allocated at init time (e.g. for the UMEM fill queue setup).
- */
-static inline u32
-bcache_cons_check(struct bcache *bc, u32 n_buffers)
-{
-       struct bpool *bp = bc->bp;
-       u64 n_buffers_per_slab = bp->params.n_buffers_per_slab;
-       u64 n_buffers_cons = bc->n_buffers_cons;
-       u64 n_slabs_available;
-       u64 *slab_full;
-
-       /*
-        * Consumer slab is not empty: Use what's available locally. Do not
-        * look for more buffers from the pool when the ask can only be
-        * partially satisfied.
-        */
-       if (n_buffers_cons)
-               return (n_buffers_cons < n_buffers) ?
-                       n_buffers_cons :
-                       n_buffers;
-
-       /*
-        * Consumer slab is empty: look to trade the current consumer slab
-        * (full) for a full slab from the pool, if any is available.
-        */
-       pthread_mutex_lock(&bp->lock);
-       n_slabs_available = bp->n_slabs_available;
-       if (!n_slabs_available) {
-               pthread_mutex_unlock(&bp->lock);
-               return 0;
-       }
-
-       n_slabs_available--;
-       slab_full = bp->slabs[n_slabs_available];
-       bp->slabs[n_slabs_available] = bc->slab_cons;
-       bp->n_slabs_available = n_slabs_available;
-       pthread_mutex_unlock(&bp->lock);
-
-       bc->slab_cons = slab_full;
-       bc->n_buffers_cons = n_buffers_per_slab;
-       return n_buffers;
-}
-
-static inline u64
-bcache_cons(struct bcache *bc)
-{
-       u64 n_buffers_cons = bc->n_buffers_cons - 1;
-       u64 buffer;
-
-       buffer = bc->slab_cons[n_buffers_cons];
-       bc->n_buffers_cons = n_buffers_cons;
-       return buffer;
-}
-
-static inline void
-bcache_prod(struct bcache *bc, u64 buffer)
-{
-       struct bpool *bp = bc->bp;
-       u64 n_buffers_per_slab = bp->params.n_buffers_per_slab;
-       u64 n_buffers_prod = bc->n_buffers_prod;
-       u64 n_slabs_available;
-       u64 *slab_empty;
-
-       /*
-        * Producer slab is not yet full: store the current buffer to it.
-        */
-       if (n_buffers_prod < n_buffers_per_slab) {
-               bc->slab_prod[n_buffers_prod] = buffer;
-               bc->n_buffers_prod = n_buffers_prod + 1;
-               return;
-       }
-
-       /*
-        * Producer slab is full: trade the cache's current producer slab
-        * (full) for an empty slab from the pool, then store the current
-        * buffer to the new producer slab. As one full slab exists in the
-        * cache, it is guaranteed that there is at least one empty slab
-        * available in the pool.
-        */
-       pthread_mutex_lock(&bp->lock);
-       n_slabs_available = bp->n_slabs_available;
-       slab_empty = bp->slabs[n_slabs_available];
-       bp->slabs[n_slabs_available] = bc->slab_prod;
-       bp->n_slabs_available = n_slabs_available + 1;
-       pthread_mutex_unlock(&bp->lock);
-
-       slab_empty[0] = buffer;
-       bc->slab_prod = slab_empty;
-       bc->n_buffers_prod = 1;
-}
-
-/*
- * Port
- *
- * Each of the forwarding ports sits on top of an AF_XDP socket. In order for
- * packet forwarding to happen with no packet buffer copy, all the sockets need
- * to share the same UMEM area, which is used as the buffer pool memory.
- */
-#ifndef MAX_BURST_RX
-#define MAX_BURST_RX 64
-#endif
-
-#ifndef MAX_BURST_TX
-#define MAX_BURST_TX 64
-#endif
-
-struct burst_rx {
-       u64 addr[MAX_BURST_RX];
-       u32 len[MAX_BURST_RX];
-};
-
-struct burst_tx {
-       u64 addr[MAX_BURST_TX];
-       u32 len[MAX_BURST_TX];
-       u32 n_pkts;
-};
-
-struct port_params {
-       struct xsk_socket_config xsk_cfg;
-       struct bpool *bp;
-       const char *iface;
-       u32 iface_queue;
-};
-
-struct port {
-       struct port_params params;
-
-       struct bcache *bc;
-
-       struct xsk_ring_cons rxq;
-       struct xsk_ring_prod txq;
-       struct xsk_ring_prod umem_fq;
-       struct xsk_ring_cons umem_cq;
-       struct xsk_socket *xsk;
-       int umem_fq_initialized;
-
-       u64 n_pkts_rx;
-       u64 n_pkts_tx;
-};
-
-static void
-port_free(struct port *p)
-{
-       if (!p)
-               return;
-
-       /* To keep this example simple, the code to free the buffers from the
-        * socket's receive and transmit queues, as well as from the UMEM fill
-        * and completion queues, is not included.
-        */
-
-       if (p->xsk)
-               xsk_socket__delete(p->xsk);
-
-       bcache_free(p->bc);
-
-       free(p);
-}
-
-static struct port *
-port_init(struct port_params *params)
-{
-       struct port *p;
-       u32 umem_fq_size, pos = 0;
-       int status, i;
-
-       /* Memory allocation and initialization. */
-       p = calloc(1, sizeof(struct port));
-       if (!p)
-               return NULL;
-
-       memcpy(&p->params, params, sizeof(p->params));
-       umem_fq_size = params->bp->umem_cfg.fill_size;
-
-       /* bcache. */
-       p->bc = bcache_init(params->bp);
-       if (!p->bc ||
-           (bcache_slab_size(p->bc) < umem_fq_size) ||
-           (bcache_cons_check(p->bc, umem_fq_size) < umem_fq_size)) {
-               port_free(p);
-               return NULL;
-       }
-
-       /* xsk socket. */
-       status = xsk_socket__create_shared(&p->xsk,
-                                          params->iface,
-                                          params->iface_queue,
-                                          params->bp->umem,
-                                          &p->rxq,
-                                          &p->txq,
-                                          &p->umem_fq,
-                                          &p->umem_cq,
-                                          &params->xsk_cfg);
-       if (status) {
-               port_free(p);
-               return NULL;
-       }
-
-       /* umem fq. */
-       xsk_ring_prod__reserve(&p->umem_fq, umem_fq_size, &pos);
-
-       for (i = 0; i < umem_fq_size; i++)
-               *xsk_ring_prod__fill_addr(&p->umem_fq, pos + i) =
-                       bcache_cons(p->bc);
-
-       xsk_ring_prod__submit(&p->umem_fq, umem_fq_size);
-       p->umem_fq_initialized = 1;
-
-       return p;
-}
-
-static inline u32
-port_rx_burst(struct port *p, struct burst_rx *b)
-{
-       u32 n_pkts, pos, i;
-
-       /* Free buffers for FQ replenish. */
-       n_pkts = ARRAY_SIZE(b->addr);
-
-       n_pkts = bcache_cons_check(p->bc, n_pkts);
-       if (!n_pkts)
-               return 0;
-
-       /* RXQ. */
-       n_pkts = xsk_ring_cons__peek(&p->rxq, n_pkts, &pos);
-       if (!n_pkts) {
-               if (xsk_ring_prod__needs_wakeup(&p->umem_fq)) {
-                       struct pollfd pollfd = {
-                               .fd = xsk_socket__fd(p->xsk),
-                               .events = POLLIN,
-                       };
-
-                       poll(&pollfd, 1, 0);
-               }
-               return 0;
-       }
-
-       for (i = 0; i < n_pkts; i++) {
-               b->addr[i] = xsk_ring_cons__rx_desc(&p->rxq, pos + i)->addr;
-               b->len[i] = xsk_ring_cons__rx_desc(&p->rxq, pos + i)->len;
-       }
-
-       xsk_ring_cons__release(&p->rxq, n_pkts);
-       p->n_pkts_rx += n_pkts;
-
-       /* UMEM FQ. */
-       for ( ; ; ) {
-               int status;
-
-               status = xsk_ring_prod__reserve(&p->umem_fq, n_pkts, &pos);
-               if (status == n_pkts)
-                       break;
-
-               if (xsk_ring_prod__needs_wakeup(&p->umem_fq)) {
-                       struct pollfd pollfd = {
-                               .fd = xsk_socket__fd(p->xsk),
-                               .events = POLLIN,
-                       };
-
-                       poll(&pollfd, 1, 0);
-               }
-       }
-
-       for (i = 0; i < n_pkts; i++)
-               *xsk_ring_prod__fill_addr(&p->umem_fq, pos + i) =
-                       bcache_cons(p->bc);
-
-       xsk_ring_prod__submit(&p->umem_fq, n_pkts);
-
-       return n_pkts;
-}
-
-static inline void
-port_tx_burst(struct port *p, struct burst_tx *b)
-{
-       u32 n_pkts, pos, i;
-       int status;
-
-       /* UMEM CQ. */
-       n_pkts = p->params.bp->umem_cfg.comp_size;
-
-       n_pkts = xsk_ring_cons__peek(&p->umem_cq, n_pkts, &pos);
-
-       for (i = 0; i < n_pkts; i++) {
-               u64 addr = *xsk_ring_cons__comp_addr(&p->umem_cq, pos + i);
-
-               bcache_prod(p->bc, addr);
-       }
-
-       xsk_ring_cons__release(&p->umem_cq, n_pkts);
-
-       /* TXQ. */
-       n_pkts = b->n_pkts;
-
-       for ( ; ; ) {
-               status = xsk_ring_prod__reserve(&p->txq, n_pkts, &pos);
-               if (status == n_pkts)
-                       break;
-
-               if (xsk_ring_prod__needs_wakeup(&p->txq))
-                       sendto(xsk_socket__fd(p->xsk), NULL, 0, MSG_DONTWAIT,
-                              NULL, 0);
-       }
-
-       for (i = 0; i < n_pkts; i++) {
-               xsk_ring_prod__tx_desc(&p->txq, pos + i)->addr = b->addr[i];
-               xsk_ring_prod__tx_desc(&p->txq, pos + i)->len = b->len[i];
-       }
-
-       xsk_ring_prod__submit(&p->txq, n_pkts);
-       if (xsk_ring_prod__needs_wakeup(&p->txq))
-               sendto(xsk_socket__fd(p->xsk), NULL, 0, MSG_DONTWAIT, NULL, 0);
-       p->n_pkts_tx += n_pkts;
-}
-
-/*
- * Thread
- *
- * Packet forwarding threads.
- */
-#ifndef MAX_PORTS_PER_THREAD
-#define MAX_PORTS_PER_THREAD 16
-#endif
-
-struct thread_data {
-       struct port *ports_rx[MAX_PORTS_PER_THREAD];
-       struct port *ports_tx[MAX_PORTS_PER_THREAD];
-       u32 n_ports_rx;
-       struct burst_rx burst_rx;
-       struct burst_tx burst_tx[MAX_PORTS_PER_THREAD];
-       u32 cpu_core_id;
-       int quit;
-};
-
-static void swap_mac_addresses(void *data)
-{
-       struct ether_header *eth = (struct ether_header *)data;
-       struct ether_addr *src_addr = (struct ether_addr *)&eth->ether_shost;
-       struct ether_addr *dst_addr = (struct ether_addr *)&eth->ether_dhost;
-       struct ether_addr tmp;
-
-       tmp = *src_addr;
-       *src_addr = *dst_addr;
-       *dst_addr = tmp;
-}
-
-static void *
-thread_func(void *arg)
-{
-       struct thread_data *t = arg;
-       cpu_set_t cpu_cores;
-       u32 i;
-
-       CPU_ZERO(&cpu_cores);
-       CPU_SET(t->cpu_core_id, &cpu_cores);
-       pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpu_cores);
-
-       for (i = 0; !t->quit; i = (i + 1) % t->n_ports_rx) {
-               struct port *port_rx = t->ports_rx[i];
-               struct port *port_tx = t->ports_tx[i];
-               struct burst_rx *brx = &t->burst_rx;
-               struct burst_tx *btx = &t->burst_tx[i];
-               u32 n_pkts, j;
-
-               /* RX. */
-               n_pkts = port_rx_burst(port_rx, brx);
-               if (!n_pkts)
-                       continue;
-
-               /* Process & TX. */
-               for (j = 0; j < n_pkts; j++) {
-                       u64 addr = xsk_umem__add_offset_to_addr(brx->addr[j]);
-                       u8 *pkt = xsk_umem__get_data(port_rx->params.bp->addr,
-                                                    addr);
-
-                       swap_mac_addresses(pkt);
-
-                       btx->addr[btx->n_pkts] = brx->addr[j];
-                       btx->len[btx->n_pkts] = brx->len[j];
-                       btx->n_pkts++;
-
-                       if (btx->n_pkts == MAX_BURST_TX) {
-                               port_tx_burst(port_tx, btx);
-                               btx->n_pkts = 0;
-                       }
-               }
-       }
-
-       return NULL;
-}
-
-/*
- * Process
- */
-static const struct bpool_params bpool_params_default = {
-       .n_buffers = 64 * 1024,
-       .buffer_size = XSK_UMEM__DEFAULT_FRAME_SIZE,
-       .mmap_flags = 0,
-
-       .n_users_max = 16,
-       .n_buffers_per_slab = XSK_RING_PROD__DEFAULT_NUM_DESCS * 2,
-};
-
-static const struct xsk_umem_config umem_cfg_default = {
-       .fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS * 2,
-       .comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS,
-       .frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE,
-       .frame_headroom = XSK_UMEM__DEFAULT_FRAME_HEADROOM,
-       .flags = 0,
-};
-
-static const struct port_params port_params_default = {
-       .xsk_cfg = {
-               .rx_size = XSK_RING_CONS__DEFAULT_NUM_DESCS,
-               .tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
-               .libbpf_flags = 0,
-               .xdp_flags = XDP_FLAGS_DRV_MODE,
-               .bind_flags = XDP_USE_NEED_WAKEUP | XDP_ZEROCOPY,
-       },
-
-       .bp = NULL,
-       .iface = NULL,
-       .iface_queue = 0,
-};
-
-#ifndef MAX_PORTS
-#define MAX_PORTS 64
-#endif
-
-#ifndef MAX_THREADS
-#define MAX_THREADS 64
-#endif
-
-static struct bpool_params bpool_params;
-static struct xsk_umem_config umem_cfg;
-static struct bpool *bp;
-
-static struct port_params port_params[MAX_PORTS];
-static struct port *ports[MAX_PORTS];
-static u64 n_pkts_rx[MAX_PORTS];
-static u64 n_pkts_tx[MAX_PORTS];
-static int n_ports;
-
-static pthread_t threads[MAX_THREADS];
-static struct thread_data thread_data[MAX_THREADS];
-static int n_threads;
-
-static void
-print_usage(char *prog_name)
-{
-       const char *usage =
-               "Usage:\n"
-               "\t%s [ -b SIZE ] -c CORE -i INTERFACE [ -q QUEUE ]\n"
-               "\n"
-               "-c CORE        CPU core to run a packet forwarding thread\n"
-               "               on. May be invoked multiple times.\n"
-               "\n"
-               "-b SIZE        Number of buffers in the buffer pool shared\n"
-               "               by all the forwarding threads. Default: %u.\n"
-               "\n"
-               "-i INTERFACE   Network interface. Each (INTERFACE, QUEUE)\n"
-               "               pair specifies one forwarding port. May be\n"
-               "               invoked multiple times.\n"
-               "\n"
-               "-q QUEUE       Network interface queue for RX and TX. Each\n"
-               "               (INTERFACE, QUEUE) pair specified one\n"
-               "               forwarding port. Default: %u. May be invoked\n"
-               "               multiple times.\n"
-               "\n";
-       printf(usage,
-              prog_name,
-              bpool_params_default.n_buffers,
-              port_params_default.iface_queue);
-}
-
-static int
-parse_args(int argc, char **argv)
-{
-       struct option lgopts[] = {
-               { NULL,  0, 0, 0 }
-       };
-       int opt, option_index;
-
-       /* Parse the input arguments. */
-       for ( ; ; ) {
-               opt = getopt_long(argc, argv, "b:c:i:q:", lgopts, &option_index);
-               if (opt == EOF)
-                       break;
-
-               switch (opt) {
-               case 'b':
-                       bpool_params.n_buffers = atoi(optarg);
-                       break;
-
-               case 'c':
-                       if (n_threads == MAX_THREADS) {
-                               printf("Max number of threads (%d) reached.\n",
-                                      MAX_THREADS);
-                               return -1;
-                       }
-
-                       thread_data[n_threads].cpu_core_id = atoi(optarg);
-                       n_threads++;
-                       break;
-
-               case 'i':
-                       if (n_ports == MAX_PORTS) {
-                               printf("Max number of ports (%d) reached.\n",
-                                      MAX_PORTS);
-                               return -1;
-                       }
-
-                       port_params[n_ports].iface = optarg;
-                       port_params[n_ports].iface_queue = 0;
-                       n_ports++;
-                       break;
-
-               case 'q':
-                       if (n_ports == 0) {
-                               printf("No port specified for queue.\n");
-                               return -1;
-                       }
-                       port_params[n_ports - 1].iface_queue = atoi(optarg);
-                       break;
-
-               default:
-                       printf("Illegal argument.\n");
-                       return -1;
-               }
-       }
-
-       optind = 1; /* reset getopt lib */
-
-       /* Check the input arguments. */
-       if (!n_ports) {
-               printf("No ports specified.\n");
-               return -1;
-       }
-
-       if (!n_threads) {
-               printf("No threads specified.\n");
-               return -1;
-       }
-
-       if (n_ports % n_threads) {
-               printf("Ports cannot be evenly distributed to threads.\n");
-               return -1;
-       }
-
-       return 0;
-}
-
-static void
-print_port(u32 port_id)
-{
-       struct port *port = ports[port_id];
-
-       printf("Port %u: interface = %s, queue = %u\n",
-              port_id, port->params.iface, port->params.iface_queue);
-}
-
-static void
-print_thread(u32 thread_id)
-{
-       struct thread_data *t = &thread_data[thread_id];
-       u32 i;
-
-       printf("Thread %u (CPU core %u): ",
-              thread_id, t->cpu_core_id);
-
-       for (i = 0; i < t->n_ports_rx; i++) {
-               struct port *port_rx = t->ports_rx[i];
-               struct port *port_tx = t->ports_tx[i];
-
-               printf("(%s, %u) -> (%s, %u), ",
-                      port_rx->params.iface,
-                      port_rx->params.iface_queue,
-                      port_tx->params.iface,
-                      port_tx->params.iface_queue);
-       }
-
-       printf("\n");
-}
-
-static void
-print_port_stats_separator(void)
-{
-       printf("+-%4s-+-%12s-+-%13s-+-%12s-+-%13s-+\n",
-              "----",
-              "------------",
-              "-------------",
-              "------------",
-              "-------------");
-}
-
-static void
-print_port_stats_header(void)
-{
-       print_port_stats_separator();
-       printf("| %4s | %12s | %13s | %12s | %13s |\n",
-              "Port",
-              "RX packets",
-              "RX rate (pps)",
-              "TX packets",
-              "TX_rate (pps)");
-       print_port_stats_separator();
-}
-
-static void
-print_port_stats_trailer(void)
-{
-       print_port_stats_separator();
-       printf("\n");
-}
-
-static void
-print_port_stats(int port_id, u64 ns_diff)
-{
-       struct port *p = ports[port_id];
-       double rx_pps, tx_pps;
-
-       rx_pps = (p->n_pkts_rx - n_pkts_rx[port_id]) * 1000000000. / ns_diff;
-       tx_pps = (p->n_pkts_tx - n_pkts_tx[port_id]) * 1000000000. / ns_diff;
-
-       printf("| %4d | %12llu | %13.0f | %12llu | %13.0f |\n",
-              port_id,
-              p->n_pkts_rx,
-              rx_pps,
-              p->n_pkts_tx,
-              tx_pps);
-
-       n_pkts_rx[port_id] = p->n_pkts_rx;
-       n_pkts_tx[port_id] = p->n_pkts_tx;
-}
-
-static void
-print_port_stats_all(u64 ns_diff)
-{
-       int i;
-
-       print_port_stats_header();
-       for (i = 0; i < n_ports; i++)
-               print_port_stats(i, ns_diff);
-       print_port_stats_trailer();
-}
-
-static int quit;
-
-static void
-signal_handler(int sig)
-{
-       quit = 1;
-}
-
-static void remove_xdp_program(void)
-{
-       int i;
-
-       for (i = 0 ; i < n_ports; i++)
-               bpf_xdp_detach(if_nametoindex(port_params[i].iface),
-                              port_params[i].xsk_cfg.xdp_flags, NULL);
-}
-
-int main(int argc, char **argv)
-{
-       struct timespec time;
-       u64 ns0;
-       int i;
-
-       /* Parse args. */
-       memcpy(&bpool_params, &bpool_params_default,
-              sizeof(struct bpool_params));
-       memcpy(&umem_cfg, &umem_cfg_default,
-              sizeof(struct xsk_umem_config));
-       for (i = 0; i < MAX_PORTS; i++)
-               memcpy(&port_params[i], &port_params_default,
-                      sizeof(struct port_params));
-
-       if (parse_args(argc, argv)) {
-               print_usage(argv[0]);
-               return -1;
-       }
-
-       /* Buffer pool initialization. */
-       bp = bpool_init(&bpool_params, &umem_cfg);
-       if (!bp) {
-               printf("Buffer pool initialization failed.\n");
-               return -1;
-       }
-       printf("Buffer pool created successfully.\n");
-
-       /* Ports initialization. */
-       for (i = 0; i < MAX_PORTS; i++)
-               port_params[i].bp = bp;
-
-       for (i = 0; i < n_ports; i++) {
-               ports[i] = port_init(&port_params[i]);
-               if (!ports[i]) {
-                       printf("Port %d initialization failed.\n", i);
-                       return -1;
-               }
-               print_port(i);
-       }
-       printf("All ports created successfully.\n");
-
-       /* Threads. */
-       for (i = 0; i < n_threads; i++) {
-               struct thread_data *t = &thread_data[i];
-               u32 n_ports_per_thread = n_ports / n_threads, j;
-
-               for (j = 0; j < n_ports_per_thread; j++) {
-                       t->ports_rx[j] = ports[i * n_ports_per_thread + j];
-                       t->ports_tx[j] = ports[i * n_ports_per_thread +
-                               (j + 1) % n_ports_per_thread];
-               }
-
-               t->n_ports_rx = n_ports_per_thread;
-
-               print_thread(i);
-       }
-
-       for (i = 0; i < n_threads; i++) {
-               int status;
-
-               status = pthread_create(&threads[i],
-                                       NULL,
-                                       thread_func,
-                                       &thread_data[i]);
-               if (status) {
-                       printf("Thread %d creation failed.\n", i);
-                       return -1;
-               }
-       }
-       printf("All threads created successfully.\n");
-
-       /* Print statistics. */
-       signal(SIGINT, signal_handler);
-       signal(SIGTERM, signal_handler);
-       signal(SIGABRT, signal_handler);
-
-       clock_gettime(CLOCK_MONOTONIC, &time);
-       ns0 = time.tv_sec * 1000000000UL + time.tv_nsec;
-       for ( ; !quit; ) {
-               u64 ns1, ns_diff;
-
-               sleep(1);
-               clock_gettime(CLOCK_MONOTONIC, &time);
-               ns1 = time.tv_sec * 1000000000UL + time.tv_nsec;
-               ns_diff = ns1 - ns0;
-               ns0 = ns1;
-
-               print_port_stats_all(ns_diff);
-       }
-
-       /* Threads completion. */
-       printf("Quit.\n");
-       for (i = 0; i < n_threads; i++)
-               thread_data[i].quit = 1;
-
-       for (i = 0; i < n_threads; i++)
-               pthread_join(threads[i], NULL);
-
-       for (i = 0; i < n_ports; i++)
-               port_free(ports[i]);
-
-       bpool_free(bp);
-
-       remove_xdp_program();
-
-       return 0;
-}
index 4ce9a77..e44039f 100644
@@ -24,9 +24,11 @@ FEATURE COMMANDS
 ================
 
 |      **bpftool** **feature probe** [*COMPONENT*] [**full**] [**unprivileged**] [**macros** [**prefix** *PREFIX*]]
+|      **bpftool** **feature list_builtins** *GROUP*
 |      **bpftool** **feature help**
 |
 |      *COMPONENT* := { **kernel** | **dev** *NAME* }
+|      *GROUP* := { **prog_types** | **map_types** | **attach_types** | **link_types** | **helpers** }
 
 DESCRIPTION
 ===========
@@ -70,6 +72,16 @@ DESCRIPTION
                  The keywords **full**, **macros** and **prefix** have the
                  same role as when probing the kernel.
 
+       **bpftool feature list_builtins** *GROUP*
+                 List items known to bpftool. These can be BPF program types
+                 (**prog_types**), BPF map types (**map_types**), attach types
+                 (**attach_types**), link types (**link_types**), or BPF helper
+                 functions (**helpers**). The command does not probe the system, but
+                 simply lists the elements known to bpftool at compilation time,
+                 as provided by libbpf (for the object types) or by the BPF UAPI
+                 header (for the helpers). This can be used in scripts to iterate over
+                 BPF types or helpers.
+
        **bpftool feature help**
                  Print short help message.
 
index c19e0e4..6b5b3a9 100644
@@ -93,10 +93,8 @@ INSTALL ?= install
 RM ?= rm -f
 
 FEATURE_USER = .bpftool
-FEATURE_TESTS = libbfd disassembler-four-args zlib libcap \
-       clang-bpf-co-re
-FEATURE_DISPLAY = libbfd disassembler-four-args zlib libcap \
-       clang-bpf-co-re
+FEATURE_TESTS = libbfd disassembler-four-args libcap clang-bpf-co-re
+FEATURE_DISPLAY = libbfd disassembler-four-args libcap clang-bpf-co-re
 
 check_feat := 1
 NON_CHECK_FEAT_TARGETS := clean uninstall doc doc-clean doc-install doc-uninstall
@@ -204,11 +202,6 @@ $(BOOTSTRAP_OUTPUT)disasm.o: $(srctree)/kernel/bpf/disasm.c
 $(OUTPUT)disasm.o: $(srctree)/kernel/bpf/disasm.c
        $(QUIET_CC)$(CC) $(CFLAGS) -c -MMD $< -o $@
 
-$(OUTPUT)feature.o:
-ifneq ($(feature-zlib), 1)
-       $(error "No zlib found")
-endif
-
 $(BPFTOOL_BOOTSTRAP): $(BOOTSTRAP_OBJS) $(LIBBPF_BOOTSTRAP)
        $(QUIET_LINK)$(HOSTCC) $(HOST_CFLAGS) $(LDFLAGS) $(BOOTSTRAP_OBJS) $(LIBS_BOOTSTRAP) -o $@
 
index 91f89a9..dc1641e 100644
@@ -703,15 +703,8 @@ _bpftool()
                             return 0
                             ;;
                         type)
-                            local BPFTOOL_MAP_CREATE_TYPES='hash array \
-                                prog_array perf_event_array percpu_hash \
-                                percpu_array stack_trace cgroup_array lru_hash \
-                                lru_percpu_hash lpm_trie array_of_maps \
-                                hash_of_maps devmap devmap_hash sockmap cpumap \
-                                xskmap sockhash cgroup_storage reuseport_sockarray \
-                                percpu_cgroup_storage queue stack sk_storage \
-                                struct_ops ringbuf inode_storage task_storage \
-                                bloom_filter'
+                            local BPFTOOL_MAP_CREATE_TYPES="$(bpftool feature list_builtins map_types 2>/dev/null | \
+                                grep -v '^unspec$')"
                             COMPREPLY=( $( compgen -W "$BPFTOOL_MAP_CREATE_TYPES" -- "$cur" ) )
                             return 0
                             ;;
@@ -1039,14 +1032,8 @@ _bpftool()
                     return 0
                     ;;
                 attach|detach)
-                    local BPFTOOL_CGROUP_ATTACH_TYPES='cgroup_inet_ingress cgroup_inet_egress \
-                        cgroup_inet_sock_create cgroup_sock_ops cgroup_device cgroup_inet4_bind \
-                        cgroup_inet6_bind cgroup_inet4_post_bind cgroup_inet6_post_bind \
-                        cgroup_inet4_connect cgroup_inet6_connect cgroup_inet4_getpeername \
-                        cgroup_inet6_getpeername cgroup_inet4_getsockname cgroup_inet6_getsockname \
-                        cgroup_udp4_sendmsg cgroup_udp6_sendmsg cgroup_udp4_recvmsg \
-                        cgroup_udp6_recvmsg cgroup_sysctl cgroup_getsockopt cgroup_setsockopt \
-                        cgroup_inet_sock_release'
+                    local BPFTOOL_CGROUP_ATTACH_TYPES="$(bpftool feature list_builtins attach_types 2>/dev/null | \
+                        grep '^cgroup_')"
                     local ATTACH_FLAGS='multi override'
                     local PROG_TYPE='id pinned tag name'
                     # Check for $prev = $command first
@@ -1175,9 +1162,14 @@ _bpftool()
                     _bpftool_once_attr 'full unprivileged'
                     return 0
                     ;;
+                list_builtins)
+                    [[ $prev != "$command" ]] && return 0
+                    COMPREPLY=( $( compgen -W 'prog_types map_types \
+                        attach_types link_types helpers' -- "$cur" ) )
+                    ;;
                 *)
                     [[ $prev == $object ]] && \
-                        COMPREPLY=( $( compgen -W 'help probe' -- "$cur" ) )
+                        COMPREPLY=( $( compgen -W 'help list_builtins probe' -- "$cur" ) )
                     ;;
             esac
             ;;
index 42421fe..cced668 100644
@@ -15,6 +15,7 @@
 #include <unistd.h>
 
 #include <bpf/bpf.h>
+#include <bpf/btf.h>
 
 #include "main.h"
 
@@ -36,6 +37,8 @@
        "                        cgroup_inet_sock_release }"
 
 static unsigned int query_flags;
+static struct btf *btf_vmlinux;
+static __u32 btf_vmlinux_id;
 
 static enum bpf_attach_type parse_attach_type(const char *str)
 {
@@ -64,11 +67,38 @@ static enum bpf_attach_type parse_attach_type(const char *str)
        return __MAX_BPF_ATTACH_TYPE;
 }
 
+static void guess_vmlinux_btf_id(__u32 attach_btf_obj_id)
+{
+       struct bpf_btf_info btf_info = {};
+       __u32 btf_len = sizeof(btf_info);
+       char name[16] = {};
+       int err;
+       int fd;
+
+       btf_info.name = ptr_to_u64(name);
+       btf_info.name_len = sizeof(name);
+
+       fd = bpf_btf_get_fd_by_id(attach_btf_obj_id);
+       if (fd < 0)
+               return;
+
+       err = bpf_obj_get_info_by_fd(fd, &btf_info, &btf_len);
+       if (err)
+               goto out;
+
+       if (btf_info.kernel_btf && strncmp(name, "vmlinux", sizeof(name)) == 0)
+               btf_vmlinux_id = btf_info.id;
+
+out:
+       close(fd);
+}
+
 static int show_bpf_prog(int id, enum bpf_attach_type attach_type,
                         const char *attach_flags_str,
                         int level)
 {
        char prog_name[MAX_PROG_FULL_NAME];
+       const char *attach_btf_name = NULL;
        struct bpf_prog_info info = {};
        const char *attach_type_str;
        __u32 info_len = sizeof(info);
@@ -84,6 +114,20 @@ static int show_bpf_prog(int id, enum bpf_attach_type attach_type,
        }
 
        attach_type_str = libbpf_bpf_attach_type_str(attach_type);
+
+       if (btf_vmlinux) {
+               if (!btf_vmlinux_id)
+                       guess_vmlinux_btf_id(info.attach_btf_obj_id);
+
+               if (btf_vmlinux_id == info.attach_btf_obj_id &&
+                   info.attach_btf_id < btf__type_cnt(btf_vmlinux)) {
+                       const struct btf_type *t =
+                               btf__type_by_id(btf_vmlinux, info.attach_btf_id);
+                       attach_btf_name =
+                               btf__name_by_offset(btf_vmlinux, t->name_off);
+               }
+       }
+
        get_prog_full_name(&info, prog_fd, prog_name, sizeof(prog_name));
        if (json_output) {
                jsonw_start_object(json_wtr);
@@ -95,6 +139,10 @@ static int show_bpf_prog(int id, enum bpf_attach_type attach_type,
                jsonw_string_field(json_wtr, "attach_flags",
                                   attach_flags_str);
                jsonw_string_field(json_wtr, "name", prog_name);
+               if (attach_btf_name)
+                       jsonw_string_field(json_wtr, "attach_btf_name", attach_btf_name);
+               jsonw_uint_field(json_wtr, "attach_btf_obj_id", info.attach_btf_obj_id);
+               jsonw_uint_field(json_wtr, "attach_btf_id", info.attach_btf_id);
                jsonw_end_object(json_wtr);
        } else {
                printf("%s%-8u ", level ? "    " : "", info.id);
@@ -102,7 +150,13 @@ static int show_bpf_prog(int id, enum bpf_attach_type attach_type,
                        printf("%-15s", attach_type_str);
                else
                        printf("type %-10u", attach_type);
-               printf(" %-15s %-15s\n", attach_flags_str, prog_name);
+               printf(" %-15s %-15s", attach_flags_str, prog_name);
+               if (attach_btf_name)
+                       printf(" %-15s", attach_btf_name);
+               else if (info.attach_btf_id)
+                       printf(" attach_btf_obj_id=%d attach_btf_id=%d",
+                              info.attach_btf_obj_id, info.attach_btf_id);
+               printf("\n");
        }
 
        close(prog_fd);
@@ -144,40 +198,49 @@ static int cgroup_has_attached_progs(int cgroup_fd)
 static int show_attached_bpf_progs(int cgroup_fd, enum bpf_attach_type type,
                                   int level)
 {
+       LIBBPF_OPTS(bpf_prog_query_opts, p);
+       __u32 prog_attach_flags[1024] = {0};
        const char *attach_flags_str;
        __u32 prog_ids[1024] = {0};
-       __u32 prog_cnt, iter;
-       __u32 attach_flags;
        char buf[32];
+       __u32 iter;
        int ret;
 
-       prog_cnt = ARRAY_SIZE(prog_ids);
-       ret = bpf_prog_query(cgroup_fd, type, query_flags, &attach_flags,
-                            prog_ids, &prog_cnt);
+       p.query_flags = query_flags;
+       p.prog_cnt = ARRAY_SIZE(prog_ids);
+       p.prog_ids = prog_ids;
+       p.prog_attach_flags = prog_attach_flags;
+
+       ret = bpf_prog_query_opts(cgroup_fd, type, &p);
        if (ret)
                return ret;
 
-       if (prog_cnt == 0)
+       if (p.prog_cnt == 0)
                return 0;
 
-       switch (attach_flags) {
-       case BPF_F_ALLOW_MULTI:
-               attach_flags_str = "multi";
-               break;
-       case BPF_F_ALLOW_OVERRIDE:
-               attach_flags_str = "override";
-               break;
-       case 0:
-               attach_flags_str = "";
-               break;
-       default:
-               snprintf(buf, sizeof(buf), "unknown(%x)", attach_flags);
-               attach_flags_str = buf;
-       }
+       for (iter = 0; iter < p.prog_cnt; iter++) {
+               __u32 attach_flags;
+
+               attach_flags = prog_attach_flags[iter] ?: p.attach_flags;
+
+               switch (attach_flags) {
+               case BPF_F_ALLOW_MULTI:
+                       attach_flags_str = "multi";
+                       break;
+               case BPF_F_ALLOW_OVERRIDE:
+                       attach_flags_str = "override";
+                       break;
+               case 0:
+                       attach_flags_str = "";
+                       break;
+               default:
+                       snprintf(buf, sizeof(buf), "unknown(%x)", attach_flags);
+                       attach_flags_str = buf;
+               }
 
-       for (iter = 0; iter < prog_cnt; iter++)
                show_bpf_prog(prog_ids[iter], type,
                              attach_flags_str, level);
+       }
 
        return 0;
 }
@@ -233,6 +296,7 @@ static int do_show(int argc, char **argv)
                printf("%-8s %-15s %-15s %-15s\n", "ID", "AttachType",
                       "AttachFlags", "Name");
 
+       btf_vmlinux = libbpf_find_kernel_btf();
        for (type = 0; type < __MAX_BPF_ATTACH_TYPE; type++) {
                /*
                 * Not all attach types may be supported, so it's expected,
@@ -296,6 +360,7 @@ static int do_show_tree_fn(const char *fpath, const struct stat *sb,
                printf("%s\n", fpath);
        }
 
+       btf_vmlinux = libbpf_find_kernel_btf();
        for (type = 0; type < __MAX_BPF_ATTACH_TYPE; type++)
                show_attached_bpf_progs(cgroup_fd, type, ftw->level);
 
index a0d4acd..067e9ea 100644
 #include <stdlib.h>
 #include <string.h>
 #include <unistd.h>
-#include <linux/limits.h>
-#include <linux/magic.h>
 #include <net/if.h>
 #include <sys/mount.h>
 #include <sys/resource.h>
 #include <sys/stat.h>
 #include <sys/vfs.h>
 
+#include <linux/filter.h>
+#include <linux/limits.h>
+#include <linux/magic.h>
+#include <linux/unistd.h>
+
 #include <bpf/bpf.h>
 #include <bpf/hashmap.h>
 #include <bpf/libbpf.h> /* libbpf_num_possible_cpus */
@@ -73,11 +76,73 @@ static bool is_bpffs(char *path)
        return (unsigned long)st_fs.f_type == BPF_FS_MAGIC;
 }
 
+/* Probe whether kernel switched from memlock-based (RLIMIT_MEMLOCK) to
+ * memcg-based memory accounting for BPF maps and programs. This was done in
+ * commit 97306be45fbe ("Merge branch 'switch to memcg-based memory
+ * accounting'"), in Linux 5.11.
+ *
+ * Libbpf also offers a probe for memcg-based accounting vs rlimit, but it
+ * works by checking for the availability of a given BPF helper, which has
+ * failed on some kernels with backports in the past; see commit 6b4384ff1088
+ * ("Revert "bpftool: Use libbpf 1.0 API mode instead of RLIMIT_MEMLOCK"").
+ * Instead, we can probe by lowering the process-based rlimit to 0, trying to
+ * load a BPF object, and resetting the rlimit. If the load succeeds then
+ * memcg-based accounting is supported.
+ *
+ * This would be too dangerous to do in the library, because multithreaded
+ * applications might attempt to load items while the rlimit is at 0. Given
+ * that bpftool is single-threaded, this is fine to do here.
+ */
+static bool known_to_need_rlimit(void)
+{
+       struct rlimit rlim_init, rlim_cur_zero = {};
+       struct bpf_insn insns[] = {
+               BPF_MOV64_IMM(BPF_REG_0, 0),
+               BPF_EXIT_INSN(),
+       };
+       size_t insn_cnt = ARRAY_SIZE(insns);
+       union bpf_attr attr;
+       int prog_fd, err;
+
+       memset(&attr, 0, sizeof(attr));
+       attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
+       attr.insns = ptr_to_u64(insns);
+       attr.insn_cnt = insn_cnt;
+       attr.license = ptr_to_u64("GPL");
+
+       if (getrlimit(RLIMIT_MEMLOCK, &rlim_init))
+               return false;
+
+       /* Drop the soft limit to zero. We keep the hard limit at its
+        * current value, because lowering it would be a permanent operation
+        * for unprivileged users.
+        */
+       rlim_cur_zero.rlim_max = rlim_init.rlim_max;
+       if (setrlimit(RLIMIT_MEMLOCK, &rlim_cur_zero))
+               return false;
+
+       /* Do not use bpf_prog_load() from libbpf here, because it calls
+        * bump_rlimit_memlock(), interfering with the current probe.
+        */
+       prog_fd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
+       err = errno;
+
+       /* reset soft rlimit to its initial value */
+       setrlimit(RLIMIT_MEMLOCK, &rlim_init);
+
+       if (prog_fd < 0)
+               return err == EPERM;
+
+       close(prog_fd);
+       return false;
+}
+
 void set_max_rlimit(void)
 {
        struct rlimit rinf = { RLIM_INFINITY, RLIM_INFINITY };
 
-       setrlimit(RLIMIT_MEMLOCK, &rinf);
+       if (known_to_need_rlimit())
+               setrlimit(RLIMIT_MEMLOCK, &rinf);
 }
 
 static int
@@ -251,6 +316,7 @@ const char *get_fd_type_name(enum bpf_obj_type type)
                [BPF_OBJ_UNKNOWN]       = "unknown",
                [BPF_OBJ_PROG]          = "prog",
                [BPF_OBJ_MAP]           = "map",
+               [BPF_OBJ_LINK]          = "link",
        };
 
        if (type < 0 || type >= ARRAY_SIZE(names) || !names[type])
index bac4ef4..7ecabf7 100644
@@ -1258,6 +1258,58 @@ exit_close_json:
        return 0;
 }
 
+static const char *get_helper_name(unsigned int id)
+{
+       if (id >= ARRAY_SIZE(helper_name))
+               return NULL;
+
+       return helper_name[id];
+}
+
+static int do_list_builtins(int argc, char **argv)
+{
+       const char *(*get_name)(unsigned int id);
+       unsigned int id = 0;
+
+       if (argc < 1)
+               usage();
+
+       if (is_prefix(*argv, "prog_types")) {
+               get_name = (const char *(*)(unsigned int))libbpf_bpf_prog_type_str;
+       } else if (is_prefix(*argv, "map_types")) {
+               get_name = (const char *(*)(unsigned int))libbpf_bpf_map_type_str;
+       } else if (is_prefix(*argv, "attach_types")) {
+               get_name = (const char *(*)(unsigned int))libbpf_bpf_attach_type_str;
+       } else if (is_prefix(*argv, "link_types")) {
+               get_name = (const char *(*)(unsigned int))libbpf_bpf_link_type_str;
+       } else if (is_prefix(*argv, "helpers")) {
+               get_name = get_helper_name;
+       } else {
+               p_err("expected 'prog_types', 'map_types', 'attach_types', 'link_types' or 'helpers', got: %s", *argv);
+               return -1;
+       }
+
+       if (json_output)
+               jsonw_start_array(json_wtr);    /* root array */
+
+       while (true) {
+               const char *name;
+
+               name = get_name(id++);
+               if (!name)
+                       break;
+               if (json_output)
+                       jsonw_string(json_wtr, name);
+               else
+                       printf("%s\n", name);
+       }
+
+       if (json_output)
+               jsonw_end_array(json_wtr);      /* root array */
+
+       return 0;
+}
+
 static int do_help(int argc, char **argv)
 {
        if (json_output) {
@@ -1267,9 +1319,11 @@ static int do_help(int argc, char **argv)
 
        fprintf(stderr,
                "Usage: %1$s %2$s probe [COMPONENT] [full] [unprivileged] [macros [prefix PREFIX]]\n"
+               "       %1$s %2$s list_builtins GROUP\n"
                "       %1$s %2$s help\n"
                "\n"
                "       COMPONENT := { kernel | dev NAME }\n"
+               "       GROUP := { prog_types | map_types | attach_types | link_types | helpers }\n"
                "       " HELP_SPEC_OPTIONS " }\n"
                "",
                bin_name, argv[-2]);
@@ -1278,8 +1332,9 @@ static int do_help(int argc, char **argv)
 }
 
 static const struct cmd cmds[] = {
-       { "probe",      do_probe },
-       { "help",       do_help },
+       { "probe",              do_probe },
+       { "list_builtins",      do_list_builtins },
+       { "help",               do_help },
        { 0 }
 };
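The termination condition of the do_list_builtins() loop above is that libbpf's new libbpf_bpf_*_str() helpers return NULL once the id runs past the last known value. A minimal user-space sketch of the same pattern, using the map-type helper (illustrative, not part of the patch):

#include <stdio.h>
#include <bpf/libbpf.h>

int main(void)
{
	unsigned int id = 0;
	const char *name;

	/* libbpf_bpf_map_type_str() returns NULL for out-of-range ids,
	 * the same stop condition do_list_builtins() relies on; the
	 * first entry printed is "unspec" (id 0)
	 */
	while ((name = libbpf_bpf_map_type_str(id++)))
		printf("%s\n", name);

	return 0;
}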
 
index 480cbd8..1cf53bb 100644
@@ -1762,6 +1762,7 @@ btfgen_mark_type(struct btfgen_info *info, unsigned int type_id, bool follow_poi
                }
                break;
        case BTF_KIND_CONST:
+       case BTF_KIND_RESTRICT:
        case BTF_KIND_VOLATILE:
        case BTF_KIND_TYPEDEF:
                err = btfgen_mark_type(info, btf_type->type, follow_pointers);
@@ -1856,6 +1857,112 @@ static int btfgen_record_field_relo(struct btfgen_info *info, struct bpf_core_sp
        return 0;
 }
 
+/* Mark types, members, and member types. Compared to btfgen_record_field_relo,
+ * this function does not rely on the target spec for inferring members, but
+ * uses the associated BTF.
+ *
+ * The `behind_ptr` argument is used to stop marking of composite types reached
+ * through a pointer. This way, we can keep BTF size in check while providing
+ * reasonable match semantics.
+ */
+static int btfgen_mark_type_match(struct btfgen_info *info, __u32 type_id, bool behind_ptr)
+{
+       const struct btf_type *btf_type;
+       struct btf *btf = info->src_btf;
+       struct btf_type *cloned_type;
+       int i, err;
+
+       if (type_id == 0)
+               return 0;
+
+       btf_type = btf__type_by_id(btf, type_id);
+       /* mark type on cloned BTF as used */
+       cloned_type = (struct btf_type *)btf__type_by_id(info->marked_btf, type_id);
+       cloned_type->name_off = MARKED;
+
+       switch (btf_kind(btf_type)) {
+       case BTF_KIND_UNKN:
+       case BTF_KIND_INT:
+       case BTF_KIND_FLOAT:
+       case BTF_KIND_ENUM:
+       case BTF_KIND_ENUM64:
+               break;
+       case BTF_KIND_STRUCT:
+       case BTF_KIND_UNION: {
+               struct btf_member *m = btf_members(btf_type);
+               __u16 vlen = btf_vlen(btf_type);
+
+               if (behind_ptr)
+                       break;
+
+               for (i = 0; i < vlen; i++, m++) {
+                       /* mark member */
+                       btfgen_mark_member(info, type_id, i);
+
+                       /* mark member's type */
+                       err = btfgen_mark_type_match(info, m->type, false);
+                       if (err)
+                               return err;
+               }
+               break;
+       }
+       case BTF_KIND_CONST:
+       case BTF_KIND_FWD:
+       case BTF_KIND_RESTRICT:
+       case BTF_KIND_TYPEDEF:
+       case BTF_KIND_VOLATILE:
+               return btfgen_mark_type_match(info, btf_type->type, behind_ptr);
+       case BTF_KIND_PTR:
+               return btfgen_mark_type_match(info, btf_type->type, true);
+       case BTF_KIND_ARRAY: {
+               struct btf_array *array;
+
+               array = btf_array(btf_type);
+               /* mark array type */
+               err = btfgen_mark_type_match(info, array->type, false);
+               /* mark array's index type */
+               err = err ? : btfgen_mark_type_match(info, array->index_type, false);
+               if (err)
+                       return err;
+               break;
+       }
+       case BTF_KIND_FUNC_PROTO: {
+               __u16 vlen = btf_vlen(btf_type);
+               struct btf_param *param;
+
+               /* mark ret type */
+               err = btfgen_mark_type_match(info, btf_type->type, false);
+               if (err)
+                       return err;
+
+               /* mark parameter types */
+               param = btf_params(btf_type);
+               for (i = 0; i < vlen; i++) {
+                       err = btfgen_mark_type_match(info, param->type, false);
+                       if (err)
+                               return err;
+                       param++;
+               }
+               break;
+       }
+       /* tells if some other type needs to be handled */
+       default:
+               p_err("unsupported kind: %s (%d)", btf_kind_str(btf_type), type_id);
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
+/* Mark the whole type tree rooted at the relocation's target type. The
+ * traversal itself is implemented by btfgen_mark_type_match() above.
+ */
+static int btfgen_record_type_match_relo(struct btfgen_info *info, struct bpf_core_spec *targ_spec)
+{
+       return btfgen_mark_type_match(info, targ_spec->root_type_id, false);
+}
+
 static int btfgen_record_type_relo(struct btfgen_info *info, struct bpf_core_spec *targ_spec)
 {
        return btfgen_mark_type(info, targ_spec->root_type_id, true);
@@ -1882,6 +1989,8 @@ static int btfgen_record_reloc(struct btfgen_info *info, struct bpf_core_spec *r
        case BPF_CORE_TYPE_EXISTS:
        case BPF_CORE_TYPE_SIZE:
                return btfgen_record_type_relo(info, res);
+       case BPF_CORE_TYPE_MATCHES:
+               return btfgen_record_type_match_relo(info, res);
        case BPF_CORE_ENUMVAL_EXISTS:
        case BPF_CORE_ENUMVAL_VALUE:
                return btfgen_record_enumval_relo(info, res);
index 589cb76..5e5060c 100644
@@ -63,8 +63,6 @@ static inline void *u64_to_ptr(__u64 ptr)
 #define HELP_SPEC_LINK                                                 \
        "LINK := { id LINK_ID | pinned FILE }"
 
-extern const char * const attach_type_name[__MAX_BPF_ATTACH_TYPE];
-
 /* keep in sync with the definition in skeleton/pid_iter.bpf.c */
 enum bpf_obj_type {
        BPF_OBJ_UNKNOWN,
index 57890b3..71e54b1 100644
@@ -73,7 +73,7 @@ asm(                                                  \
 __BTF_ID_LIST(name, local)                             \
 extern u32 name[];
 
-#define BTF_ID_LIST_GLOBAL(name)                       \
+#define BTF_ID_LIST_GLOBAL(name, n)                    \
 __BTF_ID_LIST(name, globl)
 
 /* The BTF_ID_LIST_SINGLE macro defines a BTF_ID_LIST with
@@ -82,6 +82,9 @@ __BTF_ID_LIST(name, globl)
 #define BTF_ID_LIST_SINGLE(name, prefix, typename)     \
        BTF_ID_LIST(name) \
        BTF_ID(prefix, typename)
+#define BTF_ID_LIST_GLOBAL_SINGLE(name, prefix, typename) \
+       BTF_ID_LIST_GLOBAL(name, 1)                       \
+       BTF_ID(prefix, typename)
 
 /*
  * The BTF_ID_UNUSED macro defines 4 zero bytes.
@@ -143,13 +146,14 @@ extern struct btf_id_set name;
 
 #else
 
-#define BTF_ID_LIST(name) static u32 name[5];
+#define BTF_ID_LIST(name) static u32 __maybe_unused name[5];
 #define BTF_ID(prefix, name)
 #define BTF_ID_UNUSED
-#define BTF_ID_LIST_GLOBAL(name) u32 name[1];
-#define BTF_ID_LIST_SINGLE(name, prefix, typename) static u32 name[1];
-#define BTF_SET_START(name) static struct btf_id_set name = { 0 };
-#define BTF_SET_START_GLOBAL(name) static struct btf_id_set name = { 0 };
+#define BTF_ID_LIST_GLOBAL(name, n) u32 __maybe_unused name[n];
+#define BTF_ID_LIST_SINGLE(name, prefix, typename) static u32 __maybe_unused name[1];
+#define BTF_ID_LIST_GLOBAL_SINGLE(name, prefix, typename) u32 __maybe_unused name[1];
+#define BTF_SET_START(name) static struct btf_id_set __maybe_unused name = { 0 };
+#define BTF_SET_START_GLOBAL(name) static struct btf_id_set __maybe_unused name = { 0 };
 #define BTF_SET_END(name)
 
 #endif /* CONFIG_DEBUG_INFO_BTF */
@@ -172,7 +176,10 @@ extern struct btf_id_set name;
        BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP_TW, tcp_timewait_sock)          \
        BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP6, tcp6_sock)                    \
        BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP, udp_sock)                      \
-       BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP6, udp6_sock)
+       BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP6, udp6_sock)                    \
+       BTF_SOCK_TYPE(BTF_SOCK_TYPE_UNIX, unix_sock)                    \
+       BTF_SOCK_TYPE(BTF_SOCK_TYPE_MPTCP, mptcp_sock)                  \
+       BTF_SOCK_TYPE(BTF_SOCK_TYPE_SOCKET, socket)
 
 enum {
 #define BTF_SOCK_TYPE(name, str) name,
@@ -184,4 +191,18 @@ MAX_BTF_SOCK_TYPE,
 extern u32 btf_sock_ids[];
 #endif
 
+#define BTF_TRACING_TYPE_xxx   \
+       BTF_TRACING_TYPE(BTF_TRACING_TYPE_TASK, task_struct)    \
+       BTF_TRACING_TYPE(BTF_TRACING_TYPE_FILE, file)           \
+       BTF_TRACING_TYPE(BTF_TRACING_TYPE_VMA, vm_area_struct)
+
+enum {
+#define BTF_TRACING_TYPE(name, type) name,
+BTF_TRACING_TYPE_xxx
+#undef BTF_TRACING_TYPE
+MAX_BTF_TRACING_TYPE,
+};
+
+extern u32 btf_tracing_ids[];
+
 #endif
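For reference, the BTF_TRACING_TYPE_xxx x-macro above expands to the following enum, whose values index the new btf_tracing_ids[] array:

enum {
	BTF_TRACING_TYPE_TASK,	/* task_struct */
	BTF_TRACING_TYPE_FILE,	/* file */
	BTF_TRACING_TYPE_VMA,	/* vm_area_struct */
	MAX_BTF_TRACING_TYPE,
};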
index e813628..379e68f 100644
@@ -998,6 +998,7 @@ enum bpf_attach_type {
        BPF_SK_REUSEPORT_SELECT_OR_MIGRATE,
        BPF_PERF_EVENT,
        BPF_TRACE_KPROBE_MULTI,
+       BPF_LSM_CGROUP,
        __MAX_BPF_ATTACH_TYPE
 };
 
@@ -1431,6 +1432,7 @@ union bpf_attr {
                __u32           attach_flags;
                __aligned_u64   prog_ids;
                __u32           prog_cnt;
+               __aligned_u64   prog_attach_flags; /* output: per-program attach_flags */
        } query;
 
        struct { /* anonymous struct used by BPF_RAW_TRACEPOINT_OPEN command */
@@ -6075,6 +6077,8 @@ struct bpf_prog_info {
        __u64 run_cnt;
        __u64 recursion_misses;
        __u32 verified_insns;
+       __u32 attach_btf_obj_id;
+       __u32 attach_btf_id;
 } __attribute__((aligned(8)));
 
 struct bpf_map_info {
@@ -6782,6 +6786,7 @@ enum bpf_core_relo_kind {
        BPF_CORE_TYPE_SIZE = 9,              /* type size in bytes */
        BPF_CORE_ENUMVAL_EXISTS = 10,        /* enum value existence in target kernel */
        BPF_CORE_ENUMVAL_VALUE = 11,         /* enum value integer value */
+       BPF_CORE_TYPE_MATCHES = 12,          /* type match in target kernel */
 };
 
 /*
index 31a1a90..5a3dfb5 100644
@@ -1,4 +1,4 @@
 libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o str_error.o \
-           netlink.o bpf_prog_linfo.o libbpf_probes.o xsk.o hashmap.o \
+           netlink.o bpf_prog_linfo.o libbpf_probes.o hashmap.o \
            btf_dump.o ringbuf.o strset.o linker.o gen_loader.o relo_core.o \
            usdt.o
index a1265b1..4c904ef 100644
@@ -237,7 +237,7 @@ install_lib: all_cmd
                $(call do_install_mkdir,$(libdir_SQ)); \
                cp -fpR $(LIB_FILE) $(DESTDIR)$(libdir_SQ)
 
-SRC_HDRS := bpf.h libbpf.h btf.h libbpf_common.h libbpf_legacy.h xsk.h      \
+SRC_HDRS := bpf.h libbpf.h btf.h libbpf_common.h libbpf_legacy.h            \
            bpf_helpers.h bpf_tracing.h bpf_endian.h bpf_core_read.h         \
            skel_internal.h libbpf_version.h usdt.bpf.h
 GEN_HDRS := $(BPF_GENERATED)
index 240186a..5eb0df9 100644
@@ -147,10 +147,6 @@ int bump_rlimit_memlock(void)
 {
        struct rlimit rlim;
 
-       /* this the default in libbpf 1.0, but for now user has to opt-in explicitly */
-       if (!(libbpf_mode & LIBBPF_STRICT_AUTO_RLIMIT_MEMLOCK))
-               return 0;
-
        /* if kernel supports memcg-based accounting, skip bumping RLIMIT_MEMLOCK */
        if (memlock_bumped || kernel_supports(NULL, FEAT_MEMCG_ACCOUNT))
                return 0;
@@ -233,11 +229,10 @@ alloc_zero_tailing_info(const void *orecord, __u32 cnt,
        return info;
 }
 
-DEFAULT_VERSION(bpf_prog_load_v0_6_0, bpf_prog_load, LIBBPF_0.6.0)
-int bpf_prog_load_v0_6_0(enum bpf_prog_type prog_type,
-                        const char *prog_name, const char *license,
-                        const struct bpf_insn *insns, size_t insn_cnt,
-                        const struct bpf_prog_load_opts *opts)
+int bpf_prog_load(enum bpf_prog_type prog_type,
+                 const char *prog_name, const char *license,
+                 const struct bpf_insn *insns, size_t insn_cnt,
+                 const struct bpf_prog_load_opts *opts)
 {
        void *finfo = NULL, *linfo = NULL;
        const char *func_info, *line_info;
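With the version shim removed, the six-argument bpf_prog_load() is the only entry point. A minimal sketch of a direct call (not from the patch; NULL opts requests defaults, and the two-instruction program mirrors the one in bpftool's known_to_need_rlimit() above):

#include <linux/bpf.h>
#include <linux/filter.h>
#include <bpf/bpf.h>

/* load "r0 = 0; exit" as a socket filter; returns prog fd or -errno */
static int load_trivial_prog(void)
{
	const struct bpf_insn insns[] = {
		BPF_MOV64_IMM(BPF_REG_0, 0),
		BPF_EXIT_INSN(),
	};

	return bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, "trivial", "GPL",
			     insns, sizeof(insns) / sizeof(insns[0]), NULL);
}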
@@ -384,94 +379,6 @@ done:
        return libbpf_err_errno(fd);
 }
 
-__attribute__((alias("bpf_load_program_xattr2")))
-int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,
-                          char *log_buf, size_t log_buf_sz);
-
-static int bpf_load_program_xattr2(const struct bpf_load_program_attr *load_attr,
-                                  char *log_buf, size_t log_buf_sz)
-{
-       LIBBPF_OPTS(bpf_prog_load_opts, p);
-
-       if (!load_attr || !log_buf != !log_buf_sz)
-               return libbpf_err(-EINVAL);
-
-       p.expected_attach_type = load_attr->expected_attach_type;
-       switch (load_attr->prog_type) {
-       case BPF_PROG_TYPE_STRUCT_OPS:
-       case BPF_PROG_TYPE_LSM:
-               p.attach_btf_id = load_attr->attach_btf_id;
-               break;
-       case BPF_PROG_TYPE_TRACING:
-       case BPF_PROG_TYPE_EXT:
-               p.attach_btf_id = load_attr->attach_btf_id;
-               p.attach_prog_fd = load_attr->attach_prog_fd;
-               break;
-       default:
-               p.prog_ifindex = load_attr->prog_ifindex;
-               p.kern_version = load_attr->kern_version;
-       }
-       p.log_level = load_attr->log_level;
-       p.log_buf = log_buf;
-       p.log_size = log_buf_sz;
-       p.prog_btf_fd = load_attr->prog_btf_fd;
-       p.func_info_rec_size = load_attr->func_info_rec_size;
-       p.func_info_cnt = load_attr->func_info_cnt;
-       p.func_info = load_attr->func_info;
-       p.line_info_rec_size = load_attr->line_info_rec_size;
-       p.line_info_cnt = load_attr->line_info_cnt;
-       p.line_info = load_attr->line_info;
-       p.prog_flags = load_attr->prog_flags;
-
-       return bpf_prog_load(load_attr->prog_type, load_attr->name, load_attr->license,
-                            load_attr->insns, load_attr->insns_cnt, &p);
-}
-
-int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns,
-                    size_t insns_cnt, const char *license,
-                    __u32 kern_version, char *log_buf,
-                    size_t log_buf_sz)
-{
-       struct bpf_load_program_attr load_attr;
-
-       memset(&load_attr, 0, sizeof(struct bpf_load_program_attr));
-       load_attr.prog_type = type;
-       load_attr.expected_attach_type = 0;
-       load_attr.name = NULL;
-       load_attr.insns = insns;
-       load_attr.insns_cnt = insns_cnt;
-       load_attr.license = license;
-       load_attr.kern_version = kern_version;
-
-       return bpf_load_program_xattr2(&load_attr, log_buf, log_buf_sz);
-}
-
-int bpf_verify_program(enum bpf_prog_type type, const struct bpf_insn *insns,
-                      size_t insns_cnt, __u32 prog_flags, const char *license,
-                      __u32 kern_version, char *log_buf, size_t log_buf_sz,
-                      int log_level)
-{
-       union bpf_attr attr;
-       int fd;
-
-       bump_rlimit_memlock();
-
-       memset(&attr, 0, sizeof(attr));
-       attr.prog_type = type;
-       attr.insn_cnt = (__u32)insns_cnt;
-       attr.insns = ptr_to_u64(insns);
-       attr.license = ptr_to_u64(license);
-       attr.log_buf = ptr_to_u64(log_buf);
-       attr.log_size = log_buf_sz;
-       attr.log_level = log_level;
-       log_buf[0] = 0;
-       attr.kern_version = kern_version;
-       attr.prog_flags = prog_flags;
-
-       fd = sys_bpf_prog_load(&attr, sizeof(attr), PROG_LOAD_ATTEMPTS);
-       return libbpf_err_errno(fd);
-}
-
 int bpf_map_update_elem(int fd, const void *key, const void *value,
                        __u64 flags)
 {
@@ -888,80 +795,48 @@ int bpf_iter_create(int link_fd)
        return libbpf_err_errno(fd);
 }
 
-int bpf_prog_query(int target_fd, enum bpf_attach_type type, __u32 query_flags,
-                  __u32 *attach_flags, __u32 *prog_ids, __u32 *prog_cnt)
+int bpf_prog_query_opts(int target_fd,
+                       enum bpf_attach_type type,
+                       struct bpf_prog_query_opts *opts)
 {
        union bpf_attr attr;
        int ret;
 
+       if (!OPTS_VALID(opts, bpf_prog_query_opts))
+               return libbpf_err(-EINVAL);
+
        memset(&attr, 0, sizeof(attr));
+
        attr.query.target_fd    = target_fd;
        attr.query.attach_type  = type;
-       attr.query.query_flags  = query_flags;
-       attr.query.prog_cnt     = *prog_cnt;
-       attr.query.prog_ids     = ptr_to_u64(prog_ids);
+       attr.query.query_flags  = OPTS_GET(opts, query_flags, 0);
+       attr.query.prog_cnt     = OPTS_GET(opts, prog_cnt, 0);
+       attr.query.prog_ids     = ptr_to_u64(OPTS_GET(opts, prog_ids, NULL));
+       attr.query.prog_attach_flags = ptr_to_u64(OPTS_GET(opts, prog_attach_flags, NULL));
 
        ret = sys_bpf(BPF_PROG_QUERY, &attr, sizeof(attr));
 
-       if (attach_flags)
-               *attach_flags = attr.query.attach_flags;
-       *prog_cnt = attr.query.prog_cnt;
-
-       return libbpf_err_errno(ret);
-}
-
-int bpf_prog_test_run(int prog_fd, int repeat, void *data, __u32 size,
-                     void *data_out, __u32 *size_out, __u32 *retval,
-                     __u32 *duration)
-{
-       union bpf_attr attr;
-       int ret;
-
-       memset(&attr, 0, sizeof(attr));
-       attr.test.prog_fd = prog_fd;
-       attr.test.data_in = ptr_to_u64(data);
-       attr.test.data_out = ptr_to_u64(data_out);
-       attr.test.data_size_in = size;
-       attr.test.repeat = repeat;
-
-       ret = sys_bpf(BPF_PROG_TEST_RUN, &attr, sizeof(attr));
-
-       if (size_out)
-               *size_out = attr.test.data_size_out;
-       if (retval)
-               *retval = attr.test.retval;
-       if (duration)
-               *duration = attr.test.duration;
+       OPTS_SET(opts, attach_flags, attr.query.attach_flags);
+       OPTS_SET(opts, prog_cnt, attr.query.prog_cnt);
 
        return libbpf_err_errno(ret);
 }
 
-int bpf_prog_test_run_xattr(struct bpf_prog_test_run_attr *test_attr)
+int bpf_prog_query(int target_fd, enum bpf_attach_type type, __u32 query_flags,
+                  __u32 *attach_flags, __u32 *prog_ids, __u32 *prog_cnt)
 {
-       union bpf_attr attr;
+       LIBBPF_OPTS(bpf_prog_query_opts, opts);
        int ret;
 
-       if (!test_attr->data_out && test_attr->data_size_out > 0)
-               return libbpf_err(-EINVAL);
-
-       memset(&attr, 0, sizeof(attr));
-       attr.test.prog_fd = test_attr->prog_fd;
-       attr.test.data_in = ptr_to_u64(test_attr->data_in);
-       attr.test.data_out = ptr_to_u64(test_attr->data_out);
-       attr.test.data_size_in = test_attr->data_size_in;
-       attr.test.data_size_out = test_attr->data_size_out;
-       attr.test.ctx_in = ptr_to_u64(test_attr->ctx_in);
-       attr.test.ctx_out = ptr_to_u64(test_attr->ctx_out);
-       attr.test.ctx_size_in = test_attr->ctx_size_in;
-       attr.test.ctx_size_out = test_attr->ctx_size_out;
-       attr.test.repeat = test_attr->repeat;
+       opts.query_flags = query_flags;
+       opts.prog_ids = prog_ids;
+       opts.prog_cnt = *prog_cnt;
 
-       ret = sys_bpf(BPF_PROG_TEST_RUN, &attr, sizeof(attr));
+       ret = bpf_prog_query_opts(target_fd, type, &opts);
 
-       test_attr->data_size_out = attr.test.data_size_out;
-       test_attr->ctx_size_out = attr.test.ctx_size_out;
-       test_attr->retval = attr.test.retval;
-       test_attr->duration = attr.test.duration;
+       if (attach_flags)
+               *attach_flags = opts.attach_flags;
+       *prog_cnt = opts.prog_cnt;
 
        return libbpf_err_errno(ret);
 }
@@ -1162,27 +1037,6 @@ int bpf_btf_load(const void *btf_data, size_t btf_size, const struct bpf_btf_loa
        return libbpf_err_errno(fd);
 }
 
-int bpf_load_btf(const void *btf, __u32 btf_size, char *log_buf, __u32 log_buf_size, bool do_log)
-{
-       LIBBPF_OPTS(bpf_btf_load_opts, opts);
-       int fd;
-
-retry:
-       if (do_log && log_buf && log_buf_size) {
-               opts.log_buf = log_buf;
-               opts.log_size = log_buf_size;
-               opts.log_level = 1;
-       }
-
-       fd = bpf_btf_load(btf, btf_size, &opts);
-       if (fd < 0 && !do_log && log_buf && log_buf_size) {
-               do_log = true;
-               goto retry;
-       }
-
-       return libbpf_err_errno(fd);
-}
-
 int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf, __u32 *buf_len,
                      __u32 *prog_id, __u32 *fd_type, __u64 *probe_offset,
                      __u64 *probe_addr)
index cabc037..88a7cc4 100644
@@ -103,54 +103,6 @@ LIBBPF_API int bpf_prog_load(enum bpf_prog_type prog_type,
                             const char *prog_name, const char *license,
                             const struct bpf_insn *insns, size_t insn_cnt,
                             const struct bpf_prog_load_opts *opts);
-/* this "specialization" should go away in libbpf 1.0 */
-LIBBPF_API int bpf_prog_load_v0_6_0(enum bpf_prog_type prog_type,
-                                   const char *prog_name, const char *license,
-                                   const struct bpf_insn *insns, size_t insn_cnt,
-                                   const struct bpf_prog_load_opts *opts);
-
-/* This is an elaborate way to not conflict with deprecated bpf_prog_load()
- * API, defined in libbpf.h. Once we hit libbpf 1.0, all this will be gone.
- * With this approach, if someone is calling bpf_prog_load() with
- * 4 arguments, they will use the deprecated API, which keeps backwards
- * compatibility (both source code and binary). If bpf_prog_load() is called
- * with 6 arguments, though, it gets redirected to __bpf_prog_load.
- * So looking forward to libbpf 1.0 when this hack will be gone and
- * __bpf_prog_load() will be called just bpf_prog_load().
- */
-#ifndef bpf_prog_load
-#define bpf_prog_load(...) ___libbpf_overload(___bpf_prog_load, __VA_ARGS__)
-#define ___bpf_prog_load4(file, type, pobj, prog_fd) \
-       bpf_prog_load_deprecated(file, type, pobj, prog_fd)
-#define ___bpf_prog_load6(prog_type, prog_name, license, insns, insn_cnt, opts) \
-       bpf_prog_load(prog_type, prog_name, license, insns, insn_cnt, opts)
-#endif /* bpf_prog_load */
-
-struct bpf_load_program_attr {
-       enum bpf_prog_type prog_type;
-       enum bpf_attach_type expected_attach_type;
-       const char *name;
-       const struct bpf_insn *insns;
-       size_t insns_cnt;
-       const char *license;
-       union {
-               __u32 kern_version;
-               __u32 attach_prog_fd;
-       };
-       union {
-               __u32 prog_ifindex;
-               __u32 attach_btf_id;
-       };
-       __u32 prog_btf_fd;
-       __u32 func_info_rec_size;
-       const void *func_info;
-       __u32 func_info_cnt;
-       __u32 line_info_rec_size;
-       const void *line_info;
-       __u32 line_info_cnt;
-       __u32 log_level;
-       __u32 prog_flags;
-};
 
 /* Flags to direct loading requirements */
 #define MAPS_RELAX_COMPAT      0x01
@@ -158,22 +110,6 @@ struct bpf_load_program_attr {
 /* Recommended log buffer size */
 #define BPF_LOG_BUF_SIZE (UINT32_MAX >> 8) /* verifier maximum in kernels <= 5.1 */
 
-LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_prog_load() instead")
-LIBBPF_API int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,
-                                     char *log_buf, size_t log_buf_sz);
-LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_prog_load() instead")
-LIBBPF_API int bpf_load_program(enum bpf_prog_type type,
-                               const struct bpf_insn *insns, size_t insns_cnt,
-                               const char *license, __u32 kern_version,
-                               char *log_buf, size_t log_buf_sz);
-LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_prog_load() instead")
-LIBBPF_API int bpf_verify_program(enum bpf_prog_type type,
-                                 const struct bpf_insn *insns,
-                                 size_t insns_cnt, __u32 prog_flags,
-                                 const char *license, __u32 kern_version,
-                                 char *log_buf, size_t log_buf_sz,
-                                 int log_level);
-
 struct bpf_btf_load_opts {
        size_t sz; /* size of this struct for forward/backward compatibility */
 
@@ -187,10 +123,6 @@ struct bpf_btf_load_opts {
 LIBBPF_API int bpf_btf_load(const void *btf_data, size_t btf_size,
                            const struct bpf_btf_load_opts *opts);
 
-LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_btf_load() instead")
-LIBBPF_API int bpf_load_btf(const void *btf, __u32 btf_size, char *log_buf,
-                           __u32 log_buf_size, bool do_log);
-
 LIBBPF_API int bpf_map_update_elem(int fd, const void *key, const void *value,
                                   __u64 flags);
 
@@ -353,10 +285,6 @@ LIBBPF_API int bpf_prog_attach(int prog_fd, int attachable_fd,
 LIBBPF_API int bpf_prog_attach_opts(int prog_fd, int attachable_fd,
                                     enum bpf_attach_type type,
                                     const struct bpf_prog_attach_opts *opts);
-LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_prog_attach_opts() instead")
-LIBBPF_API int bpf_prog_attach_xattr(int prog_fd, int attachable_fd,
-                                    enum bpf_attach_type type,
-                                    const struct bpf_prog_attach_opts *opts);
 LIBBPF_API int bpf_prog_detach(int attachable_fd, enum bpf_attach_type type);
 LIBBPF_API int bpf_prog_detach2(int prog_fd, int attachable_fd,
                                enum bpf_attach_type type);
@@ -422,17 +350,6 @@ struct bpf_prog_test_run_attr {
                             * out: length of ctx_out */
 };
 
-LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_prog_test_run_opts() instead")
-LIBBPF_API int bpf_prog_test_run_xattr(struct bpf_prog_test_run_attr *test_attr);
-
-/*
- * bpf_prog_test_run does not check that data_out is large enough. Consider
- * using bpf_prog_test_run_opts instead.
- */
-LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_prog_test_run_opts() instead")
-LIBBPF_API int bpf_prog_test_run(int prog_fd, int repeat, void *data,
-                                __u32 size, void *data_out, __u32 *size_out,
-                                __u32 *retval, __u32 *duration);
 LIBBPF_API int bpf_prog_get_next_id(__u32 start_id, __u32 *next_id);
 LIBBPF_API int bpf_map_get_next_id(__u32 start_id, __u32 *next_id);
 LIBBPF_API int bpf_btf_get_next_id(__u32 start_id, __u32 *next_id);
@@ -442,9 +359,24 @@ LIBBPF_API int bpf_map_get_fd_by_id(__u32 id);
 LIBBPF_API int bpf_btf_get_fd_by_id(__u32 id);
 LIBBPF_API int bpf_link_get_fd_by_id(__u32 id);
 LIBBPF_API int bpf_obj_get_info_by_fd(int bpf_fd, void *info, __u32 *info_len);
+
+struct bpf_prog_query_opts {
+       size_t sz; /* size of this struct for forward/backward compatibility */
+       __u32 query_flags;
+       __u32 attach_flags; /* output argument */
+       __u32 *prog_ids;
+       __u32 prog_cnt; /* input+output argument */
+       __u32 *prog_attach_flags;
+};
+#define bpf_prog_query_opts__last_field prog_attach_flags
+
+LIBBPF_API int bpf_prog_query_opts(int target_fd,
+                                  enum bpf_attach_type type,
+                                  struct bpf_prog_query_opts *opts);
 LIBBPF_API int bpf_prog_query(int target_fd, enum bpf_attach_type type,
                              __u32 query_flags, __u32 *attach_flags,
                              __u32 *prog_ids, __u32 *prog_cnt);
+
 LIBBPF_API int bpf_raw_tracepoint_open(const char *name, int prog_fd);
 LIBBPF_API int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf,
                                 __u32 *buf_len, __u32 *prog_id, __u32 *fd_type,
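A minimal sketch of the new opts-based query, mirroring the bpftool cgroup.c conversion earlier in this merge; an entry of prog_attach_flags may be zero when only the legacy aggregate attach_flags applies, hence the fallback in the cgroup.c loop:

#include <bpf/bpf.h>

static int query_cgroup_ingress(int cgroup_fd)
{
	__u32 ids[64], flags[64];
	LIBBPF_OPTS(bpf_prog_query_opts, opts,
		.prog_ids = ids,
		.prog_cnt = 64,			/* in: array capacity */
		.prog_attach_flags = flags,	/* out: per-program flags */
	);
	int err;

	err = bpf_prog_query_opts(cgroup_fd, BPF_CGROUP_INET_INGRESS, &opts);
	if (err)
		return err;

	/* out: the first opts.prog_cnt entries of ids[]/flags[] are valid */
	return (int)opts.prog_cnt;
}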
index fd48b1f..496e6a8 100644
@@ -29,6 +29,7 @@ enum bpf_type_id_kind {
 enum bpf_type_info_kind {
        BPF_TYPE_EXISTS = 0,            /* type existence in target kernel */
        BPF_TYPE_SIZE = 1,              /* type size in target kernel */
+       BPF_TYPE_MATCHES = 2,           /* type match in target kernel */
 };
 
 /* second argument to __builtin_preserve_enum_value() built-in */
@@ -183,6 +184,16 @@ enum bpf_enum_value_kind {
 #define bpf_core_type_exists(type)                                         \
        __builtin_preserve_type_info(*(typeof(type) *)0, BPF_TYPE_EXISTS)
 
+/*
+ * Convenience macro to check that the provided named type
+ * (struct/union/enum/typedef) "matches" the corresponding type in the
+ * target kernel's BTF.
+ * Returns:
+ *    1, if the type matches in the target kernel's BTF;
+ *    0, if the type does not match any type in the target kernel
+ */
+#define bpf_core_type_matches(type)                                        \
+       __builtin_preserve_type_info(*(typeof(type) *)0, BPF_TYPE_MATCHES)
+
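A minimal BPF-side sketch of using the new macro; the type, field, and section names below are purely illustrative:

#include <bpf/bpf_helpers.h>
#include <bpf/bpf_core_read.h>

/* hypothetical local definition of the shape this program expects */
struct some_kernel_struct___local {
	int some_field;
};

SEC("socket")
int type_match_demo(void *ctx)
{
	/* resolved at load time via a CO-RE relocation, not at run time */
	if (bpf_core_type_matches(struct some_kernel_struct___local))
		bpf_printk("target kernel has a matching definition");

	return 0;
}

char LICENSE[] SEC("license") = "GPL";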
 /*
  * Convenience macro to get the byte size of a provided named type
  * (struct/union/enum/typedef) in a target kernel.
index fb04eaf..7349b16 100644
  * To allow use of SEC() with externs (e.g., for extern .maps declarations),
  * make sure __attribute__((unused)) doesn't trigger compilation warning.
  */
+#if __GNUC__ && !__clang__
+
+/*
+ * Pragma macros are broken on GCC
+ * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=55578
+ * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90400
+ */
+#define SEC(name) __attribute__((section(name), used))
+
+#else
+
 #define SEC(name) \
        _Pragma("GCC diagnostic push")                                      \
        _Pragma("GCC diagnostic ignored \"-Wignored-attributes\"")          \
        __attribute__((section(name), used))                                \
        _Pragma("GCC diagnostic pop")                                       \
 
+#endif
+
 /* Avoid 'linux/stddef.h' definition of '__always_inline'. */
 #undef __always_inline
 #define __always_inline inline __attribute__((always_inline))
index 01ce121..11f9096 100644
@@ -233,7 +233,7 @@ struct pt_regs___arm64 {
 #define __PT_PARM5_REG a4
 #define __PT_RET_REG ra
 #define __PT_FP_REG s0
-#define __PT_RC_REG a5
+#define __PT_RC_REG a0
 #define __PT_SP_REG sp
 #define __PT_IP_REG pc
 /* riscv does not select ARCH_HAS_SYSCALL_WRAPPER. */
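The return-value register fixed above is what PT_REGS_RC(), and hence the BPF_KRETPROBE() convenience macro from this header, reads in kretprobe programs. A short sketch (the traced function is just an example):

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

SEC("kretprobe/do_unlinkat")
int BPF_KRETPROBE(do_unlinkat_exit, long ret)
{
	/* on riscv, `ret` is now taken from a0 instead of a5 */
	bpf_printk("do_unlinkat returned %ld", ret);
	return 0;
}

char LICENSE[] SEC("license") = "GPL";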
index ae1520f..2d14f1a 100644
@@ -448,11 +448,6 @@ static int btf_parse_type_sec(struct btf *btf)
        return 0;
 }
 
-__u32 btf__get_nr_types(const struct btf *btf)
-{
-       return btf->start_id + btf->nr_types - 1;
-}
-
 __u32 btf__type_cnt(const struct btf *btf)
 {
        return btf->start_id + btf->nr_types;
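For callers migrating off the removed btf__get_nr_types(): btf__type_cnt() returns one past the last valid type id, so a full iteration looks like this sketch (assuming a non-split BTF, where id 0 is the implicit void type):

#include <bpf/btf.h>

static void for_each_type(const struct btf *btf)
{
	__u32 id, n = btf__type_cnt(btf);

	for (id = 1; id < n; id++) {
		const struct btf_type *t = btf__type_by_id(btf, id);

		(void)t;	/* inspect t here */
	}
}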
@@ -1408,92 +1403,6 @@ struct btf *btf__load_from_kernel_by_id(__u32 id)
        return btf__load_from_kernel_by_id_split(id, NULL);
 }
 
-int btf__get_from_id(__u32 id, struct btf **btf)
-{
-       struct btf *res;
-       int err;
-
-       *btf = NULL;
-       res = btf__load_from_kernel_by_id(id);
-       err = libbpf_get_error(res);
-
-       if (err)
-               return libbpf_err(err);
-
-       *btf = res;
-       return 0;
-}
-
-int btf__get_map_kv_tids(const struct btf *btf, const char *map_name,
-                        __u32 expected_key_size, __u32 expected_value_size,
-                        __u32 *key_type_id, __u32 *value_type_id)
-{
-       const struct btf_type *container_type;
-       const struct btf_member *key, *value;
-       const size_t max_name = 256;
-       char container_name[max_name];
-       __s64 key_size, value_size;
-       __s32 container_id;
-
-       if (snprintf(container_name, max_name, "____btf_map_%s", map_name) == max_name) {
-               pr_warn("map:%s length of '____btf_map_%s' is too long\n",
-                       map_name, map_name);
-               return libbpf_err(-EINVAL);
-       }
-
-       container_id = btf__find_by_name(btf, container_name);
-       if (container_id < 0) {
-               pr_debug("map:%s container_name:%s cannot be found in BTF. Missing BPF_ANNOTATE_KV_PAIR?\n",
-                        map_name, container_name);
-               return libbpf_err(container_id);
-       }
-
-       container_type = btf__type_by_id(btf, container_id);
-       if (!container_type) {
-               pr_warn("map:%s cannot find BTF type for container_id:%u\n",
-                       map_name, container_id);
-               return libbpf_err(-EINVAL);
-       }
-
-       if (!btf_is_struct(container_type) || btf_vlen(container_type) < 2) {
-               pr_warn("map:%s container_name:%s is an invalid container struct\n",
-                       map_name, container_name);
-               return libbpf_err(-EINVAL);
-       }
-
-       key = btf_members(container_type);
-       value = key + 1;
-
-       key_size = btf__resolve_size(btf, key->type);
-       if (key_size < 0) {
-               pr_warn("map:%s invalid BTF key_type_size\n", map_name);
-               return libbpf_err(key_size);
-       }
-
-       if (expected_key_size != key_size) {
-               pr_warn("map:%s btf_key_type_size:%u != map_def_key_size:%u\n",
-                       map_name, (__u32)key_size, expected_key_size);
-               return libbpf_err(-EINVAL);
-       }
-
-       value_size = btf__resolve_size(btf, value->type);
-       if (value_size < 0) {
-               pr_warn("map:%s invalid BTF value_type_size\n", map_name);
-               return libbpf_err(value_size);
-       }
-
-       if (expected_value_size != value_size) {
-               pr_warn("map:%s btf_value_type_size:%u != map_def_value_size:%u\n",
-                       map_name, (__u32)value_size, expected_value_size);
-               return libbpf_err(-EINVAL);
-       }
-
-       *key_type_id = key->type;
-       *value_type_id = value->type;
-
-       return 0;
-}
-
 static void btf_invalidate_raw_data(struct btf *btf)
 {
        if (btf->raw_data) {
@@ -2965,81 +2874,6 @@ const void *btf_ext__get_raw_data(const struct btf_ext *btf_ext, __u32 *size)
        return btf_ext->data;
 }
 
-static int btf_ext_reloc_info(const struct btf *btf,
-                             const struct btf_ext_info *ext_info,
-                             const char *sec_name, __u32 insns_cnt,
-                             void **info, __u32 *cnt)
-{
-       __u32 sec_hdrlen = sizeof(struct btf_ext_info_sec);
-       __u32 i, record_size, existing_len, records_len;
-       struct btf_ext_info_sec *sinfo;
-       const char *info_sec_name;
-       __u64 remain_len;
-       void *data;
-
-       record_size = ext_info->rec_size;
-       sinfo = ext_info->info;
-       remain_len = ext_info->len;
-       while (remain_len > 0) {
-               records_len = sinfo->num_info * record_size;
-               info_sec_name = btf__name_by_offset(btf, sinfo->sec_name_off);
-               if (strcmp(info_sec_name, sec_name)) {
-                       remain_len -= sec_hdrlen + records_len;
-                       sinfo = (void *)sinfo + sec_hdrlen + records_len;
-                       continue;
-               }
-
-               existing_len = (*cnt) * record_size;
-               data = realloc(*info, existing_len + records_len);
-               if (!data)
-                       return libbpf_err(-ENOMEM);
-
-               memcpy(data + existing_len, sinfo->data, records_len);
-               /* adjust insn_off only, the rest data will be passed
-                * to the kernel.
-                */
-               for (i = 0; i < sinfo->num_info; i++) {
-                       __u32 *insn_off;
-
-                       insn_off = data + existing_len + (i * record_size);
-                       *insn_off = *insn_off / sizeof(struct bpf_insn) + insns_cnt;
-               }
-               *info = data;
-               *cnt += sinfo->num_info;
-               return 0;
-       }
-
-       return libbpf_err(-ENOENT);
-}
-
-int btf_ext__reloc_func_info(const struct btf *btf,
-                            const struct btf_ext *btf_ext,
-                            const char *sec_name, __u32 insns_cnt,
-                            void **func_info, __u32 *cnt)
-{
-       return btf_ext_reloc_info(btf, &btf_ext->func_info, sec_name,
-                                 insns_cnt, func_info, cnt);
-}
-
-int btf_ext__reloc_line_info(const struct btf *btf,
-                            const struct btf_ext *btf_ext,
-                            const char *sec_name, __u32 insns_cnt,
-                            void **line_info, __u32 *cnt)
-{
-       return btf_ext_reloc_info(btf, &btf_ext->line_info, sec_name,
-                                 insns_cnt, line_info, cnt);
-}
-
-__u32 btf_ext__func_info_rec_size(const struct btf_ext *btf_ext)
-{
-       return btf_ext->func_info.rec_size;
-}
-
-__u32 btf_ext__line_info_rec_size(const struct btf_ext *btf_ext)
-{
-       return btf_ext->line_info.rec_size;
-}
-
 struct btf_dedup;
 
 static struct btf_dedup *btf_dedup_new(struct btf *btf, const struct btf_dedup_opts *opts);
@@ -3189,9 +3023,7 @@ static int btf_dedup_remap_types(struct btf_dedup *d);
  * deduplicating structs/unions is described in greater details in comments for
  * `btf_dedup_is_equiv` function.
  */
-
-DEFAULT_VERSION(btf__dedup_v0_6_0, btf__dedup, LIBBPF_0.6.0)
-int btf__dedup_v0_6_0(struct btf *btf, const struct btf_dedup_opts *opts)
+int btf__dedup(struct btf *btf, const struct btf_dedup_opts *opts)
 {
        struct btf_dedup *d;
        int err;
@@ -3251,19 +3083,6 @@ done:
        return libbpf_err(err);
 }
 
-COMPAT_VERSION(btf__dedup_deprecated, btf__dedup, LIBBPF_0.0.2)
-int btf__dedup_deprecated(struct btf *btf, struct btf_ext *btf_ext, const void *unused_opts)
-{
-       LIBBPF_OPTS(btf_dedup_opts, opts, .btf_ext = btf_ext);
-
-       if (unused_opts) {
-               pr_warn("please use new version of btf__dedup() that supports options\n");
-               return libbpf_err(-ENOTSUP);
-       }
-
-       return btf__dedup(btf, &opts);
-}
-
 #define BTF_UNPROCESSED_ID ((__u32)-1)
 #define BTF_IN_PROGRESS_ID ((__u32)-2)
 
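With the symbol-versioned btf__dedup_v0_6_0()/btf__dedup_deprecated() pair removed, btf__dedup() is the single remaining entry point and takes an optional opts struct. A minimal sketch of the surviving calling convention (object path and error handling are illustrative only):

	struct btf *btf = btf__parse("prog.bpf.o", NULL); /* hypothetical object file */
	LIBBPF_OPTS(btf_dedup_opts, opts);                /* .sz is filled in by the macro */

	if (btf && btf__dedup(btf, &opts))                /* NULL opts is also accepted */
		fprintf(stderr, "dedup failed: %d\n", -errno);
	btf__free(btf);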
index 9fb416e..583760d 100644
@@ -120,20 +120,12 @@ LIBBPF_API struct btf *libbpf_find_kernel_btf(void);
 
 LIBBPF_API struct btf *btf__load_from_kernel_by_id(__u32 id);
 LIBBPF_API struct btf *btf__load_from_kernel_by_id_split(__u32 id, struct btf *base_btf);
-LIBBPF_DEPRECATED_SINCE(0, 6, "use btf__load_from_kernel_by_id instead")
-LIBBPF_API int btf__get_from_id(__u32 id, struct btf **btf);
 
-LIBBPF_DEPRECATED_SINCE(0, 6, "intended for internal libbpf use only")
-LIBBPF_API int btf__finalize_data(struct bpf_object *obj, struct btf *btf);
-LIBBPF_DEPRECATED_SINCE(0, 6, "use btf__load_into_kernel instead")
-LIBBPF_API int btf__load(struct btf *btf);
 LIBBPF_API int btf__load_into_kernel(struct btf *btf);
 LIBBPF_API __s32 btf__find_by_name(const struct btf *btf,
                                   const char *type_name);
 LIBBPF_API __s32 btf__find_by_name_kind(const struct btf *btf,
                                        const char *type_name, __u32 kind);
-LIBBPF_DEPRECATED_SINCE(0, 7, "use btf__type_cnt() instead; note that btf__get_nr_types() == btf__type_cnt() - 1")
-LIBBPF_API __u32 btf__get_nr_types(const struct btf *btf);
 LIBBPF_API __u32 btf__type_cnt(const struct btf *btf);
 LIBBPF_API const struct btf *btf__base_btf(const struct btf *btf);
 LIBBPF_API const struct btf_type *btf__type_by_id(const struct btf *btf,
@@ -150,29 +142,10 @@ LIBBPF_API void btf__set_fd(struct btf *btf, int fd);
 LIBBPF_API const void *btf__raw_data(const struct btf *btf, __u32 *size);
 LIBBPF_API const char *btf__name_by_offset(const struct btf *btf, __u32 offset);
 LIBBPF_API const char *btf__str_by_offset(const struct btf *btf, __u32 offset);
-LIBBPF_DEPRECATED_SINCE(0, 7, "this API is not necessary when BTF-defined maps are used")
-LIBBPF_API int btf__get_map_kv_tids(const struct btf *btf, const char *map_name,
-                                   __u32 expected_key_size,
-                                   __u32 expected_value_size,
-                                   __u32 *key_type_id, __u32 *value_type_id);
 
 LIBBPF_API struct btf_ext *btf_ext__new(const __u8 *data, __u32 size);
 LIBBPF_API void btf_ext__free(struct btf_ext *btf_ext);
 LIBBPF_API const void *btf_ext__raw_data(const struct btf_ext *btf_ext, __u32 *size);
-LIBBPF_API LIBBPF_DEPRECATED("btf_ext__reloc_func_info was never meant as a public API and has wrong assumptions embedded in it; it will be removed in the future libbpf versions")
-int btf_ext__reloc_func_info(const struct btf *btf,
-                            const struct btf_ext *btf_ext,
-                            const char *sec_name, __u32 insns_cnt,
-                            void **func_info, __u32 *cnt);
-LIBBPF_API LIBBPF_DEPRECATED("btf_ext__reloc_line_info was never meant as a public API and has wrong assumptions embedded in it; it will be removed in the future libbpf versions")
-int btf_ext__reloc_line_info(const struct btf *btf,
-                            const struct btf_ext *btf_ext,
-                            const char *sec_name, __u32 insns_cnt,
-                            void **line_info, __u32 *cnt);
-LIBBPF_API LIBBPF_DEPRECATED("btf_ext__reloc_func_info is deprecated; write custom func_info parsing to fetch rec_size")
-__u32 btf_ext__func_info_rec_size(const struct btf_ext *btf_ext);
-LIBBPF_API LIBBPF_DEPRECATED("btf_ext__reloc_line_info is deprecated; write custom line_info parsing to fetch rec_size")
-__u32 btf_ext__line_info_rec_size(const struct btf_ext *btf_ext);
 
 LIBBPF_API int btf__find_str(struct btf *btf, const char *s);
 LIBBPF_API int btf__add_str(struct btf *btf, const char *s);
@@ -259,22 +232,12 @@ struct btf_dedup_opts {
 
 LIBBPF_API int btf__dedup(struct btf *btf, const struct btf_dedup_opts *opts);
 
-LIBBPF_API int btf__dedup_v0_6_0(struct btf *btf, const struct btf_dedup_opts *opts);
-
-LIBBPF_DEPRECATED_SINCE(0, 7, "use btf__dedup() instead")
-LIBBPF_API int btf__dedup_deprecated(struct btf *btf, struct btf_ext *btf_ext, const void *opts);
-#define btf__dedup(...) ___libbpf_overload(___btf_dedup, __VA_ARGS__)
-#define ___btf_dedup3(btf, btf_ext, opts) btf__dedup_deprecated(btf, btf_ext, opts)
-#define ___btf_dedup2(btf, opts) btf__dedup(btf, opts)
-
 struct btf_dump;
 
 struct btf_dump_opts {
-       union {
-               size_t sz;
-               void *ctx; /* DEPRECATED: will be gone in v1.0 */
-       };
+       size_t sz;
 };
+#define btf_dump_opts__last_field sz
 
 typedef void (*btf_dump_printf_fn_t)(void *ctx, const char *fmt, va_list args);
 
@@ -283,51 +246,6 @@ LIBBPF_API struct btf_dump *btf_dump__new(const struct btf *btf,
                                          void *ctx,
                                          const struct btf_dump_opts *opts);
 
-LIBBPF_API struct btf_dump *btf_dump__new_v0_6_0(const struct btf *btf,
-                                                btf_dump_printf_fn_t printf_fn,
-                                                void *ctx,
-                                                const struct btf_dump_opts *opts);
-
-LIBBPF_API struct btf_dump *btf_dump__new_deprecated(const struct btf *btf,
-                                                    const struct btf_ext *btf_ext,
-                                                    const struct btf_dump_opts *opts,
-                                                    btf_dump_printf_fn_t printf_fn);
-
-/* Choose either btf_dump__new() or btf_dump__new_deprecated() based on the
- * type of 4th argument. If it's btf_dump's print callback, use deprecated
- * API; otherwise, choose the new btf_dump__new(). ___libbpf_override()
- * doesn't work here because both variants have 4 input arguments.
- *
- * (void *) casts are necessary to avoid compilation warnings about type
- * mismatches, because even though __builtin_choose_expr() only ever evaluates
- * one side the other side still has to satisfy type constraints (this is
- * compiler implementation limitation which might be lifted eventually,
- * according to the documentation). So passing struct btf_ext in place of
- * btf_dump_printf_fn_t would be generating compilation warning.  Casting to
- * void * avoids this issue.
- *
- * Also, two type compatibility checks for a function and function pointer are
- * required because passing function reference into btf_dump__new() as
- * btf_dump__new(..., my_callback, ...) and as btf_dump__new(...,
- * &my_callback, ...) (not explicit ampersand in the latter case) actually
- * differs as far as __builtin_types_compatible_p() is concerned. Thus two
- * checks are combined to detect callback argument.
- *
- * The rest works just like in case of ___libbpf_override() usage with symbol
- * versioning.
- *
- * C++ compilers don't support __builtin_types_compatible_p(), so at least
- * don't screw up compilation for them and let C++ users pick btf_dump__new
- * vs btf_dump__new_deprecated explicitly.
- */
-#ifndef __cplusplus
-#define btf_dump__new(a1, a2, a3, a4) __builtin_choose_expr(                           \
-       __builtin_types_compatible_p(typeof(a4), btf_dump_printf_fn_t) ||               \
-       __builtin_types_compatible_p(typeof(a4), void(void *, const char *, va_list)),  \
-       btf_dump__new_deprecated((void *)a1, (void *)a2, (void *)a3, (void *)a4),       \
-       btf_dump__new((void *)a1, (void *)a2, (void *)a3, (void *)a4))
-#endif
-
 LIBBPF_API void btf_dump__free(struct btf_dump *d);
 
 LIBBPF_API int btf_dump__dump_type(struct btf_dump *d, __u32 id);
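After this removal, btf_dump__new() has one fixed argument order, (btf, printf_fn, ctx, opts), with no __builtin_choose_expr dispatch behind it. A short sketch of the new-style call (callback body illustrative; needs <stdio.h> and <stdarg.h>):

	static void my_printf(void *ctx, const char *fmt, va_list args)
	{
		vfprintf(stdout, fmt, args); /* ctx is passed through as-is */
	}

	struct btf_dump *d = btf_dump__new(btf, my_printf, NULL, NULL);

	if (!d)
		return -errno;          /* libbpf 1.0 sets errno on failure */
	btf_dump__dump_type(d, 1);      /* emit C definition of BTF type id 1 */
	btf_dump__free(d);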
index f5275f8..400e84f 100644
@@ -144,15 +144,17 @@ static void btf_dump_printf(const struct btf_dump *d, const char *fmt, ...)
 static int btf_dump_mark_referenced(struct btf_dump *d);
 static int btf_dump_resize(struct btf_dump *d);
 
-DEFAULT_VERSION(btf_dump__new_v0_6_0, btf_dump__new, LIBBPF_0.6.0)
-struct btf_dump *btf_dump__new_v0_6_0(const struct btf *btf,
-                                     btf_dump_printf_fn_t printf_fn,
-                                     void *ctx,
-                                     const struct btf_dump_opts *opts)
+struct btf_dump *btf_dump__new(const struct btf *btf,
+                              btf_dump_printf_fn_t printf_fn,
+                              void *ctx,
+                              const struct btf_dump_opts *opts)
 {
        struct btf_dump *d;
        int err;
 
+       if (!OPTS_VALID(opts, btf_dump_opts))
+               return libbpf_err_ptr(-EINVAL);
+
        if (!printf_fn)
                return libbpf_err_ptr(-EINVAL);
 
@@ -188,17 +190,6 @@ err:
        return libbpf_err_ptr(err);
 }
 
-COMPAT_VERSION(btf_dump__new_deprecated, btf_dump__new, LIBBPF_0.0.4)
-struct btf_dump *btf_dump__new_deprecated(const struct btf *btf,
-                                         const struct btf_ext *btf_ext,
-                                         const struct btf_dump_opts *opts,
-                                         btf_dump_printf_fn_t printf_fn)
-{
-       if (!printf_fn)
-               return libbpf_err_ptr(-EINVAL);
-       return btf_dump__new_v0_6_0(btf, printf_fn, opts ? opts->ctx : NULL, opts);
-}
-
 static int btf_dump_resize(struct btf_dump *d)
 {
        int err, last_id = btf__type_cnt(d->btf) - 1;
index 49e359c..cb49408 100644
@@ -31,7 +31,6 @@
 #include <linux/bpf.h>
 #include <linux/btf.h>
 #include <linux/filter.h>
-#include <linux/list.h>
 #include <linux/limits.h>
 #include <linux/perf_event.h>
 #include <linux/ring_buffer.h>
@@ -107,6 +106,7 @@ static const char * const attach_type_name[] = {
        [BPF_TRACE_FEXIT]               = "trace_fexit",
        [BPF_MODIFY_RETURN]             = "modify_return",
        [BPF_LSM_MAC]                   = "lsm_mac",
+       [BPF_LSM_CGROUP]                = "lsm_cgroup",
        [BPF_SK_LOOKUP]                 = "sk_lookup",
        [BPF_TRACE_ITER]                = "trace_iter",
        [BPF_XDP_DEVMAP]                = "xdp_devmap",
@@ -279,12 +279,9 @@ static inline __u64 ptr_to_u64(const void *ptr)
        return (__u64) (unsigned long) ptr;
 }
 
-/* this goes away in libbpf 1.0 */
-enum libbpf_strict_mode libbpf_mode = LIBBPF_STRICT_NONE;
-
 int libbpf_set_strict_mode(enum libbpf_strict_mode mode)
 {
-       libbpf_mode = mode;
+       /* as of v1.0, libbpf_set_strict_mode() is a no-op */
        return 0;
 }
 
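libbpf_set_strict_mode() survives purely for source compatibility: in v1.0 every strict behavior is unconditionally on, so pre-1.0 callers keep building and the call is harmless:

	/* no-op as of v1.0, kept so existing sources still compile */
	libbpf_set_strict_mode(LIBBPF_STRICT_ALL);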
@@ -347,12 +344,8 @@ enum sec_def_flags {
        SEC_ATTACH_BTF = 4,
        /* BPF program type allows sleeping/blocking in kernel */
        SEC_SLEEPABLE = 8,
-       /* allow non-strict prefix matching */
-       SEC_SLOPPY_PFX = 16,
        /* BPF program supports non-linear XDP buffers */
-       SEC_XDP_FRAGS = 32,
-       /* deprecated sec definitions not supposed to be used */
-       SEC_DEPRECATED = 64,
+       SEC_XDP_FRAGS = 16,
 };
 
 struct bpf_sec_def {
@@ -372,9 +365,10 @@ struct bpf_sec_def {
  * linux/filter.h.
  */
 struct bpf_program {
-       const struct bpf_sec_def *sec_def;
+       char *name;
        char *sec_name;
        size_t sec_idx;
+       const struct bpf_sec_def *sec_def;
        /* this program's instruction offset (in number of instructions)
         * within its containing ELF section
         */
@@ -394,12 +388,6 @@ struct bpf_program {
         */
        size_t sub_insn_off;
 
-       char *name;
-       /* name with / replaced by _; makes recursive pinning
-        * in bpf_object__pin_programs easier
-        */
-       char *pin_name;
-
        /* instructions that belong to BPF program; insns[0] is located at
         * sec_insn_off instruction within its ELF section in ELF file, so
         * when mapping ELF file instruction index to the local instruction,
@@ -420,24 +408,19 @@ struct bpf_program {
        size_t log_size;
        __u32 log_level;
 
-       struct {
-               int nr;
-               int *fds;
-       } instances;
-       bpf_program_prep_t preprocessor;
-
        struct bpf_object *obj;
-       void *priv;
-       bpf_program_clear_priv_t clear_priv;
 
+       int fd;
        bool autoload;
        bool mark_btf_static;
        enum bpf_prog_type type;
        enum bpf_attach_type expected_attach_type;
+
        int prog_ifindex;
        __u32 attach_btf_obj_fd;
        __u32 attach_btf_id;
        __u32 attach_prog_fd;
+
        void *func_info;
        __u32 func_info_rec_size;
        __u32 func_info_cnt;
@@ -484,6 +467,14 @@ enum libbpf_map_type {
        LIBBPF_MAP_KCONFIG,
 };
 
+struct bpf_map_def {
+       unsigned int type;
+       unsigned int key_size;
+       unsigned int value_size;
+       unsigned int max_entries;
+       unsigned int map_flags;
+};
+
 struct bpf_map {
        struct bpf_object *obj;
        char *name;
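struct bpf_map_def is now just libbpf's internal representation of a parsed map; the public, legacy SEC("maps") declaration style it mirrored is gone (see the bpf_object__init_user_maps() removal below). The replacement is a BTF-defined map in SEC(".maps"), roughly (names illustrative, macros from bpf_helpers.h):

	struct {
		__uint(type, BPF_MAP_TYPE_HASH);
		__uint(max_entries, 1024);
		__type(key, __u32);
		__type(value, __u64);
	} my_map SEC(".maps");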
@@ -504,8 +495,6 @@ struct bpf_map {
        __u32 btf_key_type_id;
        __u32 btf_value_type_id;
        __u32 btf_vmlinux_value_type_id;
-       void *priv;
-       bpf_map_clear_priv_t clear_priv;
        enum libbpf_map_type libbpf_type;
        void *mmaped;
        struct bpf_struct_ops *st_ops;
@@ -568,8 +557,6 @@ struct extern_desc {
        };
 };
 
-static LIST_HEAD(bpf_objects_list);
-
 struct module_btf {
        struct btf *btf;
        char *name;
@@ -638,12 +625,6 @@ struct bpf_object {
 
        /* Information when doing ELF related work. Only valid if efile.elf is not NULL */
        struct elf_state efile;
-       /*
-        * All loaded bpf_object are linked in a list, which is
-        * hidden to caller. bpf_objects__<func> handlers deal with
-        * all objects.
-        */
-       struct list_head list;
 
        struct btf *btf;
        struct btf_ext *btf_ext;
@@ -669,9 +650,6 @@ struct bpf_object {
        size_t log_size;
        __u32 log_level;
 
-       void *priv;
-       bpf_object_clear_priv_t clear_priv;
-
        int *fd_array;
        size_t fd_array_cap;
        size_t fd_array_cnt;
@@ -693,25 +671,10 @@ static Elf64_Rel *elf_rel_by_idx(Elf_Data *data, size_t idx);
 
 void bpf_program__unload(struct bpf_program *prog)
 {
-       int i;
-
        if (!prog)
                return;
 
-       /*
-        * If the object is opened but the program was never loaded,
-        * it is possible that prog->instances.nr == -1.
-        */
-       if (prog->instances.nr > 0) {
-               for (i = 0; i < prog->instances.nr; i++)
-                       zclose(prog->instances.fds[i]);
-       } else if (prog->instances.nr != -1) {
-               pr_warn("Internal error: instances.nr is %d\n",
-                       prog->instances.nr);
-       }
-
-       prog->instances.nr = -1;
-       zfree(&prog->instances.fds);
+       zclose(prog->fd);
 
        zfree(&prog->func_info);
        zfree(&prog->line_info);
@@ -722,16 +685,9 @@ static void bpf_program__exit(struct bpf_program *prog)
        if (!prog)
                return;
 
-       if (prog->clear_priv)
-               prog->clear_priv(prog, prog->priv);
-
-       prog->priv = NULL;
-       prog->clear_priv = NULL;
-
        bpf_program__unload(prog);
        zfree(&prog->name);
        zfree(&prog->sec_name);
-       zfree(&prog->pin_name);
        zfree(&prog->insns);
        zfree(&prog->reloc_desc);
 
@@ -740,26 +696,6 @@ static void bpf_program__exit(struct bpf_program *prog)
        prog->sec_idx = -1;
 }
 
-static char *__bpf_program__pin_name(struct bpf_program *prog)
-{
-       char *name, *p;
-
-       if (libbpf_mode & LIBBPF_STRICT_SEC_NAME)
-               name = strdup(prog->name);
-       else
-               name = strdup(prog->sec_name);
-
-       if (!name)
-               return NULL;
-
-       p = name;
-
-       while ((p = strchr(p, '/')))
-               *p = '_';
-
-       return name;
-}
-
 static bool insn_is_subprog_call(const struct bpf_insn *insn)
 {
        return BPF_CLASS(insn->code) == BPF_JMP &&
@@ -801,6 +737,7 @@ bpf_object__init_prog(struct bpf_object *obj, struct bpf_program *prog,
        prog->insns_cnt = prog->sec_insn_cnt;
 
        prog->type = BPF_PROG_TYPE_UNSPEC;
+       prog->fd = -1;
 
        /* libbpf's convention for SEC("?abc...") is that it's just like
         * SEC("abc...") but the corresponding bpf_program starts out with
@@ -814,9 +751,6 @@ bpf_object__init_prog(struct bpf_object *obj, struct bpf_program *prog,
                prog->autoload = true;
        }
 
-       prog->instances.fds = NULL;
-       prog->instances.nr = -1;
-
        /* inherit object's log_level */
        prog->log_level = obj->log_level;
 
@@ -828,10 +762,6 @@ bpf_object__init_prog(struct bpf_object *obj, struct bpf_program *prog,
        if (!prog->name)
                goto errout;
 
-       prog->pin_name = __bpf_program__pin_name(prog);
-       if (!prog->pin_name)
-               goto errout;
-
        prog->insns = malloc(insn_data_sz);
        if (!prog->insns)
                goto errout;
@@ -1313,7 +1243,6 @@ static struct bpf_object *bpf_object__new(const char *path,
                                          size_t obj_buf_sz,
                                          const char *obj_name)
 {
-       bool strict = (libbpf_mode & LIBBPF_STRICT_NO_OBJECT_LIST);
        struct bpf_object *obj;
        char *end;
 
@@ -1351,9 +1280,6 @@ static struct bpf_object *bpf_object__new(const char *path,
        obj->kern_version = get_kernel_version();
        obj->loaded = false;
 
-       INIT_LIST_HEAD(&obj->list);
-       if (!strict)
-               list_add(&obj->list, &bpf_objects_list);
        return obj;
 }
 
@@ -1386,10 +1312,7 @@ static int bpf_object__elf_init(struct bpf_object *obj)
        }
 
        if (obj->efile.obj_buf_sz > 0) {
-               /*
-                * obj_buf should have been validated by
-                * bpf_object__open_buffer().
-                */
+               /* obj_buf should have been validated by bpf_object__open_mem(). */
                elf = elf_memory((char *)obj->efile.obj_buf, obj->efile.obj_buf_sz);
        } else {
                obj->efile.fd = open(obj->path, O_RDONLY | O_CLOEXEC);
@@ -2052,143 +1975,6 @@ static int bpf_object__init_kconfig_map(struct bpf_object *obj)
        return 0;
 }
 
-static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict)
-{
-       Elf_Data *symbols = obj->efile.symbols;
-       int i, map_def_sz = 0, nr_maps = 0, nr_syms;
-       Elf_Data *data = NULL;
-       Elf_Scn *scn;
-
-       if (obj->efile.maps_shndx < 0)
-               return 0;
-
-       if (libbpf_mode & LIBBPF_STRICT_MAP_DEFINITIONS) {
-               pr_warn("legacy map definitions in SEC(\"maps\") are not supported\n");
-               return -EOPNOTSUPP;
-       }
-
-       if (!symbols)
-               return -EINVAL;
-
-       scn = elf_sec_by_idx(obj, obj->efile.maps_shndx);
-       data = elf_sec_data(obj, scn);
-       if (!scn || !data) {
-               pr_warn("elf: failed to get legacy map definitions for %s\n",
-                       obj->path);
-               return -EINVAL;
-       }
-
-       /*
-        * Count number of maps. Each map has a name.
-        * Array of maps is not supported: only the first element is
-        * considered.
-        *
-        * TODO: Detect array of map and report error.
-        */
-       nr_syms = symbols->d_size / sizeof(Elf64_Sym);
-       for (i = 0; i < nr_syms; i++) {
-               Elf64_Sym *sym = elf_sym_by_idx(obj, i);
-
-               if (sym->st_shndx != obj->efile.maps_shndx)
-                       continue;
-               if (ELF64_ST_TYPE(sym->st_info) == STT_SECTION)
-                       continue;
-               nr_maps++;
-       }
-       /* Assume equally sized map definitions */
-       pr_debug("elf: found %d legacy map definitions (%zd bytes) in %s\n",
-                nr_maps, data->d_size, obj->path);
-
-       if (!data->d_size || nr_maps == 0 || (data->d_size % nr_maps) != 0) {
-               pr_warn("elf: unable to determine legacy map definition size in %s\n",
-                       obj->path);
-               return -EINVAL;
-       }
-       map_def_sz = data->d_size / nr_maps;
-
-       /* Fill obj->maps using data in "maps" section.  */
-       for (i = 0; i < nr_syms; i++) {
-               Elf64_Sym *sym = elf_sym_by_idx(obj, i);
-               const char *map_name;
-               struct bpf_map_def *def;
-               struct bpf_map *map;
-
-               if (sym->st_shndx != obj->efile.maps_shndx)
-                       continue;
-               if (ELF64_ST_TYPE(sym->st_info) == STT_SECTION)
-                       continue;
-
-               map = bpf_object__add_map(obj);
-               if (IS_ERR(map))
-                       return PTR_ERR(map);
-
-               map_name = elf_sym_str(obj, sym->st_name);
-               if (!map_name) {
-                       pr_warn("failed to get map #%d name sym string for obj %s\n",
-                               i, obj->path);
-                       return -LIBBPF_ERRNO__FORMAT;
-               }
-
-               pr_warn("map '%s' (legacy): legacy map definitions are deprecated, use BTF-defined maps instead\n", map_name);
-
-               if (ELF64_ST_BIND(sym->st_info) == STB_LOCAL) {
-                       pr_warn("map '%s' (legacy): static maps are not supported\n", map_name);
-                       return -ENOTSUP;
-               }
-
-               map->libbpf_type = LIBBPF_MAP_UNSPEC;
-               map->sec_idx = sym->st_shndx;
-               map->sec_offset = sym->st_value;
-               pr_debug("map '%s' (legacy): at sec_idx %d, offset %zu.\n",
-                        map_name, map->sec_idx, map->sec_offset);
-               if (sym->st_value + map_def_sz > data->d_size) {
-                       pr_warn("corrupted maps section in %s: last map \"%s\" too small\n",
-                               obj->path, map_name);
-                       return -EINVAL;
-               }
-
-               map->name = strdup(map_name);
-               if (!map->name) {
-                       pr_warn("map '%s': failed to alloc map name\n", map_name);
-                       return -ENOMEM;
-               }
-               pr_debug("map %d is \"%s\"\n", i, map->name);
-               def = (struct bpf_map_def *)(data->d_buf + sym->st_value);
-               /*
-                * If the definition of the map in the object file fits in
-                * bpf_map_def, copy it.  Any extra fields in our version
-                * of bpf_map_def will default to zero as a result of the
-                * calloc above.
-                */
-               if (map_def_sz <= sizeof(struct bpf_map_def)) {
-                       memcpy(&map->def, def, map_def_sz);
-               } else {
-                       /*
-                        * Here the map structure being read is bigger than what
-                        * we expect, truncate if the excess bits are all zero.
-                        * If they are not zero, reject this map as
-                        * incompatible.
-                        */
-                       char *b;
-
-                       for (b = ((char *)def) + sizeof(struct bpf_map_def);
-                            b < ((char *)def) + map_def_sz; b++) {
-                               if (*b != 0) {
-                                       pr_warn("maps section in %s: \"%s\" has unrecognized, non-zero options\n",
-                                               obj->path, map_name);
-                                       if (strict)
-                                               return -EINVAL;
-                               }
-                       }
-                       memcpy(&map->def, def, sizeof(struct bpf_map_def));
-               }
-
-               /* btf info may not exist but fill it in if it does exist */
-               (void) bpf_map_find_btf_info(obj, map);
-       }
-       return 0;
-}
-
 const struct btf_type *
 skip_mods_and_typedefs(const struct btf *btf, __u32 id, __u32 *res_id)
 {
@@ -2306,6 +2092,13 @@ static int build_map_pin_path(struct bpf_map *map, const char *path)
        return bpf_map__set_pin_path(map, buf);
 }
 
+/* should match definition in bpf_helpers.h */
+enum libbpf_pin_type {
+       LIBBPF_PIN_NONE,
+       /* PIN_BY_NAME: pin maps by name (in /sys/fs/bpf by default) */
+       LIBBPF_PIN_BY_NAME,
+};
+
 int parse_btf_map_def(const char *map_name, struct btf *btf,
                      const struct btf_type *def_t, bool strict,
                      struct btf_map_def *map_def, struct btf_map_def *inner_def)
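The enum mirrors the values BPF programs use from bpf_helpers.h so that parse_btf_map_def() can interpret the pinning member of a BTF map definition. On the program side that looks roughly like (map name illustrative):

	struct {
		__uint(type, BPF_MAP_TYPE_ARRAY);
		__uint(max_entries, 1);
		__type(key, __u32);
		__type(value, __u64);
		/* pinned under pin_root_path, /sys/fs/bpf by default */
		__uint(pinning, LIBBPF_PIN_BY_NAME);
	} stats_map SEC(".maps");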
@@ -2738,12 +2531,11 @@ static int bpf_object__init_maps(struct bpf_object *obj,
 {
        const char *pin_root_path;
        bool strict;
-       int err;
+       int err = 0;
 
        strict = !OPTS_GET(opts, relaxed_maps, false);
        pin_root_path = OPTS_GET(opts, pin_root_path, NULL);
 
-       err = bpf_object__init_user_maps(obj, strict);
        err = err ?: bpf_object__init_user_btf_maps(obj, strict, pin_root_path);
        err = err ?: bpf_object__init_global_data_maps(obj);
        err = err ?: bpf_object__init_kconfig_map(obj);
@@ -3061,11 +2853,6 @@ static int btf_finalize_data(struct bpf_object *obj, struct btf *btf)
        return libbpf_err(err);
 }
 
-int btf__finalize_data(struct bpf_object *obj, struct btf *btf)
-{
-       return btf_finalize_data(obj, btf);
-}
-
 static int bpf_object__finalize_btf(struct bpf_object *obj)
 {
        int err;
@@ -4022,41 +3809,8 @@ static int bpf_object__collect_externs(struct bpf_object *obj)
        return 0;
 }
 
-struct bpf_program *
-bpf_object__find_program_by_title(const struct bpf_object *obj,
-                                 const char *title)
+static bool prog_is_subprog(const struct bpf_object *obj, const struct bpf_program *prog)
 {
-       struct bpf_program *pos;
-
-       bpf_object__for_each_program(pos, obj) {
-               if (pos->sec_name && !strcmp(pos->sec_name, title))
-                       return pos;
-       }
-       return errno = ENOENT, NULL;
-}
-
-static bool prog_is_subprog(const struct bpf_object *obj,
-                           const struct bpf_program *prog)
-{
-       /* For legacy reasons, libbpf supports an entry-point BPF programs
-        * without SEC() attribute, i.e., those in the .text section. But if
-        * there are 2 or more such programs in the .text section, they all
-        * must be subprograms called from entry-point BPF programs in
-        * designated SEC()'tions, otherwise there is no way to distinguish
-        * which of those programs should be loaded vs which are a subprogram.
-        * Similarly, if there is a function/program in .text and at least one
-        * other BPF program with custom SEC() attribute, then we just assume
-        * .text programs are subprograms (even if they are not called from
-        * other programs), because libbpf never explicitly supported mixing
-        * SEC()-designated BPF programs and .text entry-point BPF programs.
-        *
-        * In libbpf 1.0 strict mode, we always consider .text
-        * programs to be subprograms.
-        */
-
-       if (libbpf_mode & LIBBPF_STRICT_SEC_NAME)
-               return prog->sec_idx == obj->efile.text_shndx;
-
        return prog->sec_idx == obj->efile.text_shndx && obj->nr_programs > 1;
 }
 
@@ -4397,9 +4151,7 @@ bpf_object__collect_prog_relos(struct bpf_object *obj, Elf64_Shdr *shdr, Elf_Dat
 
 static int bpf_map_find_btf_info(struct bpf_object *obj, struct bpf_map *map)
 {
-       struct bpf_map_def *def = &map->def;
-       __u32 key_type_id = 0, value_type_id = 0;
-       int ret;
+       int id;
 
        if (!obj->btf)
                return -ENOENT;
@@ -4408,31 +4160,22 @@ static int bpf_map_find_btf_info(struct bpf_object *obj, struct bpf_map *map)
         * A struct_ops map does not need btf_key_type_id and
         * btf_value_type_id.
         */
-       if (map->sec_idx == obj->efile.btf_maps_shndx ||
-           bpf_map__is_struct_ops(map))
+       if (map->sec_idx == obj->efile.btf_maps_shndx || bpf_map__is_struct_ops(map))
                return 0;
 
-       if (!bpf_map__is_internal(map)) {
-               pr_warn("Use of BPF_ANNOTATE_KV_PAIR is deprecated, use BTF-defined maps in .maps section instead\n");
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
-               ret = btf__get_map_kv_tids(obj->btf, map->name, def->key_size,
-                                          def->value_size, &key_type_id,
-                                          &value_type_id);
-#pragma GCC diagnostic pop
-       } else {
-               /*
-                * LLVM annotates global data differently in BTF, that is,
-                * only as '.data', '.bss' or '.rodata'.
-                */
-               ret = btf__find_by_name(obj->btf, map->real_name);
-       }
-       if (ret < 0)
-               return ret;
+       /*
+        * LLVM annotates global data differently in BTF, that is,
+        * only as '.data', '.bss' or '.rodata'.
+        */
+       if (!bpf_map__is_internal(map))
+               return -ENOENT;
+
+       id = btf__find_by_name(obj->btf, map->real_name);
+       if (id < 0)
+               return id;
 
-       map->btf_key_type_id = key_type_id;
-       map->btf_value_type_id = bpf_map__is_internal(map) ?
-                                ret : value_type_id;
+       map->btf_key_type_id = 0;
+       map->btf_value_type_id = id;
        return 0;
 }
 
@@ -4564,14 +4307,6 @@ int bpf_map__set_max_entries(struct bpf_map *map, __u32 max_entries)
        return 0;
 }
 
-int bpf_map__resize(struct bpf_map *map, __u32 max_entries)
-{
-       if (!map || !max_entries)
-               return libbpf_err(-EINVAL);
-
-       return bpf_map__set_max_entries(map, max_entries);
-}
-
 static int
 bpf_object__probe_loading(struct bpf_object *obj)
 {
@@ -5732,77 +5467,13 @@ err_out:
 int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id,
                              const struct btf *targ_btf, __u32 targ_id)
 {
-       const struct btf_type *local_type, *targ_type;
-       int depth = 32; /* max recursion depth */
-
-       /* caller made sure that names match (ignoring flavor suffix) */
-       local_type = btf__type_by_id(local_btf, local_id);
-       targ_type = btf__type_by_id(targ_btf, targ_id);
-       if (!btf_kind_core_compat(local_type, targ_type))
-               return 0;
-
-recur:
-       depth--;
-       if (depth < 0)
-               return -EINVAL;
-
-       local_type = skip_mods_and_typedefs(local_btf, local_id, &local_id);
-       targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id);
-       if (!local_type || !targ_type)
-               return -EINVAL;
-
-       if (!btf_kind_core_compat(local_type, targ_type))
-               return 0;
-
-       switch (btf_kind(local_type)) {
-       case BTF_KIND_UNKN:
-       case BTF_KIND_STRUCT:
-       case BTF_KIND_UNION:
-       case BTF_KIND_ENUM:
-       case BTF_KIND_ENUM64:
-       case BTF_KIND_FWD:
-               return 1;
-       case BTF_KIND_INT:
-               /* just reject deprecated bitfield-like integers; all other
-                * integers are by default compatible between each other
-                */
-               return btf_int_offset(local_type) == 0 && btf_int_offset(targ_type) == 0;
-       case BTF_KIND_PTR:
-               local_id = local_type->type;
-               targ_id = targ_type->type;
-               goto recur;
-       case BTF_KIND_ARRAY:
-               local_id = btf_array(local_type)->type;
-               targ_id = btf_array(targ_type)->type;
-               goto recur;
-       case BTF_KIND_FUNC_PROTO: {
-               struct btf_param *local_p = btf_params(local_type);
-               struct btf_param *targ_p = btf_params(targ_type);
-               __u16 local_vlen = btf_vlen(local_type);
-               __u16 targ_vlen = btf_vlen(targ_type);
-               int i, err;
-
-               if (local_vlen != targ_vlen)
-                       return 0;
-
-               for (i = 0; i < local_vlen; i++, local_p++, targ_p++) {
-                       skip_mods_and_typedefs(local_btf, local_p->type, &local_id);
-                       skip_mods_and_typedefs(targ_btf, targ_p->type, &targ_id);
-                       err = bpf_core_types_are_compat(local_btf, local_id, targ_btf, targ_id);
-                       if (err <= 0)
-                               return err;
-               }
+       return __bpf_core_types_are_compat(local_btf, local_id, targ_btf, targ_id, 32);
+}
 
-               /* tail recurse for return type check */
-               skip_mods_and_typedefs(local_btf, local_type->type, &local_id);
-               skip_mods_and_typedefs(targ_btf, targ_type->type, &targ_id);
-               goto recur;
-       }
-       default:
-               pr_warn("unexpected kind %s relocated, local [%d], target [%d]\n",
-                       btf_kind_str(local_type), local_id, targ_id);
-               return 0;
-       }
+int bpf_core_types_match(const struct btf *local_btf, __u32 local_id,
+                        const struct btf *targ_btf, __u32 targ_id)
+{
+       return __bpf_core_types_match(local_btf, local_id, targ_btf, targ_id, false, 32);
 }
 
 static size_t bpf_core_hash_fn(const void *key, void *ctx)
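bpf_core_types_match() backs the new BPF_TYPE_MATCHES relocation from this series: same shape as bpf_core_types_are_compat() above, but with the stricter name-and-member matching rules and the same 32-level recursion budget. BPF program code normally reaches it through the bpf_core_type_matches() convenience macro; a sketch, assuming the bpf_core_read.h additions from this series:

	/* local ___flavor describing only the fields we rely on */
	struct task_struct___local {
		int pid;
	} __attribute__((preserve_access_index));

	/* non-zero iff the kernel's task_struct matches this definition
	 * under the CO-RE type-match rules
	 */
	if (bpf_core_type_matches(struct task_struct___local))
		/* safe to access via the local flavor */;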
@@ -6926,11 +6597,6 @@ static int libbpf_prepare_prog_load(struct bpf_program *prog,
        if (prog->type == BPF_PROG_TYPE_XDP && (def & SEC_XDP_FRAGS))
                opts->prog_flags |= BPF_F_XDP_HAS_FRAGS;
 
-       if (def & SEC_DEPRECATED) {
-               pr_warn("SEC(\"%s\") is deprecated, please see https://github.com/libbpf/libbpf/wiki/Libbpf-1.0-migration-guide#bpf-program-sec-annotation-deprecations for details\n",
-                       prog->sec_name);
-       }
-
        if ((def & SEC_ATTACH_BTF) && !prog->attach_btf_id) {
                int btf_obj_fd = 0, btf_type_id = 0, err;
                const char *attach_name;
@@ -6973,10 +6639,9 @@ static int libbpf_prepare_prog_load(struct bpf_program *prog,
 
 static void fixup_verifier_log(struct bpf_program *prog, char *buf, size_t buf_sz);
 
-static int bpf_object_load_prog_instance(struct bpf_object *obj, struct bpf_program *prog,
-                                        struct bpf_insn *insns, int insns_cnt,
-                                        const char *license, __u32 kern_version,
-                                        int *prog_fd)
+static int bpf_object_load_prog(struct bpf_object *obj, struct bpf_program *prog,
+                               struct bpf_insn *insns, int insns_cnt,
+                               const char *license, __u32 kern_version, int *prog_fd)
 {
        LIBBPF_OPTS(bpf_prog_load_opts, load_attr);
        const char *prog_name = NULL;
@@ -7343,93 +7008,6 @@ static int bpf_program_record_relos(struct bpf_program *prog)
        return 0;
 }
 
-static int bpf_object_load_prog(struct bpf_object *obj, struct bpf_program *prog,
-                               const char *license, __u32 kern_ver)
-{
-       int err = 0, fd, i;
-
-       if (obj->loaded) {
-               pr_warn("prog '%s': can't load after object was loaded\n", prog->name);
-               return libbpf_err(-EINVAL);
-       }
-
-       if (prog->instances.nr < 0 || !prog->instances.fds) {
-               if (prog->preprocessor) {
-                       pr_warn("Internal error: can't load program '%s'\n",
-                               prog->name);
-                       return libbpf_err(-LIBBPF_ERRNO__INTERNAL);
-               }
-
-               prog->instances.fds = malloc(sizeof(int));
-               if (!prog->instances.fds) {
-                       pr_warn("Not enough memory for BPF fds\n");
-                       return libbpf_err(-ENOMEM);
-               }
-               prog->instances.nr = 1;
-               prog->instances.fds[0] = -1;
-       }
-
-       if (!prog->preprocessor) {
-               if (prog->instances.nr != 1) {
-                       pr_warn("prog '%s': inconsistent nr(%d) != 1\n",
-                               prog->name, prog->instances.nr);
-               }
-               if (obj->gen_loader)
-                       bpf_program_record_relos(prog);
-               err = bpf_object_load_prog_instance(obj, prog,
-                                                   prog->insns, prog->insns_cnt,
-                                                   license, kern_ver, &fd);
-               if (!err)
-                       prog->instances.fds[0] = fd;
-               goto out;
-       }
-
-       for (i = 0; i < prog->instances.nr; i++) {
-               struct bpf_prog_prep_result result;
-               bpf_program_prep_t preprocessor = prog->preprocessor;
-
-               memset(&result, 0, sizeof(result));
-               err = preprocessor(prog, i, prog->insns,
-                                  prog->insns_cnt, &result);
-               if (err) {
-                       pr_warn("Preprocessing the %dth instance of program '%s' failed\n",
-                               i, prog->name);
-                       goto out;
-               }
-
-               if (!result.new_insn_ptr || !result.new_insn_cnt) {
-                       pr_debug("Skip loading the %dth instance of program '%s'\n",
-                                i, prog->name);
-                       prog->instances.fds[i] = -1;
-                       if (result.pfd)
-                               *result.pfd = -1;
-                       continue;
-               }
-
-               err = bpf_object_load_prog_instance(obj, prog,
-                                                   result.new_insn_ptr, result.new_insn_cnt,
-                                                   license, kern_ver, &fd);
-               if (err) {
-                       pr_warn("Loading the %dth instance of program '%s' failed\n",
-                               i, prog->name);
-                       goto out;
-               }
-
-               if (result.pfd)
-                       *result.pfd = fd;
-               prog->instances.fds[i] = fd;
-       }
-out:
-       if (err)
-               pr_warn("failed to load program '%s'\n", prog->name);
-       return libbpf_err(err);
-}
-
-int bpf_program__load(struct bpf_program *prog, const char *license, __u32 kern_ver)
-{
-       return bpf_object_load_prog(prog->obj, prog, license, kern_ver);
-}
-
 static int
 bpf_object__load_progs(struct bpf_object *obj, int log_level)
 {
@@ -7453,9 +7031,16 @@ bpf_object__load_progs(struct bpf_object *obj, int log_level)
                        continue;
                }
                prog->log_level |= log_level;
-               err = bpf_object_load_prog(obj, prog, obj->license, obj->kern_version);
-               if (err)
+
+               if (obj->gen_loader)
+                       bpf_program_record_relos(prog);
+
+               err = bpf_object_load_prog(obj, prog, prog->insns, prog->insns_cnt,
+                                          obj->license, obj->kern_version, &prog->fd);
+               if (err) {
+                       pr_warn("prog '%s': failed to load: %d\n", prog->name, err);
                        return err;
+               }
        }
 
        bpf_object__free_relocs(obj);
@@ -7481,13 +7066,6 @@ static int bpf_object_init_progs(struct bpf_object *obj, const struct bpf_object
                prog->type = prog->sec_def->prog_type;
                prog->expected_attach_type = prog->sec_def->expected_attach_type;
 
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
-               if (prog->sec_def->prog_type == BPF_PROG_TYPE_TRACING ||
-                   prog->sec_def->prog_type == BPF_PROG_TYPE_EXT)
-                       prog->attach_prog_fd = OPTS_GET(opts, attach_prog_fd, 0);
-#pragma GCC diagnostic pop
-
                /* sec_def can have custom callback which should be called
                 * after bpf_program is initialized to adjust its properties
                 */
@@ -7593,36 +7171,6 @@ out:
        return ERR_PTR(err);
 }
 
-static struct bpf_object *
-__bpf_object__open_xattr(struct bpf_object_open_attr *attr, int flags)
-{
-       DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts,
-               .relaxed_maps = flags & MAPS_RELAX_COMPAT,
-       );
-
-       /* param validation */
-       if (!attr->file)
-               return NULL;
-
-       pr_debug("loading %s\n", attr->file);
-       return bpf_object_open(attr->file, NULL, 0, &opts);
-}
-
-struct bpf_object *bpf_object__open_xattr(struct bpf_object_open_attr *attr)
-{
-       return libbpf_ptr(__bpf_object__open_xattr(attr, 0));
-}
-
-struct bpf_object *bpf_object__open(const char *path)
-{
-       struct bpf_object_open_attr attr = {
-               .file           = path,
-               .prog_type      = BPF_PROG_TYPE_UNSPEC,
-       };
-
-       return libbpf_ptr(__bpf_object__open_xattr(&attr, 0));
-}
-
 struct bpf_object *
 bpf_object__open_file(const char *path, const struct bpf_object_open_opts *opts)
 {
@@ -7634,6 +7182,11 @@ bpf_object__open_file(const char *path, const struct bpf_object_open_opts *opts)
        return libbpf_ptr(bpf_object_open(path, NULL, 0, opts));
 }
 
+struct bpf_object *bpf_object__open(const char *path)
+{
+       return bpf_object__open_file(path, NULL);
+}
+
 struct bpf_object *
 bpf_object__open_mem(const void *obj_buf, size_t obj_buf_sz,
                     const struct bpf_object_open_opts *opts)
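With bpf_object__open_xattr() and bpf_object__open_buffer() gone, every open funnels through bpf_object__open_file()/bpf_object__open_mem() plus bpf_object_open_opts. A typical open-and-load sequence now reads (object path illustrative):

	LIBBPF_OPTS(bpf_object_open_opts, opts,
		.object_name = "my_obj",
		.relaxed_maps = true,
	);
	struct bpf_object *obj;
	int err;

	obj = bpf_object__open_file("prog.bpf.o", &opts); /* opts may be NULL */
	if (!obj)
		return -errno;

	err = bpf_object__load(obj);
	if (err) {
		bpf_object__close(obj);
		return err;
	}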
@@ -7644,23 +7197,6 @@ bpf_object__open_mem(const void *obj_buf, size_t obj_buf_sz,
        return libbpf_ptr(bpf_object_open(NULL, obj_buf, obj_buf_sz, opts));
 }
 
-struct bpf_object *
-bpf_object__open_buffer(const void *obj_buf, size_t obj_buf_sz,
-                       const char *name)
-{
-       DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts,
-               .object_name = name,
-               /* wrong default, but backwards-compatible */
-               .relaxed_maps = true,
-       );
-
-       /* returning NULL is wrong, but backwards-compatible */
-       if (!obj_buf || obj_buf_sz == 0)
-               return errno = EINVAL, NULL;
-
-       return libbpf_ptr(bpf_object_open(NULL, obj_buf, obj_buf_sz, &opts));
-}
-
 static int bpf_object_unload(struct bpf_object *obj)
 {
        size_t i;
@@ -8093,11 +7629,6 @@ out:
        return libbpf_err(err);
 }
 
-int bpf_object__load_xattr(struct bpf_object_load_attr *attr)
-{
-       return bpf_object_load(attr->obj, attr->log_level, attr->target_btf_path);
-}
-
 int bpf_object__load(struct bpf_object *obj)
 {
        return bpf_object_load(obj, 0, NULL);
@@ -8155,11 +7686,16 @@ static int check_path(const char *path)
        return err;
 }
 
-static int bpf_program_pin_instance(struct bpf_program *prog, const char *path, int instance)
+int bpf_program__pin(struct bpf_program *prog, const char *path)
 {
        char *cp, errmsg[STRERR_BUFSIZE];
        int err;
 
+       if (prog->fd < 0) {
+               pr_warn("prog '%s': can't pin program that wasn't loaded\n", prog->name);
+               return libbpf_err(-EINVAL);
+       }
+
        err = make_parent_dir(path);
        if (err)
                return libbpf_err(err);
@@ -8168,170 +7704,35 @@ static int bpf_program_pin_instance(struct bpf_program *prog, const char *path,
        if (err)
                return libbpf_err(err);
 
-       if (prog == NULL) {
-               pr_warn("invalid program pointer\n");
-               return libbpf_err(-EINVAL);
-       }
-
-       if (instance < 0 || instance >= prog->instances.nr) {
-               pr_warn("invalid prog instance %d of prog %s (max %d)\n",
-                       instance, prog->name, prog->instances.nr);
-               return libbpf_err(-EINVAL);
-       }
-
-       if (bpf_obj_pin(prog->instances.fds[instance], path)) {
+       if (bpf_obj_pin(prog->fd, path)) {
                err = -errno;
                cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
-               pr_warn("failed to pin program: %s\n", cp);
+               pr_warn("prog '%s': failed to pin at '%s': %s\n", prog->name, path, cp);
                return libbpf_err(err);
        }
-       pr_debug("pinned program '%s'\n", path);
 
+       pr_debug("prog '%s': pinned at '%s'\n", prog->name, path);
        return 0;
 }
 
-static int bpf_program_unpin_instance(struct bpf_program *prog, const char *path, int instance)
+int bpf_program__unpin(struct bpf_program *prog, const char *path)
 {
        int err;
 
-       err = check_path(path);
-       if (err)
-               return libbpf_err(err);
-
-       if (prog == NULL) {
-               pr_warn("invalid program pointer\n");
-               return libbpf_err(-EINVAL);
-       }
-
-       if (instance < 0 || instance >= prog->instances.nr) {
-               pr_warn("invalid prog instance %d of prog %s (max %d)\n",
-                       instance, prog->name, prog->instances.nr);
-               return libbpf_err(-EINVAL);
-       }
-
-       err = unlink(path);
-       if (err != 0)
-               return libbpf_err(-errno);
-
-       pr_debug("unpinned program '%s'\n", path);
-
-       return 0;
-}
-
-__attribute__((alias("bpf_program_pin_instance")))
-int bpf_object__pin_instance(struct bpf_program *prog, const char *path, int instance);
-
-__attribute__((alias("bpf_program_unpin_instance")))
-int bpf_program__unpin_instance(struct bpf_program *prog, const char *path, int instance);
-
-int bpf_program__pin(struct bpf_program *prog, const char *path)
-{
-       int i, err;
-
-       err = make_parent_dir(path);
-       if (err)
-               return libbpf_err(err);
-
-       err = check_path(path);
-       if (err)
-               return libbpf_err(err);
-
-       if (prog == NULL) {
-               pr_warn("invalid program pointer\n");
+       if (prog->fd < 0) {
+               pr_warn("prog '%s': can't unpin program that wasn't loaded\n", prog->name);
                return libbpf_err(-EINVAL);
        }
 
-       if (prog->instances.nr <= 0) {
-               pr_warn("no instances of prog %s to pin\n", prog->name);
-               return libbpf_err(-EINVAL);
-       }
-
-       if (prog->instances.nr == 1) {
-               /* don't create subdirs when pinning single instance */
-               return bpf_program_pin_instance(prog, path, 0);
-       }
-
-       for (i = 0; i < prog->instances.nr; i++) {
-               char buf[PATH_MAX];
-               int len;
-
-               len = snprintf(buf, PATH_MAX, "%s/%d", path, i);
-               if (len < 0) {
-                       err = -EINVAL;
-                       goto err_unpin;
-               } else if (len >= PATH_MAX) {
-                       err = -ENAMETOOLONG;
-                       goto err_unpin;
-               }
-
-               err = bpf_program_pin_instance(prog, buf, i);
-               if (err)
-                       goto err_unpin;
-       }
-
-       return 0;
-
-err_unpin:
-       for (i = i - 1; i >= 0; i--) {
-               char buf[PATH_MAX];
-               int len;
-
-               len = snprintf(buf, PATH_MAX, "%s/%d", path, i);
-               if (len < 0)
-                       continue;
-               else if (len >= PATH_MAX)
-                       continue;
-
-               bpf_program_unpin_instance(prog, buf, i);
-       }
-
-       rmdir(path);
-
-       return libbpf_err(err);
-}
-
-int bpf_program__unpin(struct bpf_program *prog, const char *path)
-{
-       int i, err;
-
        err = check_path(path);
        if (err)
                return libbpf_err(err);
 
-       if (prog == NULL) {
-               pr_warn("invalid program pointer\n");
-               return libbpf_err(-EINVAL);
-       }
-
-       if (prog->instances.nr <= 0) {
-               pr_warn("no instances of prog %s to pin\n", prog->name);
-               return libbpf_err(-EINVAL);
-       }
-
-       if (prog->instances.nr == 1) {
-               /* don't create subdirs when pinning single instance */
-               return bpf_program_unpin_instance(prog, path, 0);
-       }
-
-       for (i = 0; i < prog->instances.nr; i++) {
-               char buf[PATH_MAX];
-               int len;
-
-               len = snprintf(buf, PATH_MAX, "%s/%d", path, i);
-               if (len < 0)
-                       return libbpf_err(-EINVAL);
-               else if (len >= PATH_MAX)
-                       return libbpf_err(-ENAMETOOLONG);
-
-               err = bpf_program_unpin_instance(prog, buf, i);
-               if (err)
-                       return err;
-       }
-
-       err = rmdir(path);
+       err = unlink(path);
        if (err)
                return libbpf_err(-errno);
 
+       pr_debug("prog '%s': unpinned from '%s'\n", prog->name, path);
        return 0;
 }
 
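With single-fd programs there are no per-instance pin subdirectories left: one program pins to exactly one bpffs path, and pinning an unloaded program now fails up front. Paths illustrative:

	err = bpf_program__pin(prog, "/sys/fs/bpf/my_prog");
	if (err)        /* -EINVAL if prog was never loaded */
		return err;
	/* ... later ... */
	err = bpf_program__unpin(prog, "/sys/fs/bpf/my_prog");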
@@ -8578,8 +7979,7 @@ int bpf_object__pin_programs(struct bpf_object *obj, const char *path)
                char buf[PATH_MAX];
                int len;
 
-               len = snprintf(buf, PATH_MAX, "%s/%s", path,
-                              prog->pin_name);
+               len = snprintf(buf, PATH_MAX, "%s/%s", path, prog->name);
                if (len < 0) {
                        err = -EINVAL;
                        goto err_unpin_programs;
@@ -8600,8 +8000,7 @@ err_unpin_programs:
                char buf[PATH_MAX];
                int len;
 
-               len = snprintf(buf, PATH_MAX, "%s/%s", path,
-                              prog->pin_name);
+               len = snprintf(buf, PATH_MAX, "%s/%s", path, prog->name);
                if (len < 0)
                        continue;
                else if (len >= PATH_MAX)
@@ -8625,8 +8024,7 @@ int bpf_object__unpin_programs(struct bpf_object *obj, const char *path)
                char buf[PATH_MAX];
                int len;
 
-               len = snprintf(buf, PATH_MAX, "%s/%s", path,
-                              prog->pin_name);
+               len = snprintf(buf, PATH_MAX, "%s/%s", path, prog->name);
                if (len < 0)
                        return libbpf_err(-EINVAL);
                else if (len >= PATH_MAX)
@@ -8659,11 +8057,6 @@ int bpf_object__pin(struct bpf_object *obj, const char *path)
 
 static void bpf_map__destroy(struct bpf_map *map)
 {
-       if (map->clear_priv)
-               map->clear_priv(map, map->priv);
-       map->priv = NULL;
-       map->clear_priv = NULL;
-
        if (map->inner_map) {
                bpf_map__destroy(map->inner_map);
                zfree(&map->inner_map);
@@ -8699,9 +8092,6 @@ void bpf_object__close(struct bpf_object *obj)
        if (IS_ERR_OR_NULL(obj))
                return;
 
-       if (obj->clear_priv)
-               obj->clear_priv(obj, obj->priv);
-
        usdt_manager_free(obj->usdt_man);
        obj->usdt_man = NULL;
 
@@ -8728,33 +8118,9 @@ void bpf_object__close(struct bpf_object *obj)
        }
        zfree(&obj->programs);
 
-       list_del(&obj->list);
        free(obj);
 }
 
-struct bpf_object *
-bpf_object__next(struct bpf_object *prev)
-{
-       struct bpf_object *next;
-       bool strict = (libbpf_mode & LIBBPF_STRICT_NO_OBJECT_LIST);
-
-       if (strict)
-               return NULL;
-
-       if (!prev)
-               next = list_first_entry(&bpf_objects_list,
-                                       struct bpf_object,
-                                       list);
-       else
-               next = list_next_entry(prev, list);
-
-       /* Empty list is noticed here so don't need checking on entry. */
-       if (&next->list == &bpf_objects_list)
-               return NULL;
-
-       return next;
-}
-
 const char *bpf_object__name(const struct bpf_object *obj)
 {
        return obj ? obj->name : libbpf_err_ptr(-EINVAL);
@@ -8785,22 +8151,6 @@ int bpf_object__set_kversion(struct bpf_object *obj, __u32 kern_version)
        return 0;
 }
 
-int bpf_object__set_priv(struct bpf_object *obj, void *priv,
-                        bpf_object_clear_priv_t clear_priv)
-{
-       if (obj->priv && obj->clear_priv)
-               obj->clear_priv(obj, obj->priv);
-
-       obj->priv = priv;
-       obj->clear_priv = clear_priv;
-       return 0;
-}
-
-void *bpf_object__priv(const struct bpf_object *obj)
-{
-       return obj ? obj->priv : libbpf_err_ptr(-EINVAL);
-}
-
 int bpf_object__gen_loader(struct bpf_object *obj, struct gen_loader_opts *opts)
 {
        struct bpf_gen *gen;
@@ -8843,12 +8193,6 @@ __bpf_program__iter(const struct bpf_program *p, const struct bpf_object *obj,
        return &obj->programs[idx];
 }
 
-struct bpf_program *
-bpf_program__next(struct bpf_program *prev, const struct bpf_object *obj)
-{
-       return bpf_object__next_program(obj, prev);
-}
-
 struct bpf_program *
 bpf_object__next_program(const struct bpf_object *obj, struct bpf_program *prev)
 {
@@ -8861,12 +8205,6 @@ bpf_object__next_program(const struct bpf_object *obj, struct bpf_program *prev)
        return prog;
 }
 
-struct bpf_program *
-bpf_program__prev(struct bpf_program *next, const struct bpf_object *obj)
-{
-       return bpf_object__prev_program(obj, next);
-}
-
 struct bpf_program *
 bpf_object__prev_program(const struct bpf_object *obj, struct bpf_program *next)
 {
@@ -8879,22 +8217,6 @@ bpf_object__prev_program(const struct bpf_object *obj, struct bpf_program *next)
        return prog;
 }
 
-int bpf_program__set_priv(struct bpf_program *prog, void *priv,
-                         bpf_program_clear_priv_t clear_priv)
-{
-       if (prog->priv && prog->clear_priv)
-               prog->clear_priv(prog, prog->priv);
-
-       prog->priv = priv;
-       prog->clear_priv = clear_priv;
-       return 0;
-}
-
-void *bpf_program__priv(const struct bpf_program *prog)
-{
-       return prog ? prog->priv : libbpf_err_ptr(-EINVAL);
-}
-
 void bpf_program__set_ifindex(struct bpf_program *prog, __u32 ifindex)
 {
        prog->prog_ifindex = ifindex;
@@ -8910,22 +8232,6 @@ const char *bpf_program__section_name(const struct bpf_program *prog)
        return prog->sec_name;
 }
 
-const char *bpf_program__title(const struct bpf_program *prog, bool needs_copy)
-{
-       const char *title;
-
-       title = prog->sec_name;
-       if (needs_copy) {
-               title = strdup(title);
-               if (!title) {
-                       pr_warn("failed to strdup program title\n");
-                       return libbpf_err_ptr(-ENOMEM);
-               }
-       }
-
-       return title;
-}
-
 bool bpf_program__autoload(const struct bpf_program *prog)
 {
        return prog->autoload;
@@ -8940,18 +8246,6 @@ int bpf_program__set_autoload(struct bpf_program *prog, bool autoload)
        return 0;
 }
 
-static int bpf_program_nth_fd(const struct bpf_program *prog, int n);
-
-int bpf_program__fd(const struct bpf_program *prog)
-{
-       return bpf_program_nth_fd(prog, 0);
-}
-
-size_t bpf_program__size(const struct bpf_program *prog)
-{
-       return prog->insns_cnt * BPF_INSN_SZ;
-}
-
 const struct bpf_insn *bpf_program__insns(const struct bpf_program *prog)
 {
        return prog->insns;
@@ -8982,58 +8276,15 @@ int bpf_program__set_insns(struct bpf_program *prog,
        return 0;
 }
 
-int bpf_program__set_prep(struct bpf_program *prog, int nr_instances,
-                         bpf_program_prep_t prep)
-{
-       int *instances_fds;
-
-       if (nr_instances <= 0 || !prep)
-               return libbpf_err(-EINVAL);
-
-       if (prog->instances.nr > 0 || prog->instances.fds) {
-               pr_warn("Can't set pre-processor after loading\n");
-               return libbpf_err(-EINVAL);
-       }
-
-       instances_fds = malloc(sizeof(int) * nr_instances);
-       if (!instances_fds) {
-               pr_warn("alloc memory failed for fds\n");
-               return libbpf_err(-ENOMEM);
-       }
-
-       /* fill all fd with -1 */
-       memset(instances_fds, -1, sizeof(int) * nr_instances);
-
-       prog->instances.nr = nr_instances;
-       prog->instances.fds = instances_fds;
-       prog->preprocessor = prep;
-       return 0;
-}
-
-__attribute__((alias("bpf_program_nth_fd")))
-int bpf_program__nth_fd(const struct bpf_program *prog, int n);
-
-static int bpf_program_nth_fd(const struct bpf_program *prog, int n)
+int bpf_program__fd(const struct bpf_program *prog)
 {
-       int fd;
-
        if (!prog)
                return libbpf_err(-EINVAL);
 
-       if (n >= prog->instances.nr || n < 0) {
-               pr_warn("Can't get the %dth fd from program %s: only %d instances\n",
-                       n, prog->name, prog->instances.nr);
-               return libbpf_err(-EINVAL);
-       }
-
-       fd = prog->instances.fds[n];
-       if (fd < 0) {
-               pr_warn("%dth instance of program '%s' is invalid\n",
-                       n, prog->name);
+       if (prog->fd < 0)
                return libbpf_err(-ENOENT);
-       }
 
-       return fd;
+       return prog->fd;
 }
 
 __alias(bpf_program__type)
@@ -9053,39 +8304,6 @@ int bpf_program__set_type(struct bpf_program *prog, enum bpf_prog_type type)
        return 0;
 }
 
-static bool bpf_program__is_type(const struct bpf_program *prog,
-                                enum bpf_prog_type type)
-{
-       return prog ? (prog->type == type) : false;
-}
-
-#define BPF_PROG_TYPE_FNS(NAME, TYPE)                          \
-int bpf_program__set_##NAME(struct bpf_program *prog)          \
-{                                                              \
-       if (!prog)                                              \
-               return libbpf_err(-EINVAL);                     \
-       return bpf_program__set_type(prog, TYPE);                       \
-}                                                              \
-                                                               \
-bool bpf_program__is_##NAME(const struct bpf_program *prog)    \
-{                                                              \
-       return bpf_program__is_type(prog, TYPE);                \
-}                                                              \
-
-BPF_PROG_TYPE_FNS(socket_filter, BPF_PROG_TYPE_SOCKET_FILTER);
-BPF_PROG_TYPE_FNS(lsm, BPF_PROG_TYPE_LSM);
-BPF_PROG_TYPE_FNS(kprobe, BPF_PROG_TYPE_KPROBE);
-BPF_PROG_TYPE_FNS(sched_cls, BPF_PROG_TYPE_SCHED_CLS);
-BPF_PROG_TYPE_FNS(sched_act, BPF_PROG_TYPE_SCHED_ACT);
-BPF_PROG_TYPE_FNS(tracepoint, BPF_PROG_TYPE_TRACEPOINT);
-BPF_PROG_TYPE_FNS(raw_tracepoint, BPF_PROG_TYPE_RAW_TRACEPOINT);
-BPF_PROG_TYPE_FNS(xdp, BPF_PROG_TYPE_XDP);
-BPF_PROG_TYPE_FNS(perf_event, BPF_PROG_TYPE_PERF_EVENT);
-BPF_PROG_TYPE_FNS(tracing, BPF_PROG_TYPE_TRACING);
-BPF_PROG_TYPE_FNS(struct_ops, BPF_PROG_TYPE_STRUCT_OPS);
-BPF_PROG_TYPE_FNS(extension, BPF_PROG_TYPE_EXT);
-BPF_PROG_TYPE_FNS(sk_lookup, BPF_PROG_TYPE_SK_LOOKUP);
-
 __alias(bpf_program__expected_attach_type)
 enum bpf_attach_type bpf_program__get_expected_attach_type(const struct bpf_program *prog);
 
@@ -9172,9 +8390,9 @@ static int attach_lsm(const struct bpf_program *prog, long cookie, struct bpf_li
 static int attach_iter(const struct bpf_program *prog, long cookie, struct bpf_link **link);
 
 static const struct bpf_sec_def section_defs[] = {
-       SEC_DEF("socket",               SOCKET_FILTER, 0, SEC_NONE | SEC_SLOPPY_PFX),
-       SEC_DEF("sk_reuseport/migrate", SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT_OR_MIGRATE, SEC_ATTACHABLE | SEC_SLOPPY_PFX),
-       SEC_DEF("sk_reuseport",         SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT, SEC_ATTACHABLE | SEC_SLOPPY_PFX),
+       SEC_DEF("socket",               SOCKET_FILTER, 0, SEC_NONE),
+       SEC_DEF("sk_reuseport/migrate", SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT_OR_MIGRATE, SEC_ATTACHABLE),
+       SEC_DEF("sk_reuseport",         SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT, SEC_ATTACHABLE),
        SEC_DEF("kprobe+",              KPROBE, 0, SEC_NONE, attach_kprobe),
        SEC_DEF("uprobe+",              KPROBE, 0, SEC_NONE, attach_uprobe),
        SEC_DEF("uprobe.s+",            KPROBE, 0, SEC_SLEEPABLE, attach_uprobe),
@@ -9185,8 +8403,8 @@ static const struct bpf_sec_def section_defs[] = {
        SEC_DEF("kretprobe.multi+",     KPROBE, BPF_TRACE_KPROBE_MULTI, SEC_NONE, attach_kprobe_multi),
        SEC_DEF("usdt+",                KPROBE, 0, SEC_NONE, attach_usdt),
        SEC_DEF("tc",                   SCHED_CLS, 0, SEC_NONE),
-       SEC_DEF("classifier",           SCHED_CLS, 0, SEC_NONE | SEC_SLOPPY_PFX | SEC_DEPRECATED),
-       SEC_DEF("action",               SCHED_ACT, 0, SEC_NONE | SEC_SLOPPY_PFX),
+       SEC_DEF("classifier",           SCHED_CLS, 0, SEC_NONE),
+       SEC_DEF("action",               SCHED_ACT, 0, SEC_NONE),
        SEC_DEF("tracepoint+",          TRACEPOINT, 0, SEC_NONE, attach_tp),
        SEC_DEF("tp+",                  TRACEPOINT, 0, SEC_NONE, attach_tp),
        SEC_DEF("raw_tracepoint+",      RAW_TRACEPOINT, 0, SEC_NONE, attach_raw_tp),
@@ -9203,55 +8421,54 @@ static const struct bpf_sec_def section_defs[] = {
        SEC_DEF("freplace+",            EXT, 0, SEC_ATTACH_BTF, attach_trace),
        SEC_DEF("lsm+",                 LSM, BPF_LSM_MAC, SEC_ATTACH_BTF, attach_lsm),
        SEC_DEF("lsm.s+",               LSM, BPF_LSM_MAC, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_lsm),
+       SEC_DEF("lsm_cgroup+",          LSM, BPF_LSM_CGROUP, SEC_ATTACH_BTF),
        SEC_DEF("iter+",                TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF, attach_iter),
        SEC_DEF("iter.s+",              TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_iter),
        SEC_DEF("syscall",              SYSCALL, 0, SEC_SLEEPABLE),
        SEC_DEF("xdp.frags/devmap",     XDP, BPF_XDP_DEVMAP, SEC_XDP_FRAGS),
        SEC_DEF("xdp/devmap",           XDP, BPF_XDP_DEVMAP, SEC_ATTACHABLE),
-       SEC_DEF("xdp_devmap/",          XDP, BPF_XDP_DEVMAP, SEC_ATTACHABLE | SEC_DEPRECATED),
        SEC_DEF("xdp.frags/cpumap",     XDP, BPF_XDP_CPUMAP, SEC_XDP_FRAGS),
        SEC_DEF("xdp/cpumap",           XDP, BPF_XDP_CPUMAP, SEC_ATTACHABLE),
-       SEC_DEF("xdp_cpumap/",          XDP, BPF_XDP_CPUMAP, SEC_ATTACHABLE | SEC_DEPRECATED),
        SEC_DEF("xdp.frags",            XDP, BPF_XDP, SEC_XDP_FRAGS),
-       SEC_DEF("xdp",                  XDP, BPF_XDP, SEC_ATTACHABLE_OPT | SEC_SLOPPY_PFX),
-       SEC_DEF("perf_event",           PERF_EVENT, 0, SEC_NONE | SEC_SLOPPY_PFX),
-       SEC_DEF("lwt_in",               LWT_IN, 0, SEC_NONE | SEC_SLOPPY_PFX),
-       SEC_DEF("lwt_out",              LWT_OUT, 0, SEC_NONE | SEC_SLOPPY_PFX),
-       SEC_DEF("lwt_xmit",             LWT_XMIT, 0, SEC_NONE | SEC_SLOPPY_PFX),
-       SEC_DEF("lwt_seg6local",        LWT_SEG6LOCAL, 0, SEC_NONE | SEC_SLOPPY_PFX),
-       SEC_DEF("cgroup_skb/ingress",   CGROUP_SKB, BPF_CGROUP_INET_INGRESS, SEC_ATTACHABLE_OPT | SEC_SLOPPY_PFX),
-       SEC_DEF("cgroup_skb/egress",    CGROUP_SKB, BPF_CGROUP_INET_EGRESS, SEC_ATTACHABLE_OPT | SEC_SLOPPY_PFX),
-       SEC_DEF("cgroup/skb",           CGROUP_SKB, 0, SEC_NONE | SEC_SLOPPY_PFX),
-       SEC_DEF("cgroup/sock_create",   CGROUP_SOCK, BPF_CGROUP_INET_SOCK_CREATE, SEC_ATTACHABLE | SEC_SLOPPY_PFX),
-       SEC_DEF("cgroup/sock_release",  CGROUP_SOCK, BPF_CGROUP_INET_SOCK_RELEASE, SEC_ATTACHABLE | SEC_SLOPPY_PFX),
-       SEC_DEF("cgroup/sock",          CGROUP_SOCK, BPF_CGROUP_INET_SOCK_CREATE, SEC_ATTACHABLE_OPT | SEC_SLOPPY_PFX),
-       SEC_DEF("cgroup/post_bind4",    CGROUP_SOCK, BPF_CGROUP_INET4_POST_BIND, SEC_ATTACHABLE | SEC_SLOPPY_PFX),
-       SEC_DEF("cgroup/post_bind6",    CGROUP_SOCK, BPF_CGROUP_INET6_POST_BIND, SEC_ATTACHABLE | SEC_SLOPPY_PFX),
-       SEC_DEF("cgroup/dev",           CGROUP_DEVICE, BPF_CGROUP_DEVICE, SEC_ATTACHABLE_OPT | SEC_SLOPPY_PFX),
-       SEC_DEF("sockops",              SOCK_OPS, BPF_CGROUP_SOCK_OPS, SEC_ATTACHABLE_OPT | SEC_SLOPPY_PFX),
-       SEC_DEF("sk_skb/stream_parser", SK_SKB, BPF_SK_SKB_STREAM_PARSER, SEC_ATTACHABLE_OPT | SEC_SLOPPY_PFX),
-       SEC_DEF("sk_skb/stream_verdict",SK_SKB, BPF_SK_SKB_STREAM_VERDICT, SEC_ATTACHABLE_OPT | SEC_SLOPPY_PFX),
-       SEC_DEF("sk_skb",               SK_SKB, 0, SEC_NONE | SEC_SLOPPY_PFX),
-       SEC_DEF("sk_msg",               SK_MSG, BPF_SK_MSG_VERDICT, SEC_ATTACHABLE_OPT | SEC_SLOPPY_PFX),
-       SEC_DEF("lirc_mode2",           LIRC_MODE2, BPF_LIRC_MODE2, SEC_ATTACHABLE_OPT | SEC_SLOPPY_PFX),
-       SEC_DEF("flow_dissector",       FLOW_DISSECTOR, BPF_FLOW_DISSECTOR, SEC_ATTACHABLE_OPT | SEC_SLOPPY_PFX),
-       SEC_DEF("cgroup/bind4",         CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_BIND, SEC_ATTACHABLE | SEC_SLOPPY_PFX),
-       SEC_DEF("cgroup/bind6",         CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_BIND, SEC_ATTACHABLE | SEC_SLOPPY_PFX),
-       SEC_DEF("cgroup/connect4",      CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_CONNECT, SEC_ATTACHABLE | SEC_SLOPPY_PFX),
-       SEC_DEF("cgroup/connect6",      CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_CONNECT, SEC_ATTACHABLE | SEC_SLOPPY_PFX),
-       SEC_DEF("cgroup/sendmsg4",      CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_SENDMSG, SEC_ATTACHABLE | SEC_SLOPPY_PFX),
-       SEC_DEF("cgroup/sendmsg6",      CGROUP_SOCK_ADDR, BPF_CGROUP_UDP6_SENDMSG, SEC_ATTACHABLE | SEC_SLOPPY_PFX),
-       SEC_DEF("cgroup/recvmsg4",      CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_RECVMSG, SEC_ATTACHABLE | SEC_SLOPPY_PFX),
-       SEC_DEF("cgroup/recvmsg6",      CGROUP_SOCK_ADDR, BPF_CGROUP_UDP6_RECVMSG, SEC_ATTACHABLE | SEC_SLOPPY_PFX),
-       SEC_DEF("cgroup/getpeername4",  CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_GETPEERNAME, SEC_ATTACHABLE | SEC_SLOPPY_PFX),
-       SEC_DEF("cgroup/getpeername6",  CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_GETPEERNAME, SEC_ATTACHABLE | SEC_SLOPPY_PFX),
-       SEC_DEF("cgroup/getsockname4",  CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_GETSOCKNAME, SEC_ATTACHABLE | SEC_SLOPPY_PFX),
-       SEC_DEF("cgroup/getsockname6",  CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_GETSOCKNAME, SEC_ATTACHABLE | SEC_SLOPPY_PFX),
-       SEC_DEF("cgroup/sysctl",        CGROUP_SYSCTL, BPF_CGROUP_SYSCTL, SEC_ATTACHABLE | SEC_SLOPPY_PFX),
-       SEC_DEF("cgroup/getsockopt",    CGROUP_SOCKOPT, BPF_CGROUP_GETSOCKOPT, SEC_ATTACHABLE | SEC_SLOPPY_PFX),
-       SEC_DEF("cgroup/setsockopt",    CGROUP_SOCKOPT, BPF_CGROUP_SETSOCKOPT, SEC_ATTACHABLE | SEC_SLOPPY_PFX),
+       SEC_DEF("xdp",                  XDP, BPF_XDP, SEC_ATTACHABLE_OPT),
+       SEC_DEF("perf_event",           PERF_EVENT, 0, SEC_NONE),
+       SEC_DEF("lwt_in",               LWT_IN, 0, SEC_NONE),
+       SEC_DEF("lwt_out",              LWT_OUT, 0, SEC_NONE),
+       SEC_DEF("lwt_xmit",             LWT_XMIT, 0, SEC_NONE),
+       SEC_DEF("lwt_seg6local",        LWT_SEG6LOCAL, 0, SEC_NONE),
+       SEC_DEF("sockops",              SOCK_OPS, BPF_CGROUP_SOCK_OPS, SEC_ATTACHABLE_OPT),
+       SEC_DEF("sk_skb/stream_parser", SK_SKB, BPF_SK_SKB_STREAM_PARSER, SEC_ATTACHABLE_OPT),
+       SEC_DEF("sk_skb/stream_verdict",SK_SKB, BPF_SK_SKB_STREAM_VERDICT, SEC_ATTACHABLE_OPT),
+       SEC_DEF("sk_skb",               SK_SKB, 0, SEC_NONE),
+       SEC_DEF("sk_msg",               SK_MSG, BPF_SK_MSG_VERDICT, SEC_ATTACHABLE_OPT),
+       SEC_DEF("lirc_mode2",           LIRC_MODE2, BPF_LIRC_MODE2, SEC_ATTACHABLE_OPT),
+       SEC_DEF("flow_dissector",       FLOW_DISSECTOR, BPF_FLOW_DISSECTOR, SEC_ATTACHABLE_OPT),
+       SEC_DEF("cgroup_skb/ingress",   CGROUP_SKB, BPF_CGROUP_INET_INGRESS, SEC_ATTACHABLE_OPT),
+       SEC_DEF("cgroup_skb/egress",    CGROUP_SKB, BPF_CGROUP_INET_EGRESS, SEC_ATTACHABLE_OPT),
+       SEC_DEF("cgroup/skb",           CGROUP_SKB, 0, SEC_NONE),
+       SEC_DEF("cgroup/sock_create",   CGROUP_SOCK, BPF_CGROUP_INET_SOCK_CREATE, SEC_ATTACHABLE),
+       SEC_DEF("cgroup/sock_release",  CGROUP_SOCK, BPF_CGROUP_INET_SOCK_RELEASE, SEC_ATTACHABLE),
+       SEC_DEF("cgroup/sock",          CGROUP_SOCK, BPF_CGROUP_INET_SOCK_CREATE, SEC_ATTACHABLE_OPT),
+       SEC_DEF("cgroup/post_bind4",    CGROUP_SOCK, BPF_CGROUP_INET4_POST_BIND, SEC_ATTACHABLE),
+       SEC_DEF("cgroup/post_bind6",    CGROUP_SOCK, BPF_CGROUP_INET6_POST_BIND, SEC_ATTACHABLE),
+       SEC_DEF("cgroup/bind4",         CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_BIND, SEC_ATTACHABLE),
+       SEC_DEF("cgroup/bind6",         CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_BIND, SEC_ATTACHABLE),
+       SEC_DEF("cgroup/connect4",      CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_CONNECT, SEC_ATTACHABLE),
+       SEC_DEF("cgroup/connect6",      CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_CONNECT, SEC_ATTACHABLE),
+       SEC_DEF("cgroup/sendmsg4",      CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_SENDMSG, SEC_ATTACHABLE),
+       SEC_DEF("cgroup/sendmsg6",      CGROUP_SOCK_ADDR, BPF_CGROUP_UDP6_SENDMSG, SEC_ATTACHABLE),
+       SEC_DEF("cgroup/recvmsg4",      CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_RECVMSG, SEC_ATTACHABLE),
+       SEC_DEF("cgroup/recvmsg6",      CGROUP_SOCK_ADDR, BPF_CGROUP_UDP6_RECVMSG, SEC_ATTACHABLE),
+       SEC_DEF("cgroup/getpeername4",  CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_GETPEERNAME, SEC_ATTACHABLE),
+       SEC_DEF("cgroup/getpeername6",  CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_GETPEERNAME, SEC_ATTACHABLE),
+       SEC_DEF("cgroup/getsockname4",  CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_GETSOCKNAME, SEC_ATTACHABLE),
+       SEC_DEF("cgroup/getsockname6",  CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_GETSOCKNAME, SEC_ATTACHABLE),
+       SEC_DEF("cgroup/sysctl",        CGROUP_SYSCTL, BPF_CGROUP_SYSCTL, SEC_ATTACHABLE),
+       SEC_DEF("cgroup/getsockopt",    CGROUP_SOCKOPT, BPF_CGROUP_GETSOCKOPT, SEC_ATTACHABLE),
+       SEC_DEF("cgroup/setsockopt",    CGROUP_SOCKOPT, BPF_CGROUP_SETSOCKOPT, SEC_ATTACHABLE),
+       SEC_DEF("cgroup/dev",           CGROUP_DEVICE, BPF_CGROUP_DEVICE, SEC_ATTACHABLE_OPT),
        SEC_DEF("struct_ops+",          STRUCT_OPS, 0, SEC_NONE),
-       SEC_DEF("sk_lookup",            SK_LOOKUP, BPF_SK_LOOKUP, SEC_ATTACHABLE | SEC_SLOPPY_PFX),
+       SEC_DEF("sk_lookup",            SK_LOOKUP, BPF_SK_LOOKUP, SEC_ATTACHABLE),
 };
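
With SEC_SLOPPY_PFX dropped from every entry above, section names must now match the table exactly (entries ending in '+' still accept a suffix such as an attach target). Loose prefixes like SEC("xdp_my_prog") no longer resolve to an XDP program. A minimal conforming program, as a sketch assuming the usual bpf_helpers.h setup:

    #include <linux/bpf.h>
    #include <bpf/bpf_helpers.h>

    /* Exact "xdp" section name; a custom suffix would now fail to match. */
    SEC("xdp")
    int xdp_pass(struct xdp_md *ctx)
    {
            return XDP_PASS;
    }

    char LICENSE[] SEC("license") = "GPL";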
 
 static size_t custom_sec_def_cnt;
@@ -9346,8 +8563,7 @@ int libbpf_unregister_prog_handler(int handler_id)
        return 0;
 }
 
-static bool sec_def_matches(const struct bpf_sec_def *sec_def, const char *sec_name,
-                           bool allow_sloppy)
+static bool sec_def_matches(const struct bpf_sec_def *sec_def, const char *sec_name)
 {
        size_t len = strlen(sec_def->sec);
 
@@ -9372,17 +8588,6 @@ static bool sec_def_matches(const struct bpf_sec_def *sec_def, const char *sec_n
                return false;
        }
 
-       /* SEC_SLOPPY_PFX definitions are allowed to be just prefix
-        * matches, unless strict section name mode
-        * (LIBBPF_STRICT_SEC_NAME) is enabled, in which case the
-        * match has to be exact.
-        */
-       if (allow_sloppy && str_has_pfx(sec_name, sec_def->sec))
-               return true;
-
-       /* Definitions not marked SEC_SLOPPY_PFX (e.g.,
-        * SEC("syscall")) are exact matches in both modes.
-        */
        return strcmp(sec_name, sec_def->sec) == 0;
 }
 
@@ -9390,20 +8595,18 @@ static const struct bpf_sec_def *find_sec_def(const char *sec_name)
 {
        const struct bpf_sec_def *sec_def;
        int i, n;
-       bool strict = libbpf_mode & LIBBPF_STRICT_SEC_NAME, allow_sloppy;
 
        n = custom_sec_def_cnt;
        for (i = 0; i < n; i++) {
                sec_def = &custom_sec_defs[i];
-               if (sec_def_matches(sec_def, sec_name, false))
+               if (sec_def_matches(sec_def, sec_name))
                        return sec_def;
        }
 
        n = ARRAY_SIZE(section_defs);
        for (i = 0; i < n; i++) {
                sec_def = &section_defs[i];
-               allow_sloppy = (sec_def->cookie & SEC_SLOPPY_PFX) && !strict;
-               if (sec_def_matches(sec_def, sec_name, allow_sloppy))
+               if (sec_def_matches(sec_def, sec_name))
                        return sec_def;
        }
 
@@ -9656,6 +8859,7 @@ void btf_get_kernel_prefix_kind(enum bpf_attach_type attach_type,
                *kind = BTF_KIND_TYPEDEF;
                break;
        case BPF_LSM_MAC:
+       case BPF_LSM_CGROUP:
                *prefix = BTF_LSM_PREFIX;
                *kind = BTF_KIND_FUNC;
                break;
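
BPF_LSM_CGROUP reuses the bpf_lsm_ BTF prefix, so an lsm_cgroup program resolves its target hook exactly like a plain lsm program. A hypothetical sketch (the socket_bind hook and the allow-on-1 return convention follow the cgroup-style semantics of this attach type; neither is spelled out in this hunk):

    #include "vmlinux.h"
    #include <bpf/bpf_helpers.h>
    #include <bpf/bpf_tracing.h>

    /* Attached per cgroup via BPF_LSM_CGROUP rather than system-wide. */
    SEC("lsm_cgroup/socket_bind")
    int BPF_PROG(check_bind, struct socket *sock, struct sockaddr *address,
                 int addrlen)
    {
            return 1; /* 1 allows the operation for this cgroup, 0 denies */
    }

    char LICENSE[] SEC("license") = "GPL";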
@@ -9859,11 +9063,6 @@ int bpf_map__fd(const struct bpf_map *map)
        return map ? map->fd : libbpf_err(-EINVAL);
 }
 
-const struct bpf_map_def *bpf_map__def(const struct bpf_map *map)
-{
-       return map ? &map->def : libbpf_err_ptr(-EINVAL);
-}
-
 static bool map_uses_real_name(const struct bpf_map *map)
 {
        /* Since libbpf started to support custom .data.* and .rodata.* maps,
@@ -9978,27 +9177,6 @@ __u32 bpf_map__btf_value_type_id(const struct bpf_map *map)
        return map ? map->btf_value_type_id : 0;
 }
 
-int bpf_map__set_priv(struct bpf_map *map, void *priv,
-                    bpf_map_clear_priv_t clear_priv)
-{
-       if (!map)
-               return libbpf_err(-EINVAL);
-
-       if (map->priv) {
-               if (map->clear_priv)
-                       map->clear_priv(map, map->priv);
-       }
-
-       map->priv = priv;
-       map->clear_priv = clear_priv;
-       return 0;
-}
-
-void *bpf_map__priv(const struct bpf_map *map)
-{
-       return map ? map->priv : libbpf_err_ptr(-EINVAL);
-}
-
 int bpf_map__set_initial_value(struct bpf_map *map,
                               const void *data, size_t size)
 {
@@ -10018,11 +9196,6 @@ const void *bpf_map__initial_value(struct bpf_map *map, size_t *psize)
        return map->mmaped;
 }
 
-bool bpf_map__is_offload_neutral(const struct bpf_map *map)
-{
-       return map->def.type == BPF_MAP_TYPE_PERF_EVENT_ARRAY;
-}
-
 bool bpf_map__is_internal(const struct bpf_map *map)
 {
        return map->libbpf_type != LIBBPF_MAP_UNSPEC;
@@ -10083,12 +9256,6 @@ __bpf_map__iter(const struct bpf_map *m, const struct bpf_object *obj, int i)
        return &obj->maps[idx];
 }
 
-struct bpf_map *
-bpf_map__next(const struct bpf_map *prev, const struct bpf_object *obj)
-{
-       return bpf_object__next_map(obj, prev);
-}
-
 struct bpf_map *
 bpf_object__next_map(const struct bpf_object *obj, const struct bpf_map *prev)
 {
@@ -10098,12 +9265,6 @@ bpf_object__next_map(const struct bpf_object *obj, const struct bpf_map *prev)
        return __bpf_map__iter(prev, obj, 1);
 }
 
-struct bpf_map *
-bpf_map__prev(const struct bpf_map *next, const struct bpf_object *obj)
-{
-       return bpf_object__prev_map(obj, next);
-}
-
 struct bpf_map *
 bpf_object__prev_map(const struct bpf_object *obj, const struct bpf_map *next)
 {
@@ -10149,12 +9310,6 @@ bpf_object__find_map_fd_by_name(const struct bpf_object *obj, const char *name)
        return bpf_map__fd(bpf_object__find_map_by_name(obj, name));
 }
 
-struct bpf_map *
-bpf_object__find_map_by_offset(struct bpf_object *obj, size_t offset)
-{
-       return libbpf_err_ptr(-ENOTSUP);
-}
-
 static int validate_map_op(const struct bpf_map *map, size_t key_sz,
                           size_t value_sz, bool check_value_sz)
 {
@@ -10275,95 +9430,6 @@ long libbpf_get_error(const void *ptr)
        return -errno;
 }
 
-__attribute__((alias("bpf_prog_load_xattr2")))
-int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
-                       struct bpf_object **pobj, int *prog_fd);
-
-static int bpf_prog_load_xattr2(const struct bpf_prog_load_attr *attr,
-                               struct bpf_object **pobj, int *prog_fd)
-{
-       struct bpf_object_open_attr open_attr = {};
-       struct bpf_program *prog, *first_prog = NULL;
-       struct bpf_object *obj;
-       struct bpf_map *map;
-       int err;
-
-       if (!attr)
-               return libbpf_err(-EINVAL);
-       if (!attr->file)
-               return libbpf_err(-EINVAL);
-
-       open_attr.file = attr->file;
-       open_attr.prog_type = attr->prog_type;
-
-       obj = __bpf_object__open_xattr(&open_attr, 0);
-       err = libbpf_get_error(obj);
-       if (err)
-               return libbpf_err(-ENOENT);
-
-       bpf_object__for_each_program(prog, obj) {
-               enum bpf_attach_type attach_type = attr->expected_attach_type;
-               /*
-                * to preserve backwards compatibility, bpf_prog_load treats
-                * attr->prog_type, if specified, as an override to whatever
-                * bpf_object__open guessed
-                */
-               if (attr->prog_type != BPF_PROG_TYPE_UNSPEC) {
-                       prog->type = attr->prog_type;
-                       prog->expected_attach_type = attach_type;
-               }
-               if (bpf_program__type(prog) == BPF_PROG_TYPE_UNSPEC) {
-                       /*
-                        * we haven't guessed from section name and user
-                        * didn't provide a fallback type, too bad...
-                        */
-                       bpf_object__close(obj);
-                       return libbpf_err(-EINVAL);
-               }
-
-               prog->prog_ifindex = attr->ifindex;
-               prog->log_level = attr->log_level;
-               prog->prog_flags |= attr->prog_flags;
-               if (!first_prog)
-                       first_prog = prog;
-       }
-
-       bpf_object__for_each_map(map, obj) {
-               if (map->def.type != BPF_MAP_TYPE_PERF_EVENT_ARRAY)
-                       map->map_ifindex = attr->ifindex;
-       }
-
-       if (!first_prog) {
-               pr_warn("object file doesn't contain bpf program\n");
-               bpf_object__close(obj);
-               return libbpf_err(-ENOENT);
-       }
-
-       err = bpf_object__load(obj);
-       if (err) {
-               bpf_object__close(obj);
-               return libbpf_err(err);
-       }
-
-       *pobj = obj;
-       *prog_fd = bpf_program__fd(first_prog);
-       return 0;
-}
-
-COMPAT_VERSION(bpf_prog_load_deprecated, bpf_prog_load, LIBBPF_0.0.1)
-int bpf_prog_load_deprecated(const char *file, enum bpf_prog_type type,
-                            struct bpf_object **pobj, int *prog_fd)
-{
-       struct bpf_prog_load_attr attr;
-
-       memset(&attr, 0, sizeof(struct bpf_prog_load_attr));
-       attr.file = file;
-       attr.prog_type = type;
-       attr.expected_attach_type = 0;
-
-       return bpf_prog_load_xattr2(&attr, pobj, prog_fd);
-}
-
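
With bpf_prog_load_xattr() and bpf_prog_load_deprecated() removed, loading goes through the bpf_object API only. A minimal replacement sketch (the path argument is a placeholder; error handling trimmed):

    #include <bpf/libbpf.h>

    static int load_first_prog_fd(const char *path)
    {
            struct bpf_object *obj;
            struct bpf_program *prog;

            obj = bpf_object__open_file(path, NULL);
            if (libbpf_get_error(obj))
                    return -1;

            if (bpf_object__load(obj)) {
                    bpf_object__close(obj);
                    return -1;
            }

            /* first program in the object, as the old helpers returned */
            prog = bpf_object__next_program(obj, NULL);
            return prog ? bpf_program__fd(prog) : -1;
    }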
 /* Replace link's underlying BPF program with the new one */
 int bpf_link__update_program(struct bpf_link *link, struct bpf_program *prog)
 {
@@ -10811,10 +9877,11 @@ static int perf_event_kprobe_open_legacy(const char *probe_name, bool retprobe,
        }
        type = determine_kprobe_perf_type_legacy(probe_name, retprobe);
        if (type < 0) {
+               err = type;
                pr_warn("failed to determine legacy kprobe event id for '%s+0x%zx': %s\n",
                        kfunc_name, offset,
-                       libbpf_strerror_r(type, errmsg, sizeof(errmsg)));
-               return type;
+                       libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
+               goto err_clean_legacy;
        }
        attr.size = sizeof(attr);
        attr.config = type;
@@ -10828,9 +9895,14 @@ static int perf_event_kprobe_open_legacy(const char *probe_name, bool retprobe,
                err = -errno;
                pr_warn("legacy kprobe perf_event_open() failed: %s\n",
                        libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
-               return err;
+               goto err_clean_legacy;
        }
        return pfd;
+
+err_clean_legacy:
+       /* Clear the newly added legacy kprobe_event */
+       remove_kprobe_event_legacy(probe_name, retprobe);
+       return err;
 }
 
 struct bpf_link *
@@ -10887,7 +9959,7 @@ bpf_program__attach_kprobe_opts(const struct bpf_program *prog,
                        prog->name, retprobe ? "kretprobe" : "kprobe",
                        func_name, offset,
                        libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
-               goto err_out;
+               goto err_clean_legacy;
        }
        if (legacy) {
                struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link);
@@ -10898,6 +9970,10 @@ bpf_program__attach_kprobe_opts(const struct bpf_program *prog,
        }
 
        return link;
+
+err_clean_legacy:
+       if (legacy)
+               remove_kprobe_event_legacy(legacy_probe, retprobe);
 err_out:
        free(legacy_probe);
        return libbpf_err_ptr(err);
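
None of this legacy fallback is visible to callers; attachment still goes through the opts-based API, and the new err_clean_legacy path merely guarantees the transient kprobe_events entry is removed on failure. A usage sketch, assuming a loaded program and the do_execve symbol:

    #include <bpf/libbpf.h>

    static struct bpf_link *attach_exec_kprobe(struct bpf_program *prog)
    {
            LIBBPF_OPTS(bpf_kprobe_opts, opts,
                    .retprobe = false,      /* entry probe, not kretprobe */
            );

            /* falls back to legacy kprobe_events on kernels without
             * perf_event kprobe support; now cleaned up on error
             */
            return bpf_program__attach_kprobe_opts(prog, "do_execve", &opts);
    }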
@@ -11172,9 +10248,10 @@ static int perf_event_uprobe_open_legacy(const char *probe_name, bool retprobe,
        }
        type = determine_uprobe_perf_type_legacy(probe_name, retprobe);
        if (type < 0) {
+               err = type;
                pr_warn("failed to determine legacy uprobe event id for %s:0x%zx: %d\n",
                        binary_path, offset, err);
-               return type;
+               goto err_clean_legacy;
        }
 
        memset(&attr, 0, sizeof(attr));
@@ -11189,9 +10266,14 @@ static int perf_event_uprobe_open_legacy(const char *probe_name, bool retprobe,
        if (pfd < 0) {
                err = -errno;
                pr_warn("legacy uprobe perf_event_open() failed: %d\n", err);
-               return err;
+               goto err_clean_legacy;
        }
        return pfd;
+
+err_clean_legacy:
+       /* Clear the newly added legacy uprobe_event */
+       remove_uprobe_event_legacy(probe_name, retprobe);
+       return err;
 }
 
 /* Return next ELF section of sh_type after scn, or first of that type if scn is NULL. */
@@ -11525,7 +10607,7 @@ bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid,
                        prog->name, retprobe ? "uretprobe" : "uprobe",
                        binary_path, func_offset,
                        libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
-               goto err_out;
+               goto err_clean_legacy;
        }
        if (legacy) {
                struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link);
@@ -11535,10 +10617,13 @@ bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid,
                perf_link->legacy_is_retprobe = retprobe;
        }
        return link;
+
+err_clean_legacy:
+       if (legacy)
+               remove_uprobe_event_legacy(legacy_probe, retprobe);
 err_out:
        free(legacy_probe);
        return libbpf_err_ptr(err);
-
 }
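
The uprobe side mirrors the kprobe fix: a failed attach no longer leaks the legacy uprobe_events entry. For reference, a sketch of the caller-facing API (the libc path and symbol are illustrative; func_name lets libbpf resolve the offset):

    #include <bpf/libbpf.h>

    static struct bpf_link *attach_malloc_uprobe(struct bpf_program *prog)
    {
            LIBBPF_OPTS(bpf_uprobe_opts, opts,
                    .func_name = "malloc",  /* resolved inside the binary */
                    .retprobe = false,
            );

            /* pid -1: all processes; func_offset 0 since func_name is set */
            return bpf_program__attach_uprobe_opts(prog, -1,
                                                   "/usr/lib/libc.so.6",
                                                   0, &opts);
    }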
 
 /* Format of u[ret]probe section definition supporting auto-attach:
@@ -12150,6 +11235,9 @@ struct bpf_link *bpf_map__attach_struct_ops(const struct bpf_map *map)
        return link;
 }
 
+typedef enum bpf_perf_event_ret (*bpf_perf_event_print_t)(struct perf_event_header *hdr,
+                                                         void *private_data);
+
 static enum bpf_perf_event_ret
 perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size,
                       void **copy_mem, size_t *copy_size,
@@ -12198,12 +11286,6 @@ perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size,
        return libbpf_err(ret);
 }
 
-__attribute__((alias("perf_event_read_simple")))
-enum bpf_perf_event_ret
-bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size,
-                          void **copy_mem, size_t *copy_size,
-                          bpf_perf_event_print_t fn, void *private_data);
-
 struct perf_buffer;
 
 struct perf_buffer_params {
@@ -12337,12 +11419,11 @@ error:
 static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt,
                                              struct perf_buffer_params *p);
 
-DEFAULT_VERSION(perf_buffer__new_v0_6_0, perf_buffer__new, LIBBPF_0.6.0)
-struct perf_buffer *perf_buffer__new_v0_6_0(int map_fd, size_t page_cnt,
-                                           perf_buffer_sample_fn sample_cb,
-                                           perf_buffer_lost_fn lost_cb,
-                                           void *ctx,
-                                           const struct perf_buffer_opts *opts)
+struct perf_buffer *perf_buffer__new(int map_fd, size_t page_cnt,
+                                    perf_buffer_sample_fn sample_cb,
+                                    perf_buffer_lost_fn lost_cb,
+                                    void *ctx,
+                                    const struct perf_buffer_opts *opts)
 {
        struct perf_buffer_params p = {};
        struct perf_event_attr attr = {};
@@ -12364,22 +11445,10 @@ struct perf_buffer *perf_buffer__new_v0_6_0(int map_fd, size_t page_cnt,
        return libbpf_ptr(__perf_buffer__new(map_fd, page_cnt, &p));
 }
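
After this rename, perf_buffer__new() is the only spelling and takes its callbacks directly instead of through opts. A minimal consumer sketch, assuming map_fd refers to a BPF_MAP_TYPE_PERF_EVENT_ARRAY map:

    #include <bpf/libbpf.h>

    static void on_sample(void *ctx, int cpu, void *data, __u32 size)
    {
            /* handle one event record */
    }

    static void on_lost(void *ctx, int cpu, __u64 cnt)
    {
            /* cnt records were dropped on this cpu */
    }

    static int poll_events(int map_fd)
    {
            struct perf_buffer *pb;

            pb = perf_buffer__new(map_fd, 8 /* pages per CPU */,
                                  on_sample, on_lost, NULL, NULL);
            if (libbpf_get_error(pb))
                    return -1;

            while (perf_buffer__poll(pb, 100 /* ms */) >= 0)
                    ; /* keep consuming until error or signal */

            perf_buffer__free(pb);
            return 0;
    }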
 
-COMPAT_VERSION(perf_buffer__new_deprecated, perf_buffer__new, LIBBPF_0.0.4)
-struct perf_buffer *perf_buffer__new_deprecated(int map_fd, size_t page_cnt,
-                                               const struct perf_buffer_opts *opts)
-{
-       return perf_buffer__new_v0_6_0(map_fd, page_cnt,
-                                      opts ? opts->sample_cb : NULL,
-                                      opts ? opts->lost_cb : NULL,
-                                      opts ? opts->ctx : NULL,
-                                      NULL);
-}
-
-DEFAULT_VERSION(perf_buffer__new_raw_v0_6_0, perf_buffer__new_raw, LIBBPF_0.6.0)
-struct perf_buffer *perf_buffer__new_raw_v0_6_0(int map_fd, size_t page_cnt,
-                                               struct perf_event_attr *attr,
-                                               perf_buffer_event_fn event_cb, void *ctx,
-                                               const struct perf_buffer_raw_opts *opts)
+struct perf_buffer *perf_buffer__new_raw(int map_fd, size_t page_cnt,
+                                        struct perf_event_attr *attr,
+                                        perf_buffer_event_fn event_cb, void *ctx,
+                                        const struct perf_buffer_raw_opts *opts)
 {
        struct perf_buffer_params p = {};
 
@@ -12399,20 +11468,6 @@ struct perf_buffer *perf_buffer__new_raw_v0_6_0(int map_fd, size_t page_cnt,
        return libbpf_ptr(__perf_buffer__new(map_fd, page_cnt, &p));
 }
 
-COMPAT_VERSION(perf_buffer__new_raw_deprecated, perf_buffer__new_raw, LIBBPF_0.0.4)
-struct perf_buffer *perf_buffer__new_raw_deprecated(int map_fd, size_t page_cnt,
-                                                   const struct perf_buffer_raw_opts *opts)
-{
-       LIBBPF_OPTS(perf_buffer_raw_opts, inner_opts,
-               .cpu_cnt = opts->cpu_cnt,
-               .cpus = opts->cpus,
-               .map_keys = opts->map_keys,
-       );
-
-       return perf_buffer__new_raw_v0_6_0(map_fd, page_cnt, opts->attr,
-                                          opts->event_cb, opts->ctx, &inner_opts);
-}
-
 static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt,
                                              struct perf_buffer_params *p)
 {
@@ -12714,254 +11769,6 @@ int perf_buffer__consume(struct perf_buffer *pb)
        return 0;
 }
 
-struct bpf_prog_info_array_desc {
-       int     array_offset;   /* e.g. offset of jited_prog_insns */
-       int     count_offset;   /* e.g. offset of jited_prog_len */
-       int     size_offset;    /* > 0: offset of rec size,
-                                * < 0: fix size of -size_offset
-                                */
-};
-
-static struct bpf_prog_info_array_desc bpf_prog_info_array_desc[] = {
-       [BPF_PROG_INFO_JITED_INSNS] = {
-               offsetof(struct bpf_prog_info, jited_prog_insns),
-               offsetof(struct bpf_prog_info, jited_prog_len),
-               -1,
-       },
-       [BPF_PROG_INFO_XLATED_INSNS] = {
-               offsetof(struct bpf_prog_info, xlated_prog_insns),
-               offsetof(struct bpf_prog_info, xlated_prog_len),
-               -1,
-       },
-       [BPF_PROG_INFO_MAP_IDS] = {
-               offsetof(struct bpf_prog_info, map_ids),
-               offsetof(struct bpf_prog_info, nr_map_ids),
-               -(int)sizeof(__u32),
-       },
-       [BPF_PROG_INFO_JITED_KSYMS] = {
-               offsetof(struct bpf_prog_info, jited_ksyms),
-               offsetof(struct bpf_prog_info, nr_jited_ksyms),
-               -(int)sizeof(__u64),
-       },
-       [BPF_PROG_INFO_JITED_FUNC_LENS] = {
-               offsetof(struct bpf_prog_info, jited_func_lens),
-               offsetof(struct bpf_prog_info, nr_jited_func_lens),
-               -(int)sizeof(__u32),
-       },
-       [BPF_PROG_INFO_FUNC_INFO] = {
-               offsetof(struct bpf_prog_info, func_info),
-               offsetof(struct bpf_prog_info, nr_func_info),
-               offsetof(struct bpf_prog_info, func_info_rec_size),
-       },
-       [BPF_PROG_INFO_LINE_INFO] = {
-               offsetof(struct bpf_prog_info, line_info),
-               offsetof(struct bpf_prog_info, nr_line_info),
-               offsetof(struct bpf_prog_info, line_info_rec_size),
-       },
-       [BPF_PROG_INFO_JITED_LINE_INFO] = {
-               offsetof(struct bpf_prog_info, jited_line_info),
-               offsetof(struct bpf_prog_info, nr_jited_line_info),
-               offsetof(struct bpf_prog_info, jited_line_info_rec_size),
-       },
-       [BPF_PROG_INFO_PROG_TAGS] = {
-               offsetof(struct bpf_prog_info, prog_tags),
-               offsetof(struct bpf_prog_info, nr_prog_tags),
-               -(int)sizeof(__u8) * BPF_TAG_SIZE,
-       },
-
-};
-
-static __u32 bpf_prog_info_read_offset_u32(struct bpf_prog_info *info,
-                                          int offset)
-{
-       __u32 *array = (__u32 *)info;
-
-       if (offset >= 0)
-               return array[offset / sizeof(__u32)];
-       return -(int)offset;
-}
-
-static __u64 bpf_prog_info_read_offset_u64(struct bpf_prog_info *info,
-                                          int offset)
-{
-       __u64 *array = (__u64 *)info;
-
-       if (offset >= 0)
-               return array[offset / sizeof(__u64)];
-       return -(int)offset;
-}
-
-static void bpf_prog_info_set_offset_u32(struct bpf_prog_info *info, int offset,
-                                        __u32 val)
-{
-       __u32 *array = (__u32 *)info;
-
-       if (offset >= 0)
-               array[offset / sizeof(__u32)] = val;
-}
-
-static void bpf_prog_info_set_offset_u64(struct bpf_prog_info *info, int offset,
-                                        __u64 val)
-{
-       __u64 *array = (__u64 *)info;
-
-       if (offset >= 0)
-               array[offset / sizeof(__u64)] = val;
-}
-
-struct bpf_prog_info_linear *
-bpf_program__get_prog_info_linear(int fd, __u64 arrays)
-{
-       struct bpf_prog_info_linear *info_linear;
-       struct bpf_prog_info info = {};
-       __u32 info_len = sizeof(info);
-       __u32 data_len = 0;
-       int i, err;
-       void *ptr;
-
-       if (arrays >> BPF_PROG_INFO_LAST_ARRAY)
-               return libbpf_err_ptr(-EINVAL);
-
-       /* step 1: get array dimensions */
-       err = bpf_obj_get_info_by_fd(fd, &info, &info_len);
-       if (err) {
-               pr_debug("can't get prog info: %s", strerror(errno));
-               return libbpf_err_ptr(-EFAULT);
-       }
-
-       /* step 2: calculate total size of all arrays */
-       for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) {
-               bool include_array = (arrays & (1UL << i)) > 0;
-               struct bpf_prog_info_array_desc *desc;
-               __u32 count, size;
-
-               desc = bpf_prog_info_array_desc + i;
-
-               /* kernel is too old to support this field */
-               if (info_len < desc->array_offset + sizeof(__u32) ||
-                   info_len < desc->count_offset + sizeof(__u32) ||
-                   (desc->size_offset > 0 && info_len < desc->size_offset))
-                       include_array = false;
-
-               if (!include_array) {
-                       arrays &= ~(1UL << i);  /* clear the bit */
-                       continue;
-               }
-
-               count = bpf_prog_info_read_offset_u32(&info, desc->count_offset);
-               size  = bpf_prog_info_read_offset_u32(&info, desc->size_offset);
-
-               data_len += count * size;
-       }
-
-       /* step 3: allocate continuous memory */
-       data_len = roundup(data_len, sizeof(__u64));
-       info_linear = malloc(sizeof(struct bpf_prog_info_linear) + data_len);
-       if (!info_linear)
-               return libbpf_err_ptr(-ENOMEM);
-
-       /* step 4: fill data to info_linear->info */
-       info_linear->arrays = arrays;
-       memset(&info_linear->info, 0, sizeof(info));
-       ptr = info_linear->data;
-
-       for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) {
-               struct bpf_prog_info_array_desc *desc;
-               __u32 count, size;
-
-               if ((arrays & (1UL << i)) == 0)
-                       continue;
-
-               desc  = bpf_prog_info_array_desc + i;
-               count = bpf_prog_info_read_offset_u32(&info, desc->count_offset);
-               size  = bpf_prog_info_read_offset_u32(&info, desc->size_offset);
-               bpf_prog_info_set_offset_u32(&info_linear->info,
-                                            desc->count_offset, count);
-               bpf_prog_info_set_offset_u32(&info_linear->info,
-                                            desc->size_offset, size);
-               bpf_prog_info_set_offset_u64(&info_linear->info,
-                                            desc->array_offset,
-                                            ptr_to_u64(ptr));
-               ptr += count * size;
-       }
-
-       /* step 5: call syscall again to get required arrays */
-       err = bpf_obj_get_info_by_fd(fd, &info_linear->info, &info_len);
-       if (err) {
-               pr_debug("can't get prog info: %s", strerror(errno));
-               free(info_linear);
-               return libbpf_err_ptr(-EFAULT);
-       }
-
-       /* step 6: verify the data */
-       for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) {
-               struct bpf_prog_info_array_desc *desc;
-               __u32 v1, v2;
-
-               if ((arrays & (1UL << i)) == 0)
-                       continue;
-
-               desc = bpf_prog_info_array_desc + i;
-               v1 = bpf_prog_info_read_offset_u32(&info, desc->count_offset);
-               v2 = bpf_prog_info_read_offset_u32(&info_linear->info,
-                                                  desc->count_offset);
-               if (v1 != v2)
-                       pr_warn("%s: mismatch in element count\n", __func__);
-
-               v1 = bpf_prog_info_read_offset_u32(&info, desc->size_offset);
-               v2 = bpf_prog_info_read_offset_u32(&info_linear->info,
-                                                  desc->size_offset);
-               if (v1 != v2)
-                       pr_warn("%s: mismatch in rec size\n", __func__);
-       }
-
-       /* step 7: update info_len and data_len */
-       info_linear->info_len = sizeof(struct bpf_prog_info);
-       info_linear->data_len = data_len;
-
-       return info_linear;
-}
-
-void bpf_program__bpil_addr_to_offs(struct bpf_prog_info_linear *info_linear)
-{
-       int i;
-
-       for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) {
-               struct bpf_prog_info_array_desc *desc;
-               __u64 addr, offs;
-
-               if ((info_linear->arrays & (1UL << i)) == 0)
-                       continue;
-
-               desc = bpf_prog_info_array_desc + i;
-               addr = bpf_prog_info_read_offset_u64(&info_linear->info,
-                                                    desc->array_offset);
-               offs = addr - ptr_to_u64(info_linear->data);
-               bpf_prog_info_set_offset_u64(&info_linear->info,
-                                            desc->array_offset, offs);
-       }
-}
-
-void bpf_program__bpil_offs_to_addr(struct bpf_prog_info_linear *info_linear)
-{
-       int i;
-
-       for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) {
-               struct bpf_prog_info_array_desc *desc;
-               __u64 addr, offs;
-
-               if ((info_linear->arrays & (1UL << i)) == 0)
-                       continue;
-
-               desc = bpf_prog_info_array_desc + i;
-               offs = bpf_prog_info_read_offset_u64(&info_linear->info,
-                                                    desc->array_offset);
-               addr = offs + ptr_to_u64(info_linear->data);
-               bpf_prog_info_set_offset_u64(&info_linear->info,
-                                            desc->array_offset, addr);
-       }
-}
-
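
The whole bpf_prog_info_linear machinery leaves libbpf (perf carries its own copy of these helpers). Callers that only need the fixed-size fields can query the kernel directly; a sketch assuming a valid program fd:

    #include <string.h>
    #include <bpf/bpf.h>

    static int query_prog_id(int prog_fd)
    {
            struct bpf_prog_info info;
            __u32 len = sizeof(info);

            memset(&info, 0, sizeof(info));
            if (bpf_obj_get_info_by_fd(prog_fd, &info, &len))
                    return -1;

            /* info.name, info.id, etc. are filled in by the kernel */
            return info.id;
    }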
 int bpf_program__set_attach_target(struct bpf_program *prog,
                                   int attach_prog_fd,
                                   const char *attach_func_name)
index fa27969..e4d5353 100644
@@ -101,11 +101,6 @@ LIBBPF_API libbpf_print_fn_t libbpf_set_print(libbpf_print_fn_t fn);
 /* Hide internal to user */
 struct bpf_object;
 
-struct bpf_object_open_attr {
-       const char *file;
-       enum bpf_prog_type prog_type;
-};
-
 struct bpf_object_open_opts {
        /* size of this struct, for forward/backward compatibility */
        size_t sz;
@@ -118,21 +113,12 @@ struct bpf_object_open_opts {
        const char *object_name;
        /* parse map definitions non-strictly, allowing extra attributes/data */
        bool relaxed_maps;
-       /* DEPRECATED: handle CO-RE relocations non-strictly, allowing failures.
-        * Value is ignored. Relocations always are processed non-strictly.
-        * Non-relocatable instructions are replaced with invalid ones to
-        * prevent accidental errors.
-        */
-       LIBBPF_DEPRECATED_SINCE(0, 6, "field has no effect")
-       bool relaxed_core_relocs;
        /* maps that set the 'pinning' attribute in their definition will have
         * their pin_path attribute set to a file in this directory, and be
         * auto-pinned to that path on load; defaults to "/sys/fs/bpf".
         */
        const char *pin_root_path;
-
-       LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_program__set_attach_target() on each individual bpf_program")
-       __u32 attach_prog_fd;
+       long :0;
        /* Additional kernel config content that augments and overrides
         * system Kconfig for CONFIG_xxx externs.
         */
@@ -215,20 +201,10 @@ LIBBPF_API struct bpf_object *
 bpf_object__open_mem(const void *obj_buf, size_t obj_buf_sz,
                     const struct bpf_object_open_opts *opts);
 
-/* deprecated bpf_object__open variants */
-LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_object__open_mem() instead")
-LIBBPF_API struct bpf_object *
-bpf_object__open_buffer(const void *obj_buf, size_t obj_buf_sz,
-                       const char *name);
-LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_object__open_file() instead")
-LIBBPF_API struct bpf_object *
-bpf_object__open_xattr(struct bpf_object_open_attr *attr);
+/* Load/unload object into/from kernel */
+LIBBPF_API int bpf_object__load(struct bpf_object *obj);
 
-enum libbpf_pin_type {
-       LIBBPF_PIN_NONE,
-       /* PIN_BY_NAME: pin maps by name (in /sys/fs/bpf by default) */
-       LIBBPF_PIN_BY_NAME,
-};
+LIBBPF_API void bpf_object__close(struct bpf_object *object);
 
 /* pin_maps and unpin_maps can both be called with a NULL path, in which case
  * they will use the pin_path attribute of each map (and ignore all maps that
@@ -242,20 +218,6 @@ LIBBPF_API int bpf_object__pin_programs(struct bpf_object *obj,
 LIBBPF_API int bpf_object__unpin_programs(struct bpf_object *obj,
                                          const char *path);
 LIBBPF_API int bpf_object__pin(struct bpf_object *object, const char *path);
-LIBBPF_API void bpf_object__close(struct bpf_object *object);
-
-struct bpf_object_load_attr {
-       struct bpf_object *obj;
-       int log_level;
-       const char *target_btf_path;
-};
-
-/* Load/unload object into/from kernel */
-LIBBPF_API int bpf_object__load(struct bpf_object *obj);
-LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_object__load() instead")
-LIBBPF_API int bpf_object__load_xattr(struct bpf_object_load_attr *attr);
-LIBBPF_DEPRECATED_SINCE(0, 6, "bpf_object__unload() is deprecated, use bpf_object__close() instead")
-LIBBPF_API int bpf_object__unload(struct bpf_object *obj);
 
 LIBBPF_API const char *bpf_object__name(const struct bpf_object *obj);
 LIBBPF_API unsigned int bpf_object__kversion(const struct bpf_object *obj);
@@ -265,29 +227,10 @@ struct btf;
 LIBBPF_API struct btf *bpf_object__btf(const struct bpf_object *obj);
 LIBBPF_API int bpf_object__btf_fd(const struct bpf_object *obj);
 
-LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_object__find_program_by_name() instead")
-LIBBPF_API struct bpf_program *
-bpf_object__find_program_by_title(const struct bpf_object *obj,
-                                 const char *title);
 LIBBPF_API struct bpf_program *
 bpf_object__find_program_by_name(const struct bpf_object *obj,
                                 const char *name);
 
-LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "track bpf_objects in application code instead")
-struct bpf_object *bpf_object__next(struct bpf_object *prev);
-#define bpf_object__for_each_safe(pos, tmp)                    \
-       for ((pos) = bpf_object__next(NULL),            \
-               (tmp) = bpf_object__next(pos);          \
-            (pos) != NULL;                             \
-            (pos) = (tmp), (tmp) = bpf_object__next(tmp))
-
-typedef void (*bpf_object_clear_priv_t)(struct bpf_object *, void *);
-LIBBPF_DEPRECATED_SINCE(0, 7, "storage via set_priv/priv is deprecated")
-LIBBPF_API int bpf_object__set_priv(struct bpf_object *obj, void *priv,
-                                   bpf_object_clear_priv_t clear_priv);
-LIBBPF_DEPRECATED_SINCE(0, 7, "storage via set_priv/priv is deprecated")
-LIBBPF_API void *bpf_object__priv(const struct bpf_object *prog);
-
 LIBBPF_API int
 libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type,
                         enum bpf_attach_type *expected_attach_type);
@@ -298,9 +241,7 @@ LIBBPF_API int libbpf_find_vmlinux_btf_id(const char *name,
 
 /* Accessors of bpf_program */
 struct bpf_program;
-LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_object__next_program() instead")
-struct bpf_program *bpf_program__next(struct bpf_program *prog,
-                                     const struct bpf_object *obj);
+
 LIBBPF_API struct bpf_program *
 bpf_object__next_program(const struct bpf_object *obj, struct bpf_program *prog);
 
@@ -309,33 +250,17 @@ bpf_object__next_program(const struct bpf_object *obj, struct bpf_program *prog)
             (pos) != NULL;                                     \
             (pos) = bpf_object__next_program((obj), (pos)))
 
-LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_object__prev_program() instead")
-struct bpf_program *bpf_program__prev(struct bpf_program *prog,
-                                     const struct bpf_object *obj);
 LIBBPF_API struct bpf_program *
 bpf_object__prev_program(const struct bpf_object *obj, struct bpf_program *prog);
 
-typedef void (*bpf_program_clear_priv_t)(struct bpf_program *, void *);
-
-LIBBPF_DEPRECATED_SINCE(0, 7, "storage via set_priv/priv is deprecated")
-LIBBPF_API int bpf_program__set_priv(struct bpf_program *prog, void *priv,
-                                    bpf_program_clear_priv_t clear_priv);
-LIBBPF_DEPRECATED_SINCE(0, 7, "storage via set_priv/priv is deprecated")
-LIBBPF_API void *bpf_program__priv(const struct bpf_program *prog);
 LIBBPF_API void bpf_program__set_ifindex(struct bpf_program *prog,
                                         __u32 ifindex);
 
 LIBBPF_API const char *bpf_program__name(const struct bpf_program *prog);
 LIBBPF_API const char *bpf_program__section_name(const struct bpf_program *prog);
-LIBBPF_API LIBBPF_DEPRECATED("BPF program title is confusing term; please use bpf_program__section_name() instead")
-const char *bpf_program__title(const struct bpf_program *prog, bool needs_copy);
 LIBBPF_API bool bpf_program__autoload(const struct bpf_program *prog);
 LIBBPF_API int bpf_program__set_autoload(struct bpf_program *prog, bool autoload);
 
-/* returns program size in bytes */
-LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_program__insn_cnt() instead")
-LIBBPF_API size_t bpf_program__size(const struct bpf_program *prog);
-
 struct bpf_insn;
 
 /**
@@ -388,17 +313,7 @@ LIBBPF_API int bpf_program__set_insns(struct bpf_program *prog,
  */
 LIBBPF_API size_t bpf_program__insn_cnt(const struct bpf_program *prog);
 
-LIBBPF_DEPRECATED_SINCE(0, 6, "use bpf_object__load() instead")
-LIBBPF_API int bpf_program__load(struct bpf_program *prog, const char *license, __u32 kern_version);
 LIBBPF_API int bpf_program__fd(const struct bpf_program *prog);
-LIBBPF_DEPRECATED_SINCE(0, 7, "multi-instance bpf_program support is deprecated")
-LIBBPF_API int bpf_program__pin_instance(struct bpf_program *prog,
-                                        const char *path,
-                                        int instance);
-LIBBPF_DEPRECATED_SINCE(0, 7, "multi-instance bpf_program support is deprecated")
-LIBBPF_API int bpf_program__unpin_instance(struct bpf_program *prog,
-                                          const char *path,
-                                          int instance);
 
 /**
  * @brief **bpf_program__pin()** pins the BPF program to a file
@@ -698,99 +613,6 @@ LIBBPF_API struct bpf_link *
 bpf_program__attach_iter(const struct bpf_program *prog,
                         const struct bpf_iter_attach_opts *opts);
 
-/*
- * Libbpf allows callers to adjust BPF programs before being loaded
- * into kernel. One program in an object file can be transformed into
- * multiple variants to be attached to different hooks.
- *
- * bpf_program_prep_t, bpf_program__set_prep and bpf_program__nth_fd
- * form an API for this purpose.
- *
- * - bpf_program_prep_t:
- *   Defines a 'preprocessor', which is a caller defined function
- *   passed to libbpf through bpf_program__set_prep(), and will be
- *   called before program is loaded. The processor should adjust
- *   the program one time for each instance according to the instance id
- *   passed to it.
- *
- * - bpf_program__set_prep:
- *   Attaches a preprocessor to a BPF program. The number of instances
- *   that should be created is also passed through this function.
- *
- * - bpf_program__nth_fd:
- *   After the program is loaded, get resulting FD of a given instance
- *   of the BPF program.
- *
- * If bpf_program__set_prep() is not used, the program would be loaded
- * without adjustment during bpf_object__load(). The program has only
- * one instance. In this case bpf_program__fd(prog) is equal to
- * bpf_program__nth_fd(prog, 0).
- */
-struct bpf_prog_prep_result {
-       /*
-        * If not NULL, load new instruction array.
-        * If set to NULL, don't load this instance.
-        */
-       struct bpf_insn *new_insn_ptr;
-       int new_insn_cnt;
-
-       /* If not NULL, result FD is written to it. */
-       int *pfd;
-};
-
-/*
- * Parameters of bpf_program_prep_t:
- *  - prog:    The bpf_program being loaded.
- *  - n:       Index of instance being generated.
- *  - insns:   BPF instructions array.
- *  - insns_cnt:Number of instructions in insns.
- *  - res:     Output parameter, result of transformation.
- *
- * Return value:
- *  - Zero:    pre-processing success.
- *  - Non-zero:        pre-processing error, stop loading.
- */
-typedef int (*bpf_program_prep_t)(struct bpf_program *prog, int n,
-                                 struct bpf_insn *insns, int insns_cnt,
-                                 struct bpf_prog_prep_result *res);
-
-LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_program__insns() for getting bpf_program instructions")
-LIBBPF_API int bpf_program__set_prep(struct bpf_program *prog, int nr_instance,
-                                    bpf_program_prep_t prep);
-
-LIBBPF_DEPRECATED_SINCE(0, 7, "multi-instance bpf_program support is deprecated")
-LIBBPF_API int bpf_program__nth_fd(const struct bpf_program *prog, int n);
-
-/*
- * Adjust type of BPF program. Default is kprobe.
- */
-LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__set_type() instead")
-LIBBPF_API int bpf_program__set_socket_filter(struct bpf_program *prog);
-LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__set_type() instead")
-LIBBPF_API int bpf_program__set_tracepoint(struct bpf_program *prog);
-LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__set_type() instead")
-LIBBPF_API int bpf_program__set_raw_tracepoint(struct bpf_program *prog);
-LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__set_type() instead")
-LIBBPF_API int bpf_program__set_kprobe(struct bpf_program *prog);
-LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__set_type() instead")
-LIBBPF_API int bpf_program__set_lsm(struct bpf_program *prog);
-LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__set_type() instead")
-LIBBPF_API int bpf_program__set_sched_cls(struct bpf_program *prog);
-LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__set_type() instead")
-LIBBPF_API int bpf_program__set_sched_act(struct bpf_program *prog);
-LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__set_type() instead")
-LIBBPF_API int bpf_program__set_xdp(struct bpf_program *prog);
-LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__set_type() instead")
-LIBBPF_API int bpf_program__set_perf_event(struct bpf_program *prog);
-LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__set_type() instead")
-LIBBPF_API int bpf_program__set_tracing(struct bpf_program *prog);
-LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__set_type() instead")
-LIBBPF_API int bpf_program__set_struct_ops(struct bpf_program *prog);
-LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__set_type() instead")
-LIBBPF_API int bpf_program__set_extension(struct bpf_program *prog);
-LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__set_type() instead")
-LIBBPF_API int bpf_program__set_sk_lookup(struct bpf_program *prog);
-
 LIBBPF_API enum bpf_prog_type bpf_program__type(const struct bpf_program *prog);
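
Each removed bpf_program__set_<type>() wrapper collapses into the one generic setter that remains. A migration sketch (the setter must run before the object is loaded):

    #include <bpf/libbpf.h>

    /* before: bpf_program__set_xdp(prog);
     * after:  the generic, still-supported form
     */
    static int force_xdp_type(struct bpf_program *prog)
    {
            return bpf_program__set_type(prog, BPF_PROG_TYPE_XDP);
    }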
 
 /**
@@ -853,47 +675,6 @@ LIBBPF_API int
 bpf_program__set_attach_target(struct bpf_program *prog, int attach_prog_fd,
                               const char *attach_func_name);
 
-LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__type() instead")
-LIBBPF_API bool bpf_program__is_socket_filter(const struct bpf_program *prog);
-LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__type() instead")
-LIBBPF_API bool bpf_program__is_tracepoint(const struct bpf_program *prog);
-LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__type() instead")
-LIBBPF_API bool bpf_program__is_raw_tracepoint(const struct bpf_program *prog);
-LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__type() instead")
-LIBBPF_API bool bpf_program__is_kprobe(const struct bpf_program *prog);
-LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__type() instead")
-LIBBPF_API bool bpf_program__is_lsm(const struct bpf_program *prog);
-LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__type() instead")
-LIBBPF_API bool bpf_program__is_sched_cls(const struct bpf_program *prog);
-LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__type() instead")
-LIBBPF_API bool bpf_program__is_sched_act(const struct bpf_program *prog);
-LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__type() instead")
-LIBBPF_API bool bpf_program__is_xdp(const struct bpf_program *prog);
-LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__type() instead")
-LIBBPF_API bool bpf_program__is_perf_event(const struct bpf_program *prog);
-LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__type() instead")
-LIBBPF_API bool bpf_program__is_tracing(const struct bpf_program *prog);
-LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__type() instead")
-LIBBPF_API bool bpf_program__is_struct_ops(const struct bpf_program *prog);
-LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__type() instead")
-LIBBPF_API bool bpf_program__is_extension(const struct bpf_program *prog);
-LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__type() instead")
-LIBBPF_API bool bpf_program__is_sk_lookup(const struct bpf_program *prog);
-
-/*
- * No need for __attribute__((packed)), all members of 'bpf_map_def'
- * are all aligned.  In addition, using __attribute__((packed))
- * would trigger a -Wpacked warning message, and lead to an error
- * if -Werror is set.
- */
-struct bpf_map_def {
-       unsigned int type;
-       unsigned int key_size;
-       unsigned int value_size;
-       unsigned int max_entries;
-       unsigned int map_flags;
-};
-
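
struct bpf_map_def only described legacy "maps"-section definitions; BTF-defined maps in the .maps section are the remaining form on the BPF side. A sketch of an equivalent definition:

    #include <linux/bpf.h>
    #include <bpf/bpf_helpers.h>

    struct {
            __uint(type, BPF_MAP_TYPE_HASH);
            __uint(max_entries, 1024);
            __type(key, __u32);
            __type(value, __u64);
    } counts SEC(".maps");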
 /**
  * @brief **bpf_object__find_map_by_name()** returns BPF map of
  * the given name, if it exists within the passed BPF object
@@ -908,16 +689,6 @@ bpf_object__find_map_by_name(const struct bpf_object *obj, const char *name);
 LIBBPF_API int
 bpf_object__find_map_fd_by_name(const struct bpf_object *obj, const char *name);
 
-/*
- * Get bpf_map through the offset of corresponding struct bpf_map_def
- * in the BPF object file.
- */
-LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_object__find_map_by_name() instead")
-struct bpf_map *
-bpf_object__find_map_by_offset(struct bpf_object *obj, size_t offset);
-
-LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_object__next_map() instead")
-struct bpf_map *bpf_map__next(const struct bpf_map *map, const struct bpf_object *obj);
 LIBBPF_API struct bpf_map *
 bpf_object__next_map(const struct bpf_object *obj, const struct bpf_map *map);
 
@@ -927,8 +698,6 @@ bpf_object__next_map(const struct bpf_object *obj, const struct bpf_map *map);
             (pos) = bpf_object__next_map((obj), (pos)))
 #define bpf_map__for_each bpf_object__for_each_map
 
-LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_object__prev_map() instead")
-struct bpf_map *bpf_map__prev(const struct bpf_map *map, const struct bpf_object *obj);
 LIBBPF_API struct bpf_map *
 bpf_object__prev_map(const struct bpf_object *obj, const struct bpf_map *map);
 
@@ -962,9 +731,6 @@ LIBBPF_API bool bpf_map__autocreate(const struct bpf_map *map);
  */
 LIBBPF_API int bpf_map__fd(const struct bpf_map *map);
 LIBBPF_API int bpf_map__reuse_fd(struct bpf_map *map, int fd);
-/* get map definition */
-LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 8, "use appropriate getters or setters instead")
-const struct bpf_map_def *bpf_map__def(const struct bpf_map *map);
 /* get map name */
 LIBBPF_API const char *bpf_map__name(const struct bpf_map *map);
 /* get/set map type */
@@ -973,8 +739,6 @@ LIBBPF_API int bpf_map__set_type(struct bpf_map *map, enum bpf_map_type type);
 /* get/set map size (max_entries) */
 LIBBPF_API __u32 bpf_map__max_entries(const struct bpf_map *map);
 LIBBPF_API int bpf_map__set_max_entries(struct bpf_map *map, __u32 max_entries);
-LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_map__set_max_entries() instead")
-LIBBPF_API int bpf_map__resize(struct bpf_map *map, __u32 max_entries);
 /* get/set map flags */
 LIBBPF_API __u32 bpf_map__map_flags(const struct bpf_map *map);
 LIBBPF_API int bpf_map__set_map_flags(struct bpf_map *map, __u32 flags);
@@ -997,17 +761,9 @@ LIBBPF_API int bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex);
 LIBBPF_API __u64 bpf_map__map_extra(const struct bpf_map *map);
 LIBBPF_API int bpf_map__set_map_extra(struct bpf_map *map, __u64 map_extra);
 
-typedef void (*bpf_map_clear_priv_t)(struct bpf_map *, void *);
-LIBBPF_DEPRECATED_SINCE(0, 7, "storage via set_priv/priv is deprecated")
-LIBBPF_API int bpf_map__set_priv(struct bpf_map *map, void *priv,
-                                bpf_map_clear_priv_t clear_priv);
-LIBBPF_DEPRECATED_SINCE(0, 7, "storage via set_priv/priv is deprecated")
-LIBBPF_API void *bpf_map__priv(const struct bpf_map *map);
 LIBBPF_API int bpf_map__set_initial_value(struct bpf_map *map,
                                          const void *data, size_t size);
 LIBBPF_API const void *bpf_map__initial_value(struct bpf_map *map, size_t *psize);
-LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_map__type() instead")
-LIBBPF_API bool bpf_map__is_offload_neutral(const struct bpf_map *map);
 
 /**
  * @brief **bpf_map__is_internal()** tells the caller whether or not the
@@ -1130,65 +886,6 @@ LIBBPF_API int bpf_map__lookup_and_delete_elem(const struct bpf_map *map,
 LIBBPF_API int bpf_map__get_next_key(const struct bpf_map *map,
                                     const void *cur_key, void *next_key, size_t key_sz);
 
-/**
- * @brief **libbpf_get_error()** extracts the error code from the passed
- * pointer
- * @param ptr pointer returned from libbpf API function
- * @return error code; or 0 if no error occured
- *
- * Many libbpf API functions which return pointers have logic to encode error
- * codes as pointers, and do not return NULL. Meaning **libbpf_get_error()**
- * should be used on the return value from these functions immediately after
- * calling the API function, with no intervening calls that could clobber the
- * `errno` variable. Consult the individual functions documentation to verify
- * if this logic applies should be used.
- *
- * For these API functions, if `libbpf_set_strict_mode(LIBBPF_STRICT_CLEAN_PTRS)`
- * is enabled, NULL is returned on error instead.
- *
- * If ptr is NULL, then errno should be already set by the failing
- * API, because libbpf never returns NULL on success and it now always
- * sets errno on error.
- *
- * Example usage:
- *
- *   struct perf_buffer *pb;
- *
- *   pb = perf_buffer__new(bpf_map__fd(obj->maps.events), PERF_BUFFER_PAGES, &opts);
- *   err = libbpf_get_error(pb);
- *   if (err) {
- *       pb = NULL;
- *       fprintf(stderr, "failed to open perf buffer: %d\n", err);
- *       goto cleanup;
- *   }
- */
-LIBBPF_API long libbpf_get_error(const void *ptr);
-
-struct bpf_prog_load_attr {
-       const char *file;
-       enum bpf_prog_type prog_type;
-       enum bpf_attach_type expected_attach_type;
-       int ifindex;
-       int log_level;
-       int prog_flags;
-};
-
-LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_object__open() and bpf_object__load() instead")
-LIBBPF_API int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
-                                  struct bpf_object **pobj, int *prog_fd);
-LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_object__open() and bpf_object__load() instead")
-LIBBPF_API int bpf_prog_load_deprecated(const char *file, enum bpf_prog_type type,
-                                       struct bpf_object **pobj, int *prog_fd);
-
-/* XDP related API */
-struct xdp_link_info {
-       __u32 prog_id;
-       __u32 drv_prog_id;
-       __u32 hw_prog_id;
-       __u32 skb_prog_id;
-       __u8 attach_mode;
-};
-
 struct bpf_xdp_set_link_opts {
        size_t sz;
        int old_fd;
@@ -1196,17 +893,6 @@ struct bpf_xdp_set_link_opts {
 };
 #define bpf_xdp_set_link_opts__last_field old_fd
 
-LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_xdp_attach() instead")
-LIBBPF_API int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags);
-LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_xdp_attach() instead")
-LIBBPF_API int bpf_set_link_xdp_fd_opts(int ifindex, int fd, __u32 flags,
-                                       const struct bpf_xdp_set_link_opts *opts);
-LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_xdp_query_id() instead")
-LIBBPF_API int bpf_get_link_xdp_id(int ifindex, __u32 *prog_id, __u32 flags);
-LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_xdp_query() instead")
-LIBBPF_API int bpf_get_link_xdp_info(int ifindex, struct xdp_link_info *info,
-                                    size_t info_size, __u32 flags);
-
 struct bpf_xdp_attach_opts {
        size_t sz;
        int old_prog_fd;
@@ -1305,17 +991,7 @@ typedef void (*perf_buffer_lost_fn)(void *ctx, int cpu, __u64 cnt);
 
 /* common use perf buffer options */
 struct perf_buffer_opts {
-       union {
-               size_t sz;
-               struct { /* DEPRECATED: will be removed in v1.0 */
-                       /* if specified, sample_cb is called for each sample */
-                       perf_buffer_sample_fn sample_cb;
-                       /* if specified, lost_cb is called for each batch of lost samples */
-                       perf_buffer_lost_fn lost_cb;
-                       /* ctx is provided to sample_cb and lost_cb */
-                       void *ctx;
-               };
-       };
+       size_t sz;
 };
 #define perf_buffer_opts__last_field sz
 
@@ -1336,21 +1012,6 @@ perf_buffer__new(int map_fd, size_t page_cnt,
                 perf_buffer_sample_fn sample_cb, perf_buffer_lost_fn lost_cb, void *ctx,
                 const struct perf_buffer_opts *opts);
 
-LIBBPF_API struct perf_buffer *
-perf_buffer__new_v0_6_0(int map_fd, size_t page_cnt,
-                       perf_buffer_sample_fn sample_cb, perf_buffer_lost_fn lost_cb, void *ctx,
-                       const struct perf_buffer_opts *opts);
-
-LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "use new variant of perf_buffer__new() instead")
-struct perf_buffer *perf_buffer__new_deprecated(int map_fd, size_t page_cnt,
-                                               const struct perf_buffer_opts *opts);
-
-#define perf_buffer__new(...) ___libbpf_overload(___perf_buffer_new, __VA_ARGS__)
-#define ___perf_buffer_new6(map_fd, page_cnt, sample_cb, lost_cb, ctx, opts) \
-       perf_buffer__new(map_fd, page_cnt, sample_cb, lost_cb, ctx, opts)
-#define ___perf_buffer_new3(map_fd, page_cnt, opts) \
-       perf_buffer__new_deprecated(map_fd, page_cnt, opts)
-
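With the deprecated 3-argument overload gone, perf_buffer__new() is a plain function again with a single explicit signature. A minimal usage sketch of the remaining form, assuming a PERF_EVENT_ARRAY map fd in map_fd and hypothetical handle_sample()/handle_lost() callbacks (needs <stdio.h>, <errno.h>, <bpf/libbpf.h>):

	static void handle_sample(void *ctx, int cpu, void *data, __u32 size)
	{
		/* process one raw sample of 'size' bytes */
	}

	static void handle_lost(void *ctx, int cpu, __u64 cnt)
	{
		/* 'cnt' samples were dropped on this CPU */
	}

	/* 8 pages per per-CPU ring; opts may be NULL now that only sz is left */
	struct perf_buffer *pb = perf_buffer__new(map_fd, 8, handle_sample,
						  handle_lost, NULL, NULL);
	if (!pb)
		fprintf(stderr, "perf_buffer__new failed: %d\n", -errno);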
 enum bpf_perf_event_ret {
        LIBBPF_PERF_EVENT_DONE  = 0,
        LIBBPF_PERF_EVENT_ERROR = -1,
@@ -1364,21 +1025,9 @@ typedef enum bpf_perf_event_ret
 
 /* raw perf buffer options, giving most power and control */
 struct perf_buffer_raw_opts {
-       union {
-               struct {
-                       size_t sz;
-                       long :0;
-                       long :0;
-               };
-               struct { /* DEPRECATED: will be removed in v1.0 */
-                       /* perf event attrs passed directly into perf_event_open() */
-                       struct perf_event_attr *attr;
-                       /* raw event callback */
-                       perf_buffer_event_fn event_cb;
-                       /* ctx is provided to event_cb */
-                       void *ctx;
-               };
-       };
+       size_t sz;
+       long :0;
+       long :0;
        /* if cpu_cnt == 0, open all on all possible CPUs (up to the number of
         * max_entries of given PERF_EVENT_ARRAY map)
         */
@@ -1390,26 +1039,13 @@ struct perf_buffer_raw_opts {
 };
 #define perf_buffer_raw_opts__last_field map_keys
 
+struct perf_event_attr;
+
 LIBBPF_API struct perf_buffer *
 perf_buffer__new_raw(int map_fd, size_t page_cnt, struct perf_event_attr *attr,
                     perf_buffer_event_fn event_cb, void *ctx,
                     const struct perf_buffer_raw_opts *opts);
 
-LIBBPF_API struct perf_buffer *
-perf_buffer__new_raw_v0_6_0(int map_fd, size_t page_cnt, struct perf_event_attr *attr,
-                           perf_buffer_event_fn event_cb, void *ctx,
-                           const struct perf_buffer_raw_opts *opts);
-
-LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "use new variant of perf_buffer__new_raw() instead")
-struct perf_buffer *perf_buffer__new_raw_deprecated(int map_fd, size_t page_cnt,
-                                                   const struct perf_buffer_raw_opts *opts);
-
-#define perf_buffer__new_raw(...) ___libbpf_overload(___perf_buffer_new_raw, __VA_ARGS__)
-#define ___perf_buffer_new_raw6(map_fd, page_cnt, attr, event_cb, ctx, opts) \
-       perf_buffer__new_raw(map_fd, page_cnt, attr, event_cb, ctx, opts)
-#define ___perf_buffer_new_raw3(map_fd, page_cnt, opts) \
-       perf_buffer__new_raw_deprecated(map_fd, page_cnt, opts)
-
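Similarly, perf_buffer__new_raw() now always takes the perf_event_attr and raw event callback directly. A sketch under the same assumptions (map_fd plus a hypothetical handle_event() callback):

	static enum bpf_perf_event_ret
	handle_event(void *ctx, int cpu, struct perf_event_header *event)
	{
		/* inspect the raw perf event, keep consuming */
		return LIBBPF_PERF_EVENT_CONT;
	}

	struct perf_event_attr attr = {
		.type = PERF_TYPE_SOFTWARE,
		.config = PERF_COUNT_SW_BPF_OUTPUT,
		.sample_type = PERF_SAMPLE_RAW,
		.wakeup_events = 1,
	};

	/* NULL opts: open on all possible CPUs of the PERF_EVENT_ARRAY map */
	struct perf_buffer *pb = perf_buffer__new_raw(map_fd, 8, &attr,
						      handle_event, NULL, NULL);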
 LIBBPF_API void perf_buffer__free(struct perf_buffer *pb);
 LIBBPF_API int perf_buffer__epoll_fd(const struct perf_buffer *pb);
 LIBBPF_API int perf_buffer__poll(struct perf_buffer *pb, int timeout_ms);
@@ -1418,15 +1054,6 @@ LIBBPF_API int perf_buffer__consume_buffer(struct perf_buffer *pb, size_t buf_id
 LIBBPF_API size_t perf_buffer__buffer_cnt(const struct perf_buffer *pb);
 LIBBPF_API int perf_buffer__buffer_fd(const struct perf_buffer *pb, size_t buf_idx);
 
-typedef enum bpf_perf_event_ret
-       (*bpf_perf_event_print_t)(struct perf_event_header *hdr,
-                                 void *private_data);
-LIBBPF_DEPRECATED_SINCE(0, 8, "use perf_buffer__poll() or  perf_buffer__consume() instead")
-LIBBPF_API enum bpf_perf_event_ret
-bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size,
-                          void **copy_mem, size_t *copy_size,
-                          bpf_perf_event_print_t fn, void *private_data);
-
 struct bpf_prog_linfo;
 struct bpf_prog_info;
 
@@ -1448,14 +1075,6 @@ bpf_prog_linfo__lfind(const struct bpf_prog_linfo *prog_linfo,
  * user, causing subsequent probes to fail. In this case, the caller may want
  * to adjust that limit with setrlimit().
  */
-LIBBPF_DEPRECATED_SINCE(0, 8, "use libbpf_probe_bpf_prog_type() instead")
-LIBBPF_API bool bpf_probe_prog_type(enum bpf_prog_type prog_type, __u32 ifindex);
-LIBBPF_DEPRECATED_SINCE(0, 8, "use libbpf_probe_bpf_map_type() instead")
-LIBBPF_API bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex);
-LIBBPF_DEPRECATED_SINCE(0, 8, "use libbpf_probe_bpf_helper() instead")
-LIBBPF_API bool bpf_probe_helper(enum bpf_func_id id, enum bpf_prog_type prog_type, __u32 ifindex);
-LIBBPF_DEPRECATED_SINCE(0, 8, "implement your own or use bpftool for feature detection")
-LIBBPF_API bool bpf_probe_large_insn_limit(__u32 ifindex);
 
 /**
  * @brief **libbpf_probe_bpf_prog_type()** detects if host kernel supports
@@ -1499,72 +1118,6 @@ LIBBPF_API int libbpf_probe_bpf_map_type(enum bpf_map_type map_type, const void
 LIBBPF_API int libbpf_probe_bpf_helper(enum bpf_prog_type prog_type,
                                       enum bpf_func_id helper_id, const void *opts);
 
-/*
- * Get bpf_prog_info in continuous memory
- *
- * struct bpf_prog_info has multiple arrays. The user has option to choose
- * arrays to fetch from kernel. The following APIs provide an uniform way to
- * fetch these data. All arrays in bpf_prog_info are stored in a single
- * continuous memory region. This makes it easy to store the info in a
- * file.
- *
- * Before writing bpf_prog_info_linear to files, it is necessary to
- * translate pointers in bpf_prog_info to offsets. Helper functions
- * bpf_program__bpil_addr_to_offs() and bpf_program__bpil_offs_to_addr()
- * are introduced to switch between pointers and offsets.
- *
- * Examples:
- *   # To fetch map_ids and prog_tags:
- *   __u64 arrays = (1UL << BPF_PROG_INFO_MAP_IDS) |
- *           (1UL << BPF_PROG_INFO_PROG_TAGS);
- *   struct bpf_prog_info_linear *info_linear =
- *           bpf_program__get_prog_info_linear(fd, arrays);
- *
- *   # To save data in file
- *   bpf_program__bpil_addr_to_offs(info_linear);
- *   write(f, info_linear, sizeof(*info_linear) + info_linear->data_len);
- *
- *   # To read data from file
- *   read(f, info_linear, <proper_size>);
- *   bpf_program__bpil_offs_to_addr(info_linear);
- */
-enum bpf_prog_info_array {
-       BPF_PROG_INFO_FIRST_ARRAY = 0,
-       BPF_PROG_INFO_JITED_INSNS = 0,
-       BPF_PROG_INFO_XLATED_INSNS,
-       BPF_PROG_INFO_MAP_IDS,
-       BPF_PROG_INFO_JITED_KSYMS,
-       BPF_PROG_INFO_JITED_FUNC_LENS,
-       BPF_PROG_INFO_FUNC_INFO,
-       BPF_PROG_INFO_LINE_INFO,
-       BPF_PROG_INFO_JITED_LINE_INFO,
-       BPF_PROG_INFO_PROG_TAGS,
-       BPF_PROG_INFO_LAST_ARRAY,
-};
-
-struct bpf_prog_info_linear {
-       /* size of struct bpf_prog_info, when the tool is compiled */
-       __u32                   info_len;
-       /* total bytes allocated for data, round up to 8 bytes */
-       __u32                   data_len;
-       /* which arrays are included in data */
-       __u64                   arrays;
-       struct bpf_prog_info    info;
-       __u8                    data[];
-};
-
-LIBBPF_DEPRECATED_SINCE(0, 6, "use a custom linear prog_info wrapper")
-LIBBPF_API struct bpf_prog_info_linear *
-bpf_program__get_prog_info_linear(int fd, __u64 arrays);
-
-LIBBPF_DEPRECATED_SINCE(0, 6, "use a custom linear prog_info wrapper")
-LIBBPF_API void
-bpf_program__bpil_addr_to_offs(struct bpf_prog_info_linear *info_linear);
-
-LIBBPF_DEPRECATED_SINCE(0, 6, "use a custom linear prog_info wrapper")
-LIBBPF_API void
-bpf_program__bpil_offs_to_addr(struct bpf_prog_info_linear *info_linear);
-
 /**
  * @brief **libbpf_num_possible_cpus()** is a helper function to get the
  * number of possible CPUs that the host kernel supports and expects.
index 116a2a8..94b589e 100644
@@ -1,29 +1,14 @@
 LIBBPF_0.0.1 {
        global:
                bpf_btf_get_fd_by_id;
-               bpf_create_map;
-               bpf_create_map_in_map;
-               bpf_create_map_in_map_node;
-               bpf_create_map_name;
-               bpf_create_map_node;
-               bpf_create_map_xattr;
-               bpf_load_btf;
-               bpf_load_program;
-               bpf_load_program_xattr;
                bpf_map__btf_key_type_id;
                bpf_map__btf_value_type_id;
-               bpf_map__def;
                bpf_map__fd;
-               bpf_map__is_offload_neutral;
                bpf_map__name;
-               bpf_map__next;
                bpf_map__pin;
-               bpf_map__prev;
-               bpf_map__priv;
                bpf_map__reuse_fd;
                bpf_map__set_ifindex;
                bpf_map__set_inner_map_fd;
-               bpf_map__set_priv;
                bpf_map__unpin;
                bpf_map_delete_elem;
                bpf_map_get_fd_by_id;
@@ -38,79 +23,37 @@ LIBBPF_0.0.1 {
                bpf_object__btf_fd;
                bpf_object__close;
                bpf_object__find_map_by_name;
-               bpf_object__find_map_by_offset;
-               bpf_object__find_program_by_title;
                bpf_object__kversion;
                bpf_object__load;
                bpf_object__name;
-               bpf_object__next;
                bpf_object__open;
-               bpf_object__open_buffer;
-               bpf_object__open_xattr;
                bpf_object__pin;
                bpf_object__pin_maps;
                bpf_object__pin_programs;
-               bpf_object__priv;
-               bpf_object__set_priv;
-               bpf_object__unload;
                bpf_object__unpin_maps;
                bpf_object__unpin_programs;
-               bpf_perf_event_read_simple;
                bpf_prog_attach;
                bpf_prog_detach;
                bpf_prog_detach2;
                bpf_prog_get_fd_by_id;
                bpf_prog_get_next_id;
-               bpf_prog_load;
-               bpf_prog_load_xattr;
                bpf_prog_query;
-               bpf_prog_test_run;
-               bpf_prog_test_run_xattr;
                bpf_program__fd;
-               bpf_program__is_kprobe;
-               bpf_program__is_perf_event;
-               bpf_program__is_raw_tracepoint;
-               bpf_program__is_sched_act;
-               bpf_program__is_sched_cls;
-               bpf_program__is_socket_filter;
-               bpf_program__is_tracepoint;
-               bpf_program__is_xdp;
-               bpf_program__load;
-               bpf_program__next;
-               bpf_program__nth_fd;
                bpf_program__pin;
-               bpf_program__pin_instance;
-               bpf_program__prev;
-               bpf_program__priv;
                bpf_program__set_expected_attach_type;
                bpf_program__set_ifindex;
-               bpf_program__set_kprobe;
-               bpf_program__set_perf_event;
-               bpf_program__set_prep;
-               bpf_program__set_priv;
-               bpf_program__set_raw_tracepoint;
-               bpf_program__set_sched_act;
-               bpf_program__set_sched_cls;
-               bpf_program__set_socket_filter;
-               bpf_program__set_tracepoint;
                bpf_program__set_type;
-               bpf_program__set_xdp;
-               bpf_program__title;
                bpf_program__unload;
                bpf_program__unpin;
-               bpf_program__unpin_instance;
                bpf_prog_linfo__free;
                bpf_prog_linfo__new;
                bpf_prog_linfo__lfind_addr_func;
                bpf_prog_linfo__lfind;
                bpf_raw_tracepoint_open;
-               bpf_set_link_xdp_fd;
                bpf_task_fd_query;
-               bpf_verify_program;
                btf__fd;
                btf__find_by_name;
                btf__free;
-               btf__get_from_id;
                btf__name_by_offset;
                btf__new;
                btf__resolve_size;
@@ -127,48 +70,24 @@ LIBBPF_0.0.1 {
 
 LIBBPF_0.0.2 {
        global:
-               bpf_probe_helper;
-               bpf_probe_map_type;
-               bpf_probe_prog_type;
-               bpf_map__resize;
                bpf_map_lookup_elem_flags;
                bpf_object__btf;
                bpf_object__find_map_fd_by_name;
-               bpf_get_link_xdp_id;
-               btf__dedup;
-               btf__get_map_kv_tids;
-               btf__get_nr_types;
                btf__get_raw_data;
-               btf__load;
                btf_ext__free;
-               btf_ext__func_info_rec_size;
                btf_ext__get_raw_data;
-               btf_ext__line_info_rec_size;
                btf_ext__new;
-               btf_ext__reloc_func_info;
-               btf_ext__reloc_line_info;
-               xsk_umem__create;
-               xsk_socket__create;
-               xsk_umem__delete;
-               xsk_socket__delete;
-               xsk_umem__fd;
-               xsk_socket__fd;
-               bpf_program__get_prog_info_linear;
-               bpf_program__bpil_addr_to_offs;
-               bpf_program__bpil_offs_to_addr;
 } LIBBPF_0.0.1;
 
 LIBBPF_0.0.3 {
        global:
                bpf_map__is_internal;
                bpf_map_freeze;
-               btf__finalize_data;
 } LIBBPF_0.0.2;
 
 LIBBPF_0.0.4 {
        global:
                bpf_link__destroy;
-               bpf_object__load_xattr;
                bpf_program__attach_kprobe;
                bpf_program__attach_perf_event;
                bpf_program__attach_raw_tracepoint;
@@ -176,14 +95,10 @@ LIBBPF_0.0.4 {
                bpf_program__attach_uprobe;
                btf_dump__dump_type;
                btf_dump__free;
-               btf_dump__new;
                btf__parse_elf;
                libbpf_num_possible_cpus;
                perf_buffer__free;
-               perf_buffer__new;
-               perf_buffer__new_raw;
                perf_buffer__poll;
-               xsk_umem__create;
 } LIBBPF_0.0.3;
 
 LIBBPF_0.0.5 {
@@ -193,7 +108,6 @@ LIBBPF_0.0.5 {
 
 LIBBPF_0.0.6 {
        global:
-               bpf_get_link_xdp_info;
                bpf_map__get_pin_path;
                bpf_map__is_pinned;
                bpf_map__set_pin_path;
@@ -202,9 +116,6 @@ LIBBPF_0.0.6 {
                bpf_program__attach_trace;
                bpf_program__get_expected_attach_type;
                bpf_program__get_type;
-               bpf_program__is_tracing;
-               bpf_program__set_tracing;
-               bpf_program__size;
                btf__find_by_name_kind;
                libbpf_find_vmlinux_btf_id;
 } LIBBPF_0.0.5;
@@ -224,14 +135,8 @@ LIBBPF_0.0.7 {
                bpf_object__detach_skeleton;
                bpf_object__load_skeleton;
                bpf_object__open_skeleton;
-               bpf_probe_large_insn_limit;
-               bpf_prog_attach_xattr;
                bpf_program__attach;
                bpf_program__name;
-               bpf_program__is_extension;
-               bpf_program__is_struct_ops;
-               bpf_program__set_extension;
-               bpf_program__set_struct_ops;
                btf__align_of;
                libbpf_find_kernel_btf;
 } LIBBPF_0.0.6;
@@ -250,10 +155,7 @@ LIBBPF_0.0.8 {
                bpf_prog_attach_opts;
                bpf_program__attach_cgroup;
                bpf_program__attach_lsm;
-               bpf_program__is_lsm;
                bpf_program__set_attach_target;
-               bpf_program__set_lsm;
-               bpf_set_link_xdp_fd_opts;
 } LIBBPF_0.0.7;
 
 LIBBPF_0.0.9 {
@@ -291,9 +193,7 @@ LIBBPF_0.1.0 {
                bpf_map__value_size;
                bpf_program__attach_xdp;
                bpf_program__autoload;
-               bpf_program__is_sk_lookup;
                bpf_program__set_autoload;
-               bpf_program__set_sk_lookup;
                btf__parse;
                btf__parse_raw;
                btf__pointer_size;
@@ -336,7 +236,6 @@ LIBBPF_0.2.0 {
                perf_buffer__buffer_fd;
                perf_buffer__epoll_fd;
                perf_buffer__consume_buffer;
-               xsk_socket__create_shared;
 } LIBBPF_0.1.0;
 
 LIBBPF_0.3.0 {
@@ -348,8 +247,6 @@ LIBBPF_0.3.0 {
                btf__new_empty_split;
                btf__new_split;
                ring_buffer__epoll_fd;
-               xsk_setup_xdp_prog;
-               xsk_socket__update_xskmap;
 } LIBBPF_0.2.0;
 
 LIBBPF_0.4.0 {
@@ -397,7 +294,6 @@ LIBBPF_0.6.0 {
                bpf_object__next_program;
                bpf_object__prev_map;
                bpf_object__prev_program;
-               bpf_prog_load_deprecated;
                bpf_prog_load;
                bpf_program__flags;
                bpf_program__insn_cnt;
@@ -407,18 +303,14 @@ LIBBPF_0.6.0 {
                btf__add_decl_tag;
                btf__add_type_tag;
                btf__dedup;
-               btf__dedup_deprecated;
                btf__raw_data;
                btf__type_cnt;
                btf_dump__new;
-               btf_dump__new_deprecated;
                libbpf_major_version;
                libbpf_minor_version;
                libbpf_version_string;
                perf_buffer__new;
-               perf_buffer__new_deprecated;
                perf_buffer__new_raw;
-               perf_buffer__new_raw_deprecated;
 } LIBBPF_0.5.0;
 
 LIBBPF_0.7.0 {
@@ -434,10 +326,11 @@ LIBBPF_0.7.0 {
                bpf_xdp_detach;
                bpf_xdp_query;
                bpf_xdp_query_id;
+               btf_ext__raw_data;
                libbpf_probe_bpf_helper;
                libbpf_probe_bpf_map_type;
                libbpf_probe_bpf_prog_type;
-               libbpf_set_memlock_rlim_max;
+               libbpf_set_memlock_rlim;
 } LIBBPF_0.6.0;
 
 LIBBPF_0.8.0 {
@@ -462,12 +355,11 @@ LIBBPF_0.8.0 {
 
 LIBBPF_1.0.0 {
        global:
+               bpf_prog_query_opts;
                btf__add_enum64;
                btf__add_enum64_value;
                libbpf_bpf_attach_type_str;
                libbpf_bpf_link_type_str;
                libbpf_bpf_map_type_str;
                libbpf_bpf_prog_type_str;
-
-       local: *;
 };
index 000e377..9a7937f 100644
 /* Add checks for other versions below when planning deprecation of API symbols
  * with the LIBBPF_DEPRECATED_SINCE macro.
  */
-#if __LIBBPF_CURRENT_VERSION_GEQ(0, 6)
-#define __LIBBPF_MARK_DEPRECATED_0_6(X) X
+#if __LIBBPF_CURRENT_VERSION_GEQ(1, 0)
+#define __LIBBPF_MARK_DEPRECATED_1_0(X) X
 #else
-#define __LIBBPF_MARK_DEPRECATED_0_6(X)
-#endif
-#if __LIBBPF_CURRENT_VERSION_GEQ(0, 7)
-#define __LIBBPF_MARK_DEPRECATED_0_7(X) X
-#else
-#define __LIBBPF_MARK_DEPRECATED_0_7(X)
-#endif
-#if __LIBBPF_CURRENT_VERSION_GEQ(0, 8)
-#define __LIBBPF_MARK_DEPRECATED_0_8(X) X
-#else
-#define __LIBBPF_MARK_DEPRECATED_0_8(X)
+#define __LIBBPF_MARK_DEPRECATED_1_0(X)
 #endif
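For illustration, with only the 1.0 gate left, a declaration marked LIBBPF_DEPRECATED_SINCE(1, 0, ...) picks up the deprecation attribute exactly when the build targets v1.0+. A sketch of the expansion chain, assuming the LIBBPF_DEPRECATED() helper from libbpf_common.h:

	LIBBPF_DEPRECATED_SINCE(1, 0, "use foo() instead")
	/* -> __LIBBPF_MARK_DEPRECATED_1_0(LIBBPF_DEPRECATED("libbpf v1.0+: use foo() instead"))
	 * -> __attribute__((deprecated("libbpf v1.0+: use foo() instead")))  if version >= 1.0
	 * -> (nothing)                                                       otherwise
	 */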
 
 /* This set of internal macros allows to do "function overloading" based on
index a1ad145..9cd7829 100644
@@ -15,7 +15,6 @@
 #include <linux/err.h>
 #include <fcntl.h>
 #include <unistd.h>
-#include "libbpf_legacy.h"
 #include "relo_core.h"
 
 /* make sure libbpf doesn't use kernel-only integer typedefs */
@@ -478,8 +477,6 @@ int btf_ext_visit_str_offs(struct btf_ext *btf_ext, str_off_visit_fn visit, void
 __s32 btf__find_by_name_kind_own(const struct btf *btf, const char *type_name,
                                 __u32 kind);
 
-extern enum libbpf_strict_mode libbpf_mode;
-
 typedef int (*kallsyms_cb_t)(unsigned long long sym_addr, char sym_type,
                             const char *sym_name, void *ctx);
 
@@ -498,12 +495,8 @@ static inline int libbpf_err(int ret)
  */
 static inline int libbpf_err_errno(int ret)
 {
-       if (libbpf_mode & LIBBPF_STRICT_DIRECT_ERRS)
-               /* errno is already assumed to be set on error */
-               return ret < 0 ? -errno : ret;
-
-       /* legacy: on error return -1 directly and don't touch errno */
-       return ret;
+       /* errno is already assumed to be set on error */
+       return ret < 0 ? -errno : ret;
 }
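From the caller's perspective, int-returning libbpf APIs that go through libbpf_err_errno() now follow a single convention: >= 0 on success, -errno on failure (with errno set as well). A minimal sketch, assuming an insns array of insn_cnt instructions:

	int fd;

	fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL",
			   insns, insn_cnt, NULL);
	if (fd < 0)
		fprintf(stderr, "prog load failed: %d\n", fd); /* fd == -errno */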
 
 /* handle error for pointer-returning APIs, err is assumed to be < 0 always */
@@ -511,12 +504,7 @@ static inline void *libbpf_err_ptr(int err)
 {
        /* set errno on error, this doesn't break anything */
        errno = -err;
-
-       if (libbpf_mode & LIBBPF_STRICT_CLEAN_PTRS)
-               return NULL;
-
-       /* legacy: encode err as ptr */
-       return ERR_PTR(err);
+       return NULL;
 }
 
 /* handle pointer-returning APIs' error handling */
@@ -526,11 +514,7 @@ static inline void *libbpf_ptr(void *ret)
        if (IS_ERR(ret))
                errno = -PTR_ERR(ret);
 
-       if (libbpf_mode & LIBBPF_STRICT_CLEAN_PTRS)
-               return IS_ERR(ret) ? NULL : ret;
-
-       /* legacy: pass-through original pointer */
-       return ret;
+       return IS_ERR(ret) ? NULL : ret;
 }
 
 static inline bool str_is_empty(const char *s)
index d7bcbd0..5b7e015 100644
 extern "C" {
 #endif
 
+/* As of libbpf 1.0 libbpf_set_strict_mode() and enum libbpf_strict_mode have
+ * no effect. They are kept in libbpf_legacy.h so that applications which
+ * prepared for libbpf 1.0 ahead of the final release by calling
+ * libbpf_set_strict_mode() keep working with libbpf 1.0+ without any changes.
+ */
 enum libbpf_strict_mode {
        /* Turn on all supported strict features of libbpf to simulate libbpf
         * v1.0 behavior.
@@ -71,8 +76,8 @@ enum libbpf_strict_mode {
         * first BPF program or map creation operation. This is done only if
         * kernel is too old to support memcg-based memory accounting for BPF
         * subsystem. By default, RLIMIT_MEMLOCK limit is set to RLIM_INFINITY,
-        * but it can be overriden with libbpf_set_memlock_rlim_max() API.
-        * Note that libbpf_set_memlock_rlim_max() needs to be called before
+        * but it can be overridden with the libbpf_set_memlock_rlim() API.
+        * Note that libbpf_set_memlock_rlim() needs to be called before
         * the very first bpf_prog_load(), bpf_map_create() or bpf_object__load()
         * operation.
         */
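For example, capping the automatic bump instead of using RLIM_INFINITY (a sketch; libbpf_set_memlock_rlim() takes the byte limit to apply):

	/* must run before the very first bpf_prog_load(), bpf_map_create()
	 * or bpf_object__load() in the process
	 */
	libbpf_set_memlock_rlim(128UL * 1024 * 1024); /* 128 MiB */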
@@ -88,6 +93,25 @@ enum libbpf_strict_mode {
 
 LIBBPF_API int libbpf_set_strict_mode(enum libbpf_strict_mode mode);
 
+/**
+ * @brief **libbpf_get_error()** extracts the error code from the passed
+ * pointer
+ * @param ptr pointer returned from libbpf API function
+ * @return error code; or 0 if no error occurred
+ *
+ * Note that as of libbpf 1.0 this function is unnecessary and its use is
+ * discouraged. Libbpf no longer returns error codes embedded into the
+ * pointer itself. Instead, NULL is returned on error and the error code is
+ * passed through the thread-local errno variable. **libbpf_get_error()**
+ * simply returns the -errno value if it receives NULL, which is correct
+ * only if errno hasn't been modified between the libbpf API call and the
+ * corresponding **libbpf_get_error()** call. Prefer checking the return
+ * value for NULL and using errno directly.
+ *
+ * This API is kept in libbpf 1.0 so that applications which were 1.0-ready
+ * before the final libbpf 1.0 release keep working without changes.
+ */
+LIBBPF_API long libbpf_get_error(const void *ptr);
+
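The preferred 1.0-style check, with libbpf_get_error() shown only for comparison; a sketch assuming btf__parse() against the kernel's BTF:

	struct btf *btf;
	long err;

	btf = btf__parse("/sys/kernel/btf/vmlinux", NULL);
	if (!btf) /* preferred: NULL return, error code in errno */
		fprintf(stderr, "failed to parse BTF: %d\n", -errno);

	/* equivalent, but only valid immediately after the failing call: */
	err = libbpf_get_error(btf);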
 #define DECLARE_LIBBPF_OPTS LIBBPF_OPTS
 
 /* "Discouraged" APIs which don't follow consistent libbpf naming patterns.
index 97b06ce..0b53987 100644
 #include "libbpf.h"
 #include "libbpf_internal.h"
 
-static bool grep(const char *buffer, const char *pattern)
-{
-       return !!strstr(buffer, pattern);
-}
-
-static int get_vendor_id(int ifindex)
-{
-       char ifname[IF_NAMESIZE], path[64], buf[8];
-       ssize_t len;
-       int fd;
-
-       if (!if_indextoname(ifindex, ifname))
-               return -1;
-
-       snprintf(path, sizeof(path), "/sys/class/net/%s/device/vendor", ifname);
-
-       fd = open(path, O_RDONLY | O_CLOEXEC);
-       if (fd < 0)
-               return -1;
-
-       len = read(fd, buf, sizeof(buf));
-       close(fd);
-       if (len < 0)
-               return -1;
-       if (len >= (ssize_t)sizeof(buf))
-               return -1;
-       buf[len] = '\0';
-
-       return strtol(buf, NULL, 0);
-}
-
 static int probe_prog_load(enum bpf_prog_type prog_type,
                           const struct bpf_insn *insns, size_t insns_cnt,
-                          char *log_buf, size_t log_buf_sz,
-                          __u32 ifindex)
+                          char *log_buf, size_t log_buf_sz)
 {
        LIBBPF_OPTS(bpf_prog_load_opts, opts,
                .log_buf = log_buf,
                .log_size = log_buf_sz,
                .log_level = log_buf ? 1 : 0,
-               .prog_ifindex = ifindex,
        );
        int fd, err, exp_err = 0;
        const char *exp_msg = NULL;
@@ -161,31 +128,10 @@ int libbpf_probe_bpf_prog_type(enum bpf_prog_type prog_type, const void *opts)
        if (opts)
                return libbpf_err(-EINVAL);
 
-       ret = probe_prog_load(prog_type, insns, insn_cnt, NULL, 0, 0);
+       ret = probe_prog_load(prog_type, insns, insn_cnt, NULL, 0);
        return libbpf_err(ret);
 }
 
-bool bpf_probe_prog_type(enum bpf_prog_type prog_type, __u32 ifindex)
-{
-       struct bpf_insn insns[2] = {
-               BPF_MOV64_IMM(BPF_REG_0, 0),
-               BPF_EXIT_INSN()
-       };
-
-       /* prefer libbpf_probe_bpf_prog_type() unless offload is requested */
-       if (ifindex == 0)
-               return libbpf_probe_bpf_prog_type(prog_type, NULL) == 1;
-
-       if (ifindex && prog_type == BPF_PROG_TYPE_SCHED_CLS)
-               /* nfp returns -EINVAL on exit(0) with TC offload */
-               insns[0].imm = 2;
-
-       errno = 0;
-       probe_prog_load(prog_type, insns, ARRAY_SIZE(insns), NULL, 0, ifindex);
-
-       return errno != EINVAL && errno != EOPNOTSUPP;
-}
-
 int libbpf__load_raw_btf(const char *raw_types, size_t types_len,
                         const char *str_sec, size_t str_len)
 {
@@ -242,15 +188,13 @@ static int load_local_storage_btf(void)
                                     strs, sizeof(strs));
 }
 
-static int probe_map_create(enum bpf_map_type map_type, __u32 ifindex)
+static int probe_map_create(enum bpf_map_type map_type)
 {
        LIBBPF_OPTS(bpf_map_create_opts, opts);
        int key_size, value_size, max_entries;
        __u32 btf_key_type_id = 0, btf_value_type_id = 0;
        int fd = -1, btf_fd = -1, fd_inner = -1, exp_err = 0, err;
 
-       opts.map_ifindex = ifindex;
-
        key_size        = sizeof(__u32);
        value_size      = sizeof(__u32);
        max_entries     = 1;
@@ -326,12 +270,6 @@ static int probe_map_create(enum bpf_map_type map_type, __u32 ifindex)
 
        if (map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS ||
            map_type == BPF_MAP_TYPE_HASH_OF_MAPS) {
-               /* TODO: probe for device, once libbpf has a function to create
-                * map-in-map for offload
-                */
-               if (ifindex)
-                       goto cleanup;
-
                fd_inner = bpf_map_create(BPF_MAP_TYPE_HASH, NULL,
                                          sizeof(__u32), sizeof(__u32), 1, NULL);
                if (fd_inner < 0)
@@ -370,15 +308,10 @@ int libbpf_probe_bpf_map_type(enum bpf_map_type map_type, const void *opts)
        if (opts)
                return libbpf_err(-EINVAL);
 
-       ret = probe_map_create(map_type, 0);
+       ret = probe_map_create(map_type);
        return libbpf_err(ret);
 }
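Callers migrate from the removed bool probes to the tri-state libbpf_probe_*() API, which returns 1 if supported, 0 if not, and a negative error code on failure. A minimal sketch:

	int ret = libbpf_probe_bpf_map_type(BPF_MAP_TYPE_RINGBUF, NULL);

	if (ret < 0)
		fprintf(stderr, "probe failed: %d\n", ret);
	else
		printf("BPF_MAP_TYPE_RINGBUF is%s supported\n", ret ? "" : " not");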
 
-bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex)
-{
-       return probe_map_create(map_type, ifindex) == 1;
-}
-
 int libbpf_probe_bpf_helper(enum bpf_prog_type prog_type, enum bpf_func_id helper_id,
                            const void *opts)
 {
@@ -407,7 +340,7 @@ int libbpf_probe_bpf_helper(enum bpf_prog_type prog_type, enum bpf_func_id helpe
        }
 
        buf[0] = '\0';
-       ret = probe_prog_load(prog_type, insns, insn_cnt, buf, sizeof(buf), 0);
+       ret = probe_prog_load(prog_type, insns, insn_cnt, buf, sizeof(buf));
        if (ret < 0)
                return libbpf_err(ret);
 
@@ -427,51 +360,3 @@ int libbpf_probe_bpf_helper(enum bpf_prog_type prog_type, enum bpf_func_id helpe
                return 0;
        return 1; /* assume supported */
 }
-
-bool bpf_probe_helper(enum bpf_func_id id, enum bpf_prog_type prog_type,
-                     __u32 ifindex)
-{
-       struct bpf_insn insns[2] = {
-               BPF_EMIT_CALL(id),
-               BPF_EXIT_INSN()
-       };
-       char buf[4096] = {};
-       bool res;
-
-       probe_prog_load(prog_type, insns, ARRAY_SIZE(insns), buf, sizeof(buf), ifindex);
-       res = !grep(buf, "invalid func ") && !grep(buf, "unknown func ");
-
-       if (ifindex) {
-               switch (get_vendor_id(ifindex)) {
-               case 0x19ee: /* Netronome specific */
-                       res = res && !grep(buf, "not supported by FW") &&
-                               !grep(buf, "unsupported function id");
-                       break;
-               default:
-                       break;
-               }
-       }
-
-       return res;
-}
-
-/*
- * Probe for availability of kernel commit (5.3):
- *
- * c04c0d2b968a ("bpf: increase complexity limit and maximum program size")
- */
-bool bpf_probe_large_insn_limit(__u32 ifindex)
-{
-       struct bpf_insn insns[BPF_MAXINSNS + 1];
-       int i;
-
-       for (i = 0; i < BPF_MAXINSNS; i++)
-               insns[i] = BPF_MOV64_IMM(BPF_REG_0, 1);
-       insns[BPF_MAXINSNS] = BPF_EXIT_INSN();
-
-       errno = 0;
-       probe_prog_load(BPF_PROG_TYPE_SCHED_CLS, insns, ARRAY_SIZE(insns), NULL, 0,
-                       ifindex);
-
-       return errno != E2BIG && errno != EINVAL;
-}
index cbc8967..6c01316 100644
@@ -27,6 +27,14 @@ typedef int (*libbpf_dump_nlmsg_t)(void *cookie, void *msg, struct nlattr **tb);
 typedef int (*__dump_nlmsg_t)(struct nlmsghdr *nlmsg, libbpf_dump_nlmsg_t,
                              void *cookie);
 
+struct xdp_link_info {
+       __u32 prog_id;
+       __u32 drv_prog_id;
+       __u32 hw_prog_id;
+       __u32 skb_prog_id;
+       __u8 attach_mode;
+};
+
 struct xdp_id_md {
        int ifindex;
        __u32 flags;
@@ -288,31 +296,6 @@ int bpf_xdp_detach(int ifindex, __u32 flags, const struct bpf_xdp_attach_opts *o
        return bpf_xdp_attach(ifindex, -1, flags, opts);
 }
 
-int bpf_set_link_xdp_fd_opts(int ifindex, int fd, __u32 flags,
-                            const struct bpf_xdp_set_link_opts *opts)
-{
-       int old_fd = -1, ret;
-
-       if (!OPTS_VALID(opts, bpf_xdp_set_link_opts))
-               return libbpf_err(-EINVAL);
-
-       if (OPTS_HAS(opts, old_fd)) {
-               old_fd = OPTS_GET(opts, old_fd, -1);
-               flags |= XDP_FLAGS_REPLACE;
-       }
-
-       ret = __bpf_set_link_xdp_fd_replace(ifindex, fd, old_fd, flags);
-       return libbpf_err(ret);
-}
-
-int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags)
-{
-       int ret;
-
-       ret = __bpf_set_link_xdp_fd_replace(ifindex, fd, 0, flags);
-       return libbpf_err(ret);
-}
-
 static int __dump_link_nlmsg(struct nlmsghdr *nlh,
                             libbpf_dump_nlmsg_t dump_link_nlmsg, void *cookie)
 {
@@ -413,30 +396,6 @@ int bpf_xdp_query(int ifindex, int xdp_flags, struct bpf_xdp_query_opts *opts)
        return 0;
 }
 
-int bpf_get_link_xdp_info(int ifindex, struct xdp_link_info *info,
-                         size_t info_size, __u32 flags)
-{
-       LIBBPF_OPTS(bpf_xdp_query_opts, opts);
-       size_t sz;
-       int err;
-
-       if (!info_size)
-               return libbpf_err(-EINVAL);
-
-       err = bpf_xdp_query(ifindex, flags, &opts);
-       if (err)
-               return libbpf_err(err);
-
-       /* struct xdp_link_info field layout matches struct bpf_xdp_query_opts
-        * layout after sz field
-        */
-       sz = min(info_size, offsetofend(struct xdp_link_info, attach_mode));
-       memcpy(info, &opts.prog_id, sz);
-       memset((void *)info + sz, 0, info_size - sz);
-
-       return 0;
-}
-
 int bpf_xdp_query_id(int ifindex, int flags, __u32 *prog_id)
 {
        LIBBPF_OPTS(bpf_xdp_query_opts, opts);
@@ -463,11 +422,6 @@ int bpf_xdp_query_id(int ifindex, int flags, __u32 *prog_id)
 }
 
 
-int bpf_get_link_xdp_id(int ifindex, __u32 *prog_id, __u32 flags)
-{
-       return bpf_xdp_query_id(ifindex, flags, prog_id);
-}
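The removed wrapper was a thin alias, so migration is mechanical; note that bpf_xdp_query_id() takes flags before the output prog_id, the reverse of the old bpf_get_link_xdp_id() order. A sketch:

	__u32 prog_id = 0;
	int err;

	err = bpf_xdp_query_id(ifindex, XDP_FLAGS_DRV_MODE, &prog_id);
	if (err)
		fprintf(stderr, "bpf_xdp_query_id failed: %d\n", err);
	else
		printf("XDP prog id on ifindex %d: %u\n", ifindex, prog_id);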
-
 typedef int (*qdisc_config_t)(struct libbpf_nla_req *req);
 
 static int clsact_config(struct libbpf_nla_req *req)
index 6ad3c38..c4b0e81 100644
@@ -95,6 +95,7 @@ static const char *core_relo_kind_str(enum bpf_core_relo_kind kind)
        case BPF_CORE_TYPE_ID_LOCAL: return "local_type_id";
        case BPF_CORE_TYPE_ID_TARGET: return "target_type_id";
        case BPF_CORE_TYPE_EXISTS: return "type_exists";
+       case BPF_CORE_TYPE_MATCHES: return "type_matches";
        case BPF_CORE_TYPE_SIZE: return "type_size";
        case BPF_CORE_ENUMVAL_EXISTS: return "enumval_exists";
        case BPF_CORE_ENUMVAL_VALUE: return "enumval_value";
@@ -123,6 +124,7 @@ static bool core_relo_is_type_based(enum bpf_core_relo_kind kind)
        case BPF_CORE_TYPE_ID_LOCAL:
        case BPF_CORE_TYPE_ID_TARGET:
        case BPF_CORE_TYPE_EXISTS:
+       case BPF_CORE_TYPE_MATCHES:
        case BPF_CORE_TYPE_SIZE:
                return true;
        default:
@@ -141,6 +143,86 @@ static bool core_relo_is_enumval_based(enum bpf_core_relo_kind kind)
        }
 }
 
+int __bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id,
+                               const struct btf *targ_btf, __u32 targ_id, int level)
+{
+       const struct btf_type *local_type, *targ_type;
+       int depth = 32; /* max recursion depth */
+
+       /* caller made sure that names match (ignoring flavor suffix) */
+       local_type = btf_type_by_id(local_btf, local_id);
+       targ_type = btf_type_by_id(targ_btf, targ_id);
+       if (!btf_kind_core_compat(local_type, targ_type))
+               return 0;
+
+recur:
+       depth--;
+       if (depth < 0)
+               return -EINVAL;
+
+       local_type = skip_mods_and_typedefs(local_btf, local_id, &local_id);
+       targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id);
+       if (!local_type || !targ_type)
+               return -EINVAL;
+
+       if (!btf_kind_core_compat(local_type, targ_type))
+               return 0;
+
+       switch (btf_kind(local_type)) {
+       case BTF_KIND_UNKN:
+       case BTF_KIND_STRUCT:
+       case BTF_KIND_UNION:
+       case BTF_KIND_ENUM:
+       case BTF_KIND_FWD:
+       case BTF_KIND_ENUM64:
+               return 1;
+       case BTF_KIND_INT:
+               /* just reject deprecated bitfield-like integers; all other
+                * integers are compatible with each other by default
+                */
+               return btf_int_offset(local_type) == 0 && btf_int_offset(targ_type) == 0;
+       case BTF_KIND_PTR:
+               local_id = local_type->type;
+               targ_id = targ_type->type;
+               goto recur;
+       case BTF_KIND_ARRAY:
+               local_id = btf_array(local_type)->type;
+               targ_id = btf_array(targ_type)->type;
+               goto recur;
+       case BTF_KIND_FUNC_PROTO: {
+               struct btf_param *local_p = btf_params(local_type);
+               struct btf_param *targ_p = btf_params(targ_type);
+               __u16 local_vlen = btf_vlen(local_type);
+               __u16 targ_vlen = btf_vlen(targ_type);
+               int i, err;
+
+               if (local_vlen != targ_vlen)
+                       return 0;
+
+               for (i = 0; i < local_vlen; i++, local_p++, targ_p++) {
+                       if (level <= 0)
+                               return -EINVAL;
+
+                       skip_mods_and_typedefs(local_btf, local_p->type, &local_id);
+                       skip_mods_and_typedefs(targ_btf, targ_p->type, &targ_id);
+                       err = __bpf_core_types_are_compat(local_btf, local_id, targ_btf, targ_id,
+                                                         level - 1);
+                       if (err <= 0)
+                               return err;
+               }
+
+               /* tail recurse for return type check */
+               skip_mods_and_typedefs(local_btf, local_type->type, &local_id);
+               skip_mods_and_typedefs(targ_btf, targ_type->type, &targ_id);
+               goto recur;
+       }
+       default:
+               pr_warn("unexpected kind %s relocated, local [%d], target [%d]\n",
+                       btf_kind_str(local_type), local_id, targ_id);
+               return 0;
+       }
+}
+
 /*
  * Turn bpf_core_relo into a low- and high-level spec representation,
  * validating correctness along the way, as well as calculating resulting
@@ -171,7 +253,7 @@ static bool core_relo_is_enumval_based(enum bpf_core_relo_kind kind)
  *   - field 'a' access (corresponds to '2' in low-level spec);
  *   - array element #3 access (corresponds to '3' in low-level spec).
  *
- * Type-based relocations (TYPE_EXISTS/TYPE_SIZE,
+ * Type-based relocations (TYPE_EXISTS/TYPE_MATCHES/TYPE_SIZE,
  * TYPE_ID_LOCAL/TYPE_ID_TARGET) don't capture any field information. Their
  * spec and raw_spec are kept empty.
  *
@@ -488,9 +570,14 @@ static int bpf_core_spec_match(struct bpf_core_spec *local_spec,
        targ_spec->relo_kind = local_spec->relo_kind;
 
        if (core_relo_is_type_based(local_spec->relo_kind)) {
-               return bpf_core_types_are_compat(local_spec->btf,
-                                                local_spec->root_type_id,
-                                                targ_btf, targ_id);
+               if (local_spec->relo_kind == BPF_CORE_TYPE_MATCHES)
+                       return bpf_core_types_match(local_spec->btf,
+                                                   local_spec->root_type_id,
+                                                   targ_btf, targ_id);
+               else
+                       return bpf_core_types_are_compat(local_spec->btf,
+                                                        local_spec->root_type_id,
+                                                        targ_btf, targ_id);
        }
 
        local_acc = &local_spec->spec[0];
@@ -739,6 +826,7 @@ static int bpf_core_calc_type_relo(const struct bpf_core_relo *relo,
                        *validate = false;
                break;
        case BPF_CORE_TYPE_EXISTS:
+       case BPF_CORE_TYPE_MATCHES:
                *val = 1;
                break;
        case BPF_CORE_TYPE_SIZE:
@@ -1330,3 +1418,273 @@ int bpf_core_calc_relo_insn(const char *prog_name,
 
        return 0;
 }
+
+static bool bpf_core_names_match(const struct btf *local_btf, size_t local_name_off,
+                                const struct btf *targ_btf, size_t targ_name_off)
+{
+       const char *local_n, *targ_n;
+       size_t local_len, targ_len;
+
+       local_n = btf__name_by_offset(local_btf, local_name_off);
+       targ_n = btf__name_by_offset(targ_btf, targ_name_off);
+
+       if (str_is_empty(targ_n))
+               return str_is_empty(local_n);
+
+       targ_len = bpf_core_essential_name_len(targ_n);
+       local_len = bpf_core_essential_name_len(local_n);
+
+       return targ_len == local_len && strncmp(local_n, targ_n, local_len) == 0;
+}
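Note that bpf_core_essential_name_len() excludes any triple-underscore "flavor" suffix, so flavored local types name-match their unflavored targets:

	/* both essential names are "task_struct" (length 11):
	 *   bpf_core_essential_name_len("task_struct")            == 11
	 *   bpf_core_essential_name_len("task_struct___flavored") == 11
	 */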
+
+static int bpf_core_enums_match(const struct btf *local_btf, const struct btf_type *local_t,
+                               const struct btf *targ_btf, const struct btf_type *targ_t)
+{
+       __u16 local_vlen = btf_vlen(local_t);
+       __u16 targ_vlen = btf_vlen(targ_t);
+       int i, j;
+
+       if (local_t->size != targ_t->size)
+               return 0;
+
+       if (local_vlen > targ_vlen)
+               return 0;
+
+       /* iterate over the local enum's variants and make sure each has
+        * a corresponding symbolic name in the target
+        */
+       for (i = 0; i < local_vlen; i++) {
+               bool matched = false;
+               __u32 local_n_off, targ_n_off;
+
+               local_n_off = btf_is_enum(local_t) ? btf_enum(local_t)[i].name_off :
+                                                    btf_enum64(local_t)[i].name_off;
+
+               for (j = 0; j < targ_vlen; j++) {
+                       targ_n_off = btf_is_enum(targ_t) ? btf_enum(targ_t)[j].name_off :
+                                                          btf_enum64(targ_t)[j].name_off;
+
+                       if (bpf_core_names_match(local_btf, local_n_off, targ_btf, targ_n_off)) {
+                               matched = true;
+                               break;
+                       }
+               }
+
+               if (!matched)
+                       return 0;
+       }
+       return 1;
+}
+
+static int bpf_core_composites_match(const struct btf *local_btf, const struct btf_type *local_t,
+                                    const struct btf *targ_btf, const struct btf_type *targ_t,
+                                    bool behind_ptr, int level)
+{
+       const struct btf_member *local_m = btf_members(local_t);
+       __u16 local_vlen = btf_vlen(local_t);
+       __u16 targ_vlen = btf_vlen(targ_t);
+       int i, j, err;
+
+       if (local_vlen > targ_vlen)
+               return 0;
+
+       /* check that all local members have a match in the target */
+       for (i = 0; i < local_vlen; i++, local_m++) {
+               const struct btf_member *targ_m = btf_members(targ_t);
+               bool matched = false;
+
+               for (j = 0; j < targ_vlen; j++, targ_m++) {
+                       if (!bpf_core_names_match(local_btf, local_m->name_off,
+                                                 targ_btf, targ_m->name_off))
+                               continue;
+
+                       err = __bpf_core_types_match(local_btf, local_m->type, targ_btf,
+                                                    targ_m->type, behind_ptr, level - 1);
+                       if (err < 0)
+                               return err;
+                       if (err > 0) {
+                               matched = true;
+                               break;
+                       }
+               }
+
+               if (!matched)
+                       return 0;
+       }
+       return 1;
+}
+
+/* Check that two types "match". This function assumes that root types were
+ * already checked for name match.
+ *
+ * The matching relation is defined as follows:
+ * - modifiers and typedefs are stripped (and, hence, effectively ignored)
+ * - generally speaking types need to be of same kind (struct vs. struct, union
+ *   vs. union, etc.)
+ *   - exceptions are struct/union behind a pointer which could also match a
+ *     forward declaration of a struct or union, respectively, and enum vs.
+ *     enum64 (see below)
+ * Then, depending on type:
+ * - integers:
+ *   - match if size and signedness match
+ * - arrays & pointers:
+ *   - target types are recursively matched
+ * - structs & unions:
+ *   - local members need to exist in the target with the same name
+ *   - for each member we recursively check for a match, unless it is already
+ *     behind a pointer, in which case we only check for matching names and a
+ *     compatible kind
+ * - enums:
+ *   - local variants have to have a match in the target by symbolic name (but
+ *     not numeric value)
+ *   - size has to match (but enum may match enum64 and vice versa)
+ * - function pointers:
+ *   - the number and position of arguments in the local type have to match
+ *     the target
+ *   - for each argument and the return value we recursively check for a match
+ */
+int __bpf_core_types_match(const struct btf *local_btf, __u32 local_id, const struct btf *targ_btf,
+                          __u32 targ_id, bool behind_ptr, int level)
+{
+       const struct btf_type *local_t, *targ_t;
+       int depth = 32; /* max recursion depth */
+       __u16 local_k, targ_k;
+
+       if (level <= 0)
+               return -EINVAL;
+
+       local_t = btf_type_by_id(local_btf, local_id);
+       targ_t = btf_type_by_id(targ_btf, targ_id);
+
+recur:
+       depth--;
+       if (depth < 0)
+               return -EINVAL;
+
+       local_t = skip_mods_and_typedefs(local_btf, local_id, &local_id);
+       targ_t = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id);
+       if (!local_t || !targ_t)
+               return -EINVAL;
+
+       /* While the name check happens after typedefs are skipped, root-level
+        * typedefs would still be name-matched as that's the contract with
+        * callers.
+        */
+       if (!bpf_core_names_match(local_btf, local_t->name_off, targ_btf, targ_t->name_off))
+               return 0;
+
+       local_k = btf_kind(local_t);
+       targ_k = btf_kind(targ_t);
+
+       switch (local_k) {
+       case BTF_KIND_UNKN:
+               return local_k == targ_k;
+       case BTF_KIND_FWD: {
+               bool local_f = BTF_INFO_KFLAG(local_t->info);
+
+               if (behind_ptr) {
+                       if (local_k == targ_k)
+                               return local_f == BTF_INFO_KFLAG(targ_t->info);
+
+                       /* for forward declarations kflag dictates whether the
+                        * target is a struct (0) or union (1)
+                        */
+                       return (targ_k == BTF_KIND_STRUCT && !local_f) ||
+                              (targ_k == BTF_KIND_UNION && local_f);
+               } else {
+                       if (local_k != targ_k)
+                               return 0;
+
+                       /* match if the forward declaration is for the same kind */
+                       return local_f == BTF_INFO_KFLAG(targ_t->info);
+               }
+       }
+       case BTF_KIND_ENUM:
+       case BTF_KIND_ENUM64:
+               if (!btf_is_any_enum(targ_t))
+                       return 0;
+
+               return bpf_core_enums_match(local_btf, local_t, targ_btf, targ_t);
+       case BTF_KIND_STRUCT:
+       case BTF_KIND_UNION:
+               if (behind_ptr) {
+                       bool targ_f = BTF_INFO_KFLAG(targ_t->info);
+
+                       if (local_k == targ_k)
+                               return 1;
+
+                       if (targ_k != BTF_KIND_FWD)
+                               return 0;
+
+                       return (local_k == BTF_KIND_UNION) == targ_f;
+               } else {
+                       if (local_k != targ_k)
+                               return 0;
+
+                       return bpf_core_composites_match(local_btf, local_t, targ_btf, targ_t,
+                                                        behind_ptr, level);
+               }
+       case BTF_KIND_INT: {
+               __u8 local_sgn;
+               __u8 targ_sgn;
+
+               if (local_k != targ_k)
+                       return 0;
+
+               local_sgn = btf_int_encoding(local_t) & BTF_INT_SIGNED;
+               targ_sgn = btf_int_encoding(targ_t) & BTF_INT_SIGNED;
+
+               return local_t->size == targ_t->size && local_sgn == targ_sgn;
+       }
+       case BTF_KIND_PTR:
+               if (local_k != targ_k)
+                       return 0;
+
+               behind_ptr = true;
+
+               local_id = local_t->type;
+               targ_id = targ_t->type;
+               goto recur;
+       case BTF_KIND_ARRAY: {
+               const struct btf_array *local_array = btf_array(local_t);
+               const struct btf_array *targ_array = btf_array(targ_t);
+
+               if (local_k != targ_k)
+                       return 0;
+
+               if (local_array->nelems != targ_array->nelems)
+                       return 0;
+
+               local_id = local_array->type;
+               targ_id = targ_array->type;
+               goto recur;
+       }
+       case BTF_KIND_FUNC_PROTO: {
+               struct btf_param *local_p = btf_params(local_t);
+               struct btf_param *targ_p = btf_params(targ_t);
+               __u16 local_vlen = btf_vlen(local_t);
+               __u16 targ_vlen = btf_vlen(targ_t);
+               int i, err;
+
+               if (local_k != targ_k)
+                       return 0;
+
+               if (local_vlen != targ_vlen)
+                       return 0;
+
+               for (i = 0; i < local_vlen; i++, local_p++, targ_p++) {
+                       err = __bpf_core_types_match(local_btf, local_p->type, targ_btf,
+                                                    targ_p->type, behind_ptr, level - 1);
+                       if (err <= 0)
+                               return err;
+               }
+
+               /* tail recurse for return type check */
+               local_id = local_t->type;
+               targ_id = targ_t->type;
+               goto recur;
+       }
+       default:
+               pr_warn("unexpected kind %s relocated, local [%d], target [%d]\n",
+                       btf_kind_str(local_t), local_id, targ_id);
+               return 0;
+       }
+}
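On the BPF program side, this matching relation is exercised via the BPF_CORE_TYPE_MATCHES relocation. A minimal sketch, assuming the bpf_core_type_matches() convenience macro added to bpf_core_read.h in this series and a vmlinux.h-based build:

	#include "vmlinux.h"
	#include <bpf/bpf_helpers.h>
	#include <bpf/bpf_core_read.h>

	/* local "flavor" describing only the fields/layout we require */
	struct task_struct___local {
		int pid;
	} __attribute__((preserve_access_index));

	SEC("tp_btf/task_newtask")
	int check_match(void *ctx)
	{
		/* emits a BPF_CORE_TYPE_MATCHES relocation against kernel BTF */
		if (bpf_core_type_matches(struct task_struct___local))
			bpf_printk("task_struct matches local definition");
		return 0;
	}

	char LICENSE[] SEC("license") = "GPL";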
index 7df0da0..1c0566d 100644
@@ -68,8 +68,14 @@ struct bpf_core_relo_res {
        __u32 new_type_id;
 };
 
+int __bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id,
+                               const struct btf *targ_btf, __u32 targ_id, int level);
 int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id,
                              const struct btf *targ_btf, __u32 targ_id);
+int __bpf_core_types_match(const struct btf *local_btf, __u32 local_id, const struct btf *targ_btf,
+                          __u32 targ_id, bool behind_ptr, int level);
+int bpf_core_types_match(const struct btf *local_btf, __u32 local_id, const struct btf *targ_btf,
+                        __u32 targ_id);
 
 size_t bpf_core_essential_name_len(const char *name);
 
diff --git a/tools/lib/bpf/usdt.c b/tools/lib/bpf/usdt.c
index 5159207..d18e379 100644
@@ -652,11 +652,9 @@ static int collect_usdt_targets(struct usdt_manager *man, Elf *elf, const char *
                 *
                 *   [0] https://sourceware.org/systemtap/wiki/UserSpaceProbeImplementation
                 */
-               usdt_rel_ip = usdt_abs_ip = note.loc_addr;
-               if (base_addr) {
+               usdt_abs_ip = note.loc_addr;
+               if (base_addr)
                        usdt_abs_ip += base_addr - note.base_addr;
-                       usdt_rel_ip += base_addr - note.base_addr;
-               }
 
                /* When attaching uprobes (which is what USDTs basically are)
                 * kernel expects file offset to be specified, not a relative
diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c
deleted file mode 100644
index af136f7..0000000
+++ /dev/null
@@ -1,1260 +0,0 @@
-// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
-
-/*
- * AF_XDP user-space access library.
- *
- * Copyright(c) 2018 - 2019 Intel Corporation.
- *
- * Author(s): Magnus Karlsson <magnus.karlsson@intel.com>
- */
-
-#include <errno.h>
-#include <stdlib.h>
-#include <string.h>
-#include <unistd.h>
-#include <arpa/inet.h>
-#include <asm/barrier.h>
-#include <linux/compiler.h>
-#include <linux/ethtool.h>
-#include <linux/filter.h>
-#include <linux/if_ether.h>
-#include <linux/if_packet.h>
-#include <linux/if_xdp.h>
-#include <linux/kernel.h>
-#include <linux/list.h>
-#include <linux/sockios.h>
-#include <net/if.h>
-#include <sys/ioctl.h>
-#include <sys/mman.h>
-#include <sys/socket.h>
-#include <sys/types.h>
-#include <linux/if_link.h>
-
-#include "bpf.h"
-#include "libbpf.h"
-#include "libbpf_internal.h"
-#include "xsk.h"
-
-/* entire xsk.h and xsk.c are going away in libbpf 1.0, so ignore all internal
- * uses of deprecated APIs
- */
-#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
-
-#ifndef SOL_XDP
- #define SOL_XDP 283
-#endif
-
-#ifndef AF_XDP
- #define AF_XDP 44
-#endif
-
-#ifndef PF_XDP
- #define PF_XDP AF_XDP
-#endif
-
-enum xsk_prog {
-       XSK_PROG_FALLBACK,
-       XSK_PROG_REDIRECT_FLAGS,
-};
-
-struct xsk_umem {
-       struct xsk_ring_prod *fill_save;
-       struct xsk_ring_cons *comp_save;
-       char *umem_area;
-       struct xsk_umem_config config;
-       int fd;
-       int refcount;
-       struct list_head ctx_list;
-       bool rx_ring_setup_done;
-       bool tx_ring_setup_done;
-};
-
-struct xsk_ctx {
-       struct xsk_ring_prod *fill;
-       struct xsk_ring_cons *comp;
-       __u32 queue_id;
-       struct xsk_umem *umem;
-       int refcount;
-       int ifindex;
-       struct list_head list;
-       int prog_fd;
-       int link_fd;
-       int xsks_map_fd;
-       char ifname[IFNAMSIZ];
-       bool has_bpf_link;
-};
-
-struct xsk_socket {
-       struct xsk_ring_cons *rx;
-       struct xsk_ring_prod *tx;
-       __u64 outstanding_tx;
-       struct xsk_ctx *ctx;
-       struct xsk_socket_config config;
-       int fd;
-};
-
-struct xsk_nl_info {
-       bool xdp_prog_attached;
-       int ifindex;
-       int fd;
-};
-
-/* Up until and including Linux 5.3 */
-struct xdp_ring_offset_v1 {
-       __u64 producer;
-       __u64 consumer;
-       __u64 desc;
-};
-
-/* Up until and including Linux 5.3 */
-struct xdp_mmap_offsets_v1 {
-       struct xdp_ring_offset_v1 rx;
-       struct xdp_ring_offset_v1 tx;
-       struct xdp_ring_offset_v1 fr;
-       struct xdp_ring_offset_v1 cr;
-};
-
-int xsk_umem__fd(const struct xsk_umem *umem)
-{
-       return umem ? umem->fd : -EINVAL;
-}
-
-int xsk_socket__fd(const struct xsk_socket *xsk)
-{
-       return xsk ? xsk->fd : -EINVAL;
-}
-
-static bool xsk_page_aligned(void *buffer)
-{
-       unsigned long addr = (unsigned long)buffer;
-
-       return !(addr & (getpagesize() - 1));
-}
-
-static void xsk_set_umem_config(struct xsk_umem_config *cfg,
-                               const struct xsk_umem_config *usr_cfg)
-{
-       if (!usr_cfg) {
-               cfg->fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS;
-               cfg->comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS;
-               cfg->frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE;
-               cfg->frame_headroom = XSK_UMEM__DEFAULT_FRAME_HEADROOM;
-               cfg->flags = XSK_UMEM__DEFAULT_FLAGS;
-               return;
-       }
-
-       cfg->fill_size = usr_cfg->fill_size;
-       cfg->comp_size = usr_cfg->comp_size;
-       cfg->frame_size = usr_cfg->frame_size;
-       cfg->frame_headroom = usr_cfg->frame_headroom;
-       cfg->flags = usr_cfg->flags;
-}
-
-static int xsk_set_xdp_socket_config(struct xsk_socket_config *cfg,
-                                    const struct xsk_socket_config *usr_cfg)
-{
-       if (!usr_cfg) {
-               cfg->rx_size = XSK_RING_CONS__DEFAULT_NUM_DESCS;
-               cfg->tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS;
-               cfg->libbpf_flags = 0;
-               cfg->xdp_flags = 0;
-               cfg->bind_flags = 0;
-               return 0;
-       }
-
-       if (usr_cfg->libbpf_flags & ~XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD)
-               return -EINVAL;
-
-       cfg->rx_size = usr_cfg->rx_size;
-       cfg->tx_size = usr_cfg->tx_size;
-       cfg->libbpf_flags = usr_cfg->libbpf_flags;
-       cfg->xdp_flags = usr_cfg->xdp_flags;
-       cfg->bind_flags = usr_cfg->bind_flags;
-
-       return 0;
-}
-
-static void xsk_mmap_offsets_v1(struct xdp_mmap_offsets *off)
-{
-       struct xdp_mmap_offsets_v1 off_v1;
-
-       /* getsockopt on a kernel <= 5.3 has no flags fields.
-        * Copy over the offsets to the correct places in the >=5.4 format
-        * and put the flags where they would have been on that kernel.
-        */
-       memcpy(&off_v1, off, sizeof(off_v1));
-
-       off->rx.producer = off_v1.rx.producer;
-       off->rx.consumer = off_v1.rx.consumer;
-       off->rx.desc = off_v1.rx.desc;
-       off->rx.flags = off_v1.rx.consumer + sizeof(__u32);
-
-       off->tx.producer = off_v1.tx.producer;
-       off->tx.consumer = off_v1.tx.consumer;
-       off->tx.desc = off_v1.tx.desc;
-       off->tx.flags = off_v1.tx.consumer + sizeof(__u32);
-
-       off->fr.producer = off_v1.fr.producer;
-       off->fr.consumer = off_v1.fr.consumer;
-       off->fr.desc = off_v1.fr.desc;
-       off->fr.flags = off_v1.fr.consumer + sizeof(__u32);
-
-       off->cr.producer = off_v1.cr.producer;
-       off->cr.consumer = off_v1.cr.consumer;
-       off->cr.desc = off_v1.cr.desc;
-       off->cr.flags = off_v1.cr.consumer + sizeof(__u32);
-}
-
-static int xsk_get_mmap_offsets(int fd, struct xdp_mmap_offsets *off)
-{
-       socklen_t optlen;
-       int err;
-
-       optlen = sizeof(*off);
-       err = getsockopt(fd, SOL_XDP, XDP_MMAP_OFFSETS, off, &optlen);
-       if (err)
-               return err;
-
-       if (optlen == sizeof(*off))
-               return 0;
-
-       if (optlen == sizeof(struct xdp_mmap_offsets_v1)) {
-               xsk_mmap_offsets_v1(off);
-               return 0;
-       }
-
-       return -EINVAL;
-}
-
-static int xsk_create_umem_rings(struct xsk_umem *umem, int fd,
-                                struct xsk_ring_prod *fill,
-                                struct xsk_ring_cons *comp)
-{
-       struct xdp_mmap_offsets off;
-       void *map;
-       int err;
-
-       err = setsockopt(fd, SOL_XDP, XDP_UMEM_FILL_RING,
-                        &umem->config.fill_size,
-                        sizeof(umem->config.fill_size));
-       if (err)
-               return -errno;
-
-       err = setsockopt(fd, SOL_XDP, XDP_UMEM_COMPLETION_RING,
-                        &umem->config.comp_size,
-                        sizeof(umem->config.comp_size));
-       if (err)
-               return -errno;
-
-       err = xsk_get_mmap_offsets(fd, &off);
-       if (err)
-               return -errno;
-
-       map = mmap(NULL, off.fr.desc + umem->config.fill_size * sizeof(__u64),
-                  PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd,
-                  XDP_UMEM_PGOFF_FILL_RING);
-       if (map == MAP_FAILED)
-               return -errno;
-
-       fill->mask = umem->config.fill_size - 1;
-       fill->size = umem->config.fill_size;
-       fill->producer = map + off.fr.producer;
-       fill->consumer = map + off.fr.consumer;
-       fill->flags = map + off.fr.flags;
-       fill->ring = map + off.fr.desc;
-       fill->cached_cons = umem->config.fill_size;
-
-       map = mmap(NULL, off.cr.desc + umem->config.comp_size * sizeof(__u64),
-                  PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd,
-                  XDP_UMEM_PGOFF_COMPLETION_RING);
-       if (map == MAP_FAILED) {
-               err = -errno;
-               goto out_mmap;
-       }
-
-       comp->mask = umem->config.comp_size - 1;
-       comp->size = umem->config.comp_size;
-       comp->producer = map + off.cr.producer;
-       comp->consumer = map + off.cr.consumer;
-       comp->flags = map + off.cr.flags;
-       comp->ring = map + off.cr.desc;
-
-       return 0;
-
-out_mmap:
-       munmap(map, off.fr.desc + umem->config.fill_size * sizeof(__u64));
-       return err;
-}
-
-DEFAULT_VERSION(xsk_umem__create_v0_0_4, xsk_umem__create, LIBBPF_0.0.4)
-int xsk_umem__create_v0_0_4(struct xsk_umem **umem_ptr, void *umem_area,
-                           __u64 size, struct xsk_ring_prod *fill,
-                           struct xsk_ring_cons *comp,
-                           const struct xsk_umem_config *usr_config)
-{
-       struct xdp_umem_reg mr;
-       struct xsk_umem *umem;
-       int err;
-
-       if (!umem_area || !umem_ptr || !fill || !comp)
-               return -EFAULT;
-       if (!size && !xsk_page_aligned(umem_area))
-               return -EINVAL;
-
-       umem = calloc(1, sizeof(*umem));
-       if (!umem)
-               return -ENOMEM;
-
-       umem->fd = socket(AF_XDP, SOCK_RAW | SOCK_CLOEXEC, 0);
-       if (umem->fd < 0) {
-               err = -errno;
-               goto out_umem_alloc;
-       }
-
-       umem->umem_area = umem_area;
-       INIT_LIST_HEAD(&umem->ctx_list);
-       xsk_set_umem_config(&umem->config, usr_config);
-
-       memset(&mr, 0, sizeof(mr));
-       mr.addr = (uintptr_t)umem_area;
-       mr.len = size;
-       mr.chunk_size = umem->config.frame_size;
-       mr.headroom = umem->config.frame_headroom;
-       mr.flags = umem->config.flags;
-
-       err = setsockopt(umem->fd, SOL_XDP, XDP_UMEM_REG, &mr, sizeof(mr));
-       if (err) {
-               err = -errno;
-               goto out_socket;
-       }
-
-       err = xsk_create_umem_rings(umem, umem->fd, fill, comp);
-       if (err)
-               goto out_socket;
-
-       umem->fill_save = fill;
-       umem->comp_save = comp;
-       *umem_ptr = umem;
-       return 0;
-
-out_socket:
-       close(umem->fd);
-out_umem_alloc:
-       free(umem);
-       return err;
-}
-
-struct xsk_umem_config_v1 {
-       __u32 fill_size;
-       __u32 comp_size;
-       __u32 frame_size;
-       __u32 frame_headroom;
-};
-
-COMPAT_VERSION(xsk_umem__create_v0_0_2, xsk_umem__create, LIBBPF_0.0.2)
-int xsk_umem__create_v0_0_2(struct xsk_umem **umem_ptr, void *umem_area,
-                           __u64 size, struct xsk_ring_prod *fill,
-                           struct xsk_ring_cons *comp,
-                           const struct xsk_umem_config *usr_config)
-{
-       struct xsk_umem_config config;
-
-       memcpy(&config, usr_config, sizeof(struct xsk_umem_config_v1));
-       config.flags = 0;
-
-       return xsk_umem__create_v0_0_4(umem_ptr, umem_area, size, fill, comp,
-                                       &config);
-}
-
-static enum xsk_prog get_xsk_prog(void)
-{
-       enum xsk_prog detected = XSK_PROG_FALLBACK;
-       __u32 size_out, retval, duration;
-       char data_in = 0, data_out;
-       struct bpf_insn insns[] = {
-               BPF_LD_MAP_FD(BPF_REG_1, 0),
-               BPF_MOV64_IMM(BPF_REG_2, 0),
-               BPF_MOV64_IMM(BPF_REG_3, XDP_PASS),
-               BPF_EMIT_CALL(BPF_FUNC_redirect_map),
-               BPF_EXIT_INSN(),
-       };
-       int prog_fd, map_fd, ret, insn_cnt = ARRAY_SIZE(insns);
-
-       map_fd = bpf_map_create(BPF_MAP_TYPE_XSKMAP, NULL, sizeof(int), sizeof(int), 1, NULL);
-       if (map_fd < 0)
-               return detected;
-
-       insns[0].imm = map_fd;
-
-       prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, NULL, "GPL", insns, insn_cnt, NULL);
-       if (prog_fd < 0) {
-               close(map_fd);
-               return detected;
-       }
-
-       ret = bpf_prog_test_run(prog_fd, 0, &data_in, 1, &data_out, &size_out, &retval, &duration);
-       if (!ret && retval == XDP_PASS)
-               detected = XSK_PROG_REDIRECT_FLAGS;
-       close(prog_fd);
-       close(map_fd);
-       return detected;
-}
-
-static int xsk_load_xdp_prog(struct xsk_socket *xsk)
-{
-       static const int log_buf_size = 16 * 1024;
-       struct xsk_ctx *ctx = xsk->ctx;
-       char log_buf[log_buf_size];
-       int prog_fd;
-
-       /* This is the fallback C-program:
-        * SEC("xdp_sock") int xdp_sock_prog(struct xdp_md *ctx)
-        * {
-        *     int ret, index = ctx->rx_queue_index;
-        *
-        *     // A set entry here means that the corresponding queue_id
-        *     // has an active AF_XDP socket bound to it.
-        *     ret = bpf_redirect_map(&xsks_map, index, XDP_PASS);
-        *     if (ret > 0)
-        *         return ret;
-        *
-        *     // Fallback for pre-5.3 kernels, which do not support the
-        *     // default action in the flags parameter.
-        *     if (bpf_map_lookup_elem(&xsks_map, &index))
-        *         return bpf_redirect_map(&xsks_map, index, 0);
-        *     return XDP_PASS;
-        * }
-        */
-       struct bpf_insn prog[] = {
-               /* r2 = *(u32 *)(r1 + 16) */
-               BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 16),
-               /* *(u32 *)(r10 - 4) = r2 */
-               BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_2, -4),
-               /* r1 = xskmap[] */
-               BPF_LD_MAP_FD(BPF_REG_1, ctx->xsks_map_fd),
-               /* r3 = XDP_PASS */
-               BPF_MOV64_IMM(BPF_REG_3, 2),
-               /* call bpf_redirect_map */
-               BPF_EMIT_CALL(BPF_FUNC_redirect_map),
-               /* if w0 != 0 goto pc+13 */
-               BPF_JMP32_IMM(BPF_JSGT, BPF_REG_0, 0, 13),
-               /* r2 = r10 */
-               BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
-               /* r2 += -4 */
-               BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
-               /* r1 = xskmap[] */
-               BPF_LD_MAP_FD(BPF_REG_1, ctx->xsks_map_fd),
-               /* call bpf_map_lookup_elem */
-               BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
-               /* r1 = r0 */
-               BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
-               /* r0 = XDP_PASS */
-               BPF_MOV64_IMM(BPF_REG_0, 2),
-               /* if r1 == 0 goto pc+5 */
-               BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 5),
-               /* r2 = *(u32 *)(r10 - 4) */
-               BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_10, -4),
-               /* r1 = xskmap[] */
-               BPF_LD_MAP_FD(BPF_REG_1, ctx->xsks_map_fd),
-               /* r3 = 0 */
-               BPF_MOV64_IMM(BPF_REG_3, 0),
-               /* call bpf_redirect_map */
-               BPF_EMIT_CALL(BPF_FUNC_redirect_map),
-               /* The jumps are to this instruction */
-               BPF_EXIT_INSN(),
-       };
-
-       /* This is the post-5.3 kernel C-program:
-        * SEC("xdp_sock") int xdp_sock_prog(struct xdp_md *ctx)
-        * {
-        *     return bpf_redirect_map(&xsks_map, ctx->rx_queue_index, XDP_PASS);
-        * }
-        */
-       struct bpf_insn prog_redirect_flags[] = {
-               /* r2 = *(u32 *)(r1 + 16) */
-               BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 16),
-               /* r1 = xskmap[] */
-               BPF_LD_MAP_FD(BPF_REG_1, ctx->xsks_map_fd),
-               /* r3 = XDP_PASS */
-               BPF_MOV64_IMM(BPF_REG_3, 2),
-               /* call bpf_redirect_map */
-               BPF_EMIT_CALL(BPF_FUNC_redirect_map),
-               BPF_EXIT_INSN(),
-       };
-       size_t insns_cnt[] = {ARRAY_SIZE(prog),
-                             ARRAY_SIZE(prog_redirect_flags),
-       };
-       struct bpf_insn *progs[] = {prog, prog_redirect_flags};
-       enum xsk_prog option = get_xsk_prog();
-       LIBBPF_OPTS(bpf_prog_load_opts, opts,
-               .log_buf = log_buf,
-               .log_size = log_buf_size,
-       );
-
-       prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, NULL, "LGPL-2.1 or BSD-2-Clause",
-                               progs[option], insns_cnt[option], &opts);
-       if (prog_fd < 0) {
-               pr_warn("BPF log buffer:\n%s", log_buf);
-               return prog_fd;
-       }
-
-       ctx->prog_fd = prog_fd;
-       return 0;
-}
-
-static int xsk_create_bpf_link(struct xsk_socket *xsk)
-{
-       DECLARE_LIBBPF_OPTS(bpf_link_create_opts, opts);
-       struct xsk_ctx *ctx = xsk->ctx;
-       __u32 prog_id = 0;
-       int link_fd;
-       int err;
-
-       err = bpf_get_link_xdp_id(ctx->ifindex, &prog_id, xsk->config.xdp_flags);
-       if (err) {
-               pr_warn("getting XDP prog id failed\n");
-               return err;
-       }
-
-       /* if there's a netlink-based XDP prog loaded on the interface, bail out
-        * and ask the user to remove it themselves
-        */
-       if (prog_id) {
-               pr_warn("Netlink-based XDP prog detected, please unload it in order to launch AF_XDP prog\n");
-               return -EINVAL;
-       }
-
-       opts.flags = xsk->config.xdp_flags & ~(XDP_FLAGS_UPDATE_IF_NOEXIST | XDP_FLAGS_REPLACE);
-
-       link_fd = bpf_link_create(ctx->prog_fd, ctx->ifindex, BPF_XDP, &opts);
-       if (link_fd < 0) {
-               pr_warn("bpf_link_create failed: %s\n", strerror(errno));
-               return link_fd;
-       }
-
-       ctx->link_fd = link_fd;
-       return 0;
-}
-
-static int xsk_get_max_queues(struct xsk_socket *xsk)
-{
-       struct ethtool_channels channels = { .cmd = ETHTOOL_GCHANNELS };
-       struct xsk_ctx *ctx = xsk->ctx;
-       struct ifreq ifr = {};
-       int fd, err, ret;
-
-       fd = socket(AF_LOCAL, SOCK_DGRAM | SOCK_CLOEXEC, 0);
-       if (fd < 0)
-               return -errno;
-
-       ifr.ifr_data = (void *)&channels;
-       libbpf_strlcpy(ifr.ifr_name, ctx->ifname, IFNAMSIZ);
-       err = ioctl(fd, SIOCETHTOOL, &ifr);
-       if (err && errno != EOPNOTSUPP) {
-               ret = -errno;
-               goto out;
-       }
-
-       if (err) {
-               /* If the device says it has no channels, then all traffic
-                * is sent to a single stream, so max queues = 1.
-                */
-               ret = 1;
-       } else {
-               /* Take the max of rx, tx, combined. Drivers return
-                * the number of channels in different ways.
-                */
-               ret = max(channels.max_rx, channels.max_tx);
-               ret = max(ret, (int)channels.max_combined);
-       }
-
-out:
-       close(fd);
-       return ret;
-}
-
-static int xsk_create_bpf_maps(struct xsk_socket *xsk)
-{
-       struct xsk_ctx *ctx = xsk->ctx;
-       int max_queues;
-       int fd;
-
-       max_queues = xsk_get_max_queues(xsk);
-       if (max_queues < 0)
-               return max_queues;
-
-       fd = bpf_map_create(BPF_MAP_TYPE_XSKMAP, "xsks_map",
-                           sizeof(int), sizeof(int), max_queues, NULL);
-       if (fd < 0)
-               return fd;
-
-       ctx->xsks_map_fd = fd;
-
-       return 0;
-}
-
-static void xsk_delete_bpf_maps(struct xsk_socket *xsk)
-{
-       struct xsk_ctx *ctx = xsk->ctx;
-
-       bpf_map_delete_elem(ctx->xsks_map_fd, &ctx->queue_id);
-       close(ctx->xsks_map_fd);
-}
-
-static int xsk_lookup_bpf_maps(struct xsk_socket *xsk)
-{
-       __u32 i, *map_ids, num_maps, prog_len = sizeof(struct bpf_prog_info);
-       __u32 map_len = sizeof(struct bpf_map_info);
-       struct bpf_prog_info prog_info = {};
-       struct xsk_ctx *ctx = xsk->ctx;
-       struct bpf_map_info map_info;
-       int fd, err;
-
-       err = bpf_obj_get_info_by_fd(ctx->prog_fd, &prog_info, &prog_len);
-       if (err)
-               return err;
-
-       num_maps = prog_info.nr_map_ids;
-
-       map_ids = calloc(prog_info.nr_map_ids, sizeof(*map_ids));
-       if (!map_ids)
-               return -ENOMEM;
-
-       memset(&prog_info, 0, prog_len);
-       prog_info.nr_map_ids = num_maps;
-       prog_info.map_ids = (__u64)(unsigned long)map_ids;
-
-       err = bpf_obj_get_info_by_fd(ctx->prog_fd, &prog_info, &prog_len);
-       if (err)
-               goto out_map_ids;
-
-       ctx->xsks_map_fd = -1;
-
-       for (i = 0; i < prog_info.nr_map_ids; i++) {
-               fd = bpf_map_get_fd_by_id(map_ids[i]);
-               if (fd < 0)
-                       continue;
-
-               memset(&map_info, 0, map_len);
-               err = bpf_obj_get_info_by_fd(fd, &map_info, &map_len);
-               if (err) {
-                       close(fd);
-                       continue;
-               }
-
-               if (!strncmp(map_info.name, "xsks_map", sizeof(map_info.name))) {
-                       ctx->xsks_map_fd = fd;
-                       break;
-               }
-
-               close(fd);
-       }
-
-       if (ctx->xsks_map_fd == -1)
-               err = -ENOENT;
-
-out_map_ids:
-       free(map_ids);
-       return err;
-}
-
-static int xsk_set_bpf_maps(struct xsk_socket *xsk)
-{
-       struct xsk_ctx *ctx = xsk->ctx;
-
-       return bpf_map_update_elem(ctx->xsks_map_fd, &ctx->queue_id,
-                                  &xsk->fd, 0);
-}
-
-static int xsk_link_lookup(int ifindex, __u32 *prog_id, int *link_fd)
-{
-       struct bpf_link_info link_info;
-       __u32 link_len;
-       __u32 id = 0;
-       int err;
-       int fd;
-
-       while (true) {
-               err = bpf_link_get_next_id(id, &id);
-               if (err) {
-                       if (errno == ENOENT) {
-                               err = 0;
-                               break;
-                       }
-                       pr_warn("can't get next link: %s\n", strerror(errno));
-                       break;
-               }
-
-               fd = bpf_link_get_fd_by_id(id);
-               if (fd < 0) {
-                       if (errno == ENOENT)
-                               continue;
-                       pr_warn("can't get link by id (%u): %s\n", id, strerror(errno));
-                       err = -errno;
-                       break;
-               }
-
-               link_len = sizeof(struct bpf_link_info);
-               memset(&link_info, 0, link_len);
-               err = bpf_obj_get_info_by_fd(fd, &link_info, &link_len);
-               if (err) {
-                       pr_warn("can't get link info: %s\n", strerror(errno));
-                       close(fd);
-                       break;
-               }
-               if (link_info.type == BPF_LINK_TYPE_XDP) {
-                       if (link_info.xdp.ifindex == ifindex) {
-                               *link_fd = fd;
-                               if (prog_id)
-                                       *prog_id = link_info.prog_id;
-                               break;
-                       }
-               }
-               close(fd);
-       }
-
-       return err;
-}
-
-static bool xsk_probe_bpf_link(void)
-{
-       LIBBPF_OPTS(bpf_link_create_opts, opts, .flags = XDP_FLAGS_SKB_MODE);
-       struct bpf_insn insns[2] = {
-               BPF_MOV64_IMM(BPF_REG_0, XDP_PASS),
-               BPF_EXIT_INSN()
-       };
-       int prog_fd, link_fd = -1, insn_cnt = ARRAY_SIZE(insns);
-       int ifindex_lo = 1;
-       bool ret = false;
-       int err;
-
-       err = xsk_link_lookup(ifindex_lo, NULL, &link_fd);
-       if (err)
-               return ret;
-
-       if (link_fd >= 0)
-               return true;
-
-       prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, NULL, "GPL", insns, insn_cnt, NULL);
-       if (prog_fd < 0)
-               return ret;
-
-       link_fd = bpf_link_create(prog_fd, ifindex_lo, BPF_XDP, &opts);
-       close(prog_fd);
-
-       if (link_fd >= 0) {
-               ret = true;
-               close(link_fd);
-       }
-
-       return ret;
-}
-
-static int xsk_create_xsk_struct(int ifindex, struct xsk_socket *xsk)
-{
-       char ifname[IFNAMSIZ];
-       struct xsk_ctx *ctx;
-       char *interface;
-
-       ctx = calloc(1, sizeof(*ctx));
-       if (!ctx)
-               return -ENOMEM;
-
-       interface = if_indextoname(ifindex, &ifname[0]);
-       if (!interface) {
-               free(ctx);
-               return -errno;
-       }
-
-       ctx->ifindex = ifindex;
-       libbpf_strlcpy(ctx->ifname, ifname, IFNAMSIZ);
-
-       xsk->ctx = ctx;
-       xsk->ctx->has_bpf_link = xsk_probe_bpf_link();
-
-       return 0;
-}
-
-static int xsk_init_xdp_res(struct xsk_socket *xsk,
-                           int *xsks_map_fd)
-{
-       struct xsk_ctx *ctx = xsk->ctx;
-       int err;
-
-       err = xsk_create_bpf_maps(xsk);
-       if (err)
-               return err;
-
-       err = xsk_load_xdp_prog(xsk);
-       if (err)
-               goto err_load_xdp_prog;
-
-       if (ctx->has_bpf_link)
-               err = xsk_create_bpf_link(xsk);
-       else
-               err = bpf_set_link_xdp_fd(xsk->ctx->ifindex, ctx->prog_fd,
-                                         xsk->config.xdp_flags);
-
-       if (err)
-               goto err_attach_xdp_prog;
-
-       if (!xsk->rx)
-               return err;
-
-       err = xsk_set_bpf_maps(xsk);
-       if (err)
-               goto err_set_bpf_maps;
-
-       return err;
-
-err_set_bpf_maps:
-       if (ctx->has_bpf_link)
-               close(ctx->link_fd);
-       else
-               bpf_set_link_xdp_fd(ctx->ifindex, -1, 0);
-err_attach_xdp_prog:
-       close(ctx->prog_fd);
-err_load_xdp_prog:
-       xsk_delete_bpf_maps(xsk);
-       return err;
-}
-
-static int xsk_lookup_xdp_res(struct xsk_socket *xsk, int *xsks_map_fd, int prog_id)
-{
-       struct xsk_ctx *ctx = xsk->ctx;
-       int err;
-
-       ctx->prog_fd = bpf_prog_get_fd_by_id(prog_id);
-       if (ctx->prog_fd < 0) {
-               err = -errno;
-               goto err_prog_fd;
-       }
-       err = xsk_lookup_bpf_maps(xsk);
-       if (err)
-               goto err_lookup_maps;
-
-       if (!xsk->rx)
-               return err;
-
-       err = xsk_set_bpf_maps(xsk);
-       if (err)
-               goto err_set_maps;
-
-       return err;
-
-err_set_maps:
-       close(ctx->xsks_map_fd);
-err_lookup_maps:
-       close(ctx->prog_fd);
-err_prog_fd:
-       if (ctx->has_bpf_link)
-               close(ctx->link_fd);
-       return err;
-}
-
-static int __xsk_setup_xdp_prog(struct xsk_socket *_xdp, int *xsks_map_fd)
-{
-       struct xsk_socket *xsk = _xdp;
-       struct xsk_ctx *ctx = xsk->ctx;
-       __u32 prog_id = 0;
-       int err;
-
-       if (ctx->has_bpf_link)
-               err = xsk_link_lookup(ctx->ifindex, &prog_id, &ctx->link_fd);
-       else
-               err = bpf_get_link_xdp_id(ctx->ifindex, &prog_id, xsk->config.xdp_flags);
-
-       if (err)
-               return err;
-
-       err = !prog_id ? xsk_init_xdp_res(xsk, xsks_map_fd) :
-                        xsk_lookup_xdp_res(xsk, xsks_map_fd, prog_id);
-
-       if (!err && xsks_map_fd)
-               *xsks_map_fd = ctx->xsks_map_fd;
-
-       return err;
-}
-
-static struct xsk_ctx *xsk_get_ctx(struct xsk_umem *umem, int ifindex,
-                                  __u32 queue_id)
-{
-       struct xsk_ctx *ctx;
-
-       if (list_empty(&umem->ctx_list))
-               return NULL;
-
-       list_for_each_entry(ctx, &umem->ctx_list, list) {
-               if (ctx->ifindex == ifindex && ctx->queue_id == queue_id) {
-                       ctx->refcount++;
-                       return ctx;
-               }
-       }
-
-       return NULL;
-}
-
-static void xsk_put_ctx(struct xsk_ctx *ctx, bool unmap)
-{
-       struct xsk_umem *umem = ctx->umem;
-       struct xdp_mmap_offsets off;
-       int err;
-
-       if (--ctx->refcount)
-               return;
-
-       if (!unmap)
-               goto out_free;
-
-       err = xsk_get_mmap_offsets(umem->fd, &off);
-       if (err)
-               goto out_free;
-
-       munmap(ctx->fill->ring - off.fr.desc, off.fr.desc + umem->config.fill_size *
-              sizeof(__u64));
-       munmap(ctx->comp->ring - off.cr.desc, off.cr.desc + umem->config.comp_size *
-              sizeof(__u64));
-
-out_free:
-       list_del(&ctx->list);
-       free(ctx);
-}
-
-static struct xsk_ctx *xsk_create_ctx(struct xsk_socket *xsk,
-                                     struct xsk_umem *umem, int ifindex,
-                                     const char *ifname, __u32 queue_id,
-                                     struct xsk_ring_prod *fill,
-                                     struct xsk_ring_cons *comp)
-{
-       struct xsk_ctx *ctx;
-       int err;
-
-       ctx = calloc(1, sizeof(*ctx));
-       if (!ctx)
-               return NULL;
-
-       if (!umem->fill_save) {
-               err = xsk_create_umem_rings(umem, xsk->fd, fill, comp);
-               if (err) {
-                       free(ctx);
-                       return NULL;
-               }
-       } else if (umem->fill_save != fill || umem->comp_save != comp) {
-               /* Copy over rings to new structs. */
-               memcpy(fill, umem->fill_save, sizeof(*fill));
-               memcpy(comp, umem->comp_save, sizeof(*comp));
-       }
-
-       ctx->ifindex = ifindex;
-       ctx->refcount = 1;
-       ctx->umem = umem;
-       ctx->queue_id = queue_id;
-       libbpf_strlcpy(ctx->ifname, ifname, IFNAMSIZ);
-
-       ctx->fill = fill;
-       ctx->comp = comp;
-       list_add(&ctx->list, &umem->ctx_list);
-       return ctx;
-}
-
-static void xsk_destroy_xsk_struct(struct xsk_socket *xsk)
-{
-       free(xsk->ctx);
-       free(xsk);
-}
-
-int xsk_socket__update_xskmap(struct xsk_socket *xsk, int fd)
-{
-       xsk->ctx->xsks_map_fd = fd;
-       return xsk_set_bpf_maps(xsk);
-}
-
-int xsk_setup_xdp_prog(int ifindex, int *xsks_map_fd)
-{
-       struct xsk_socket *xsk;
-       int res;
-
-       xsk = calloc(1, sizeof(*xsk));
-       if (!xsk)
-               return -ENOMEM;
-
-       res = xsk_create_xsk_struct(ifindex, xsk);
-       if (res) {
-               free(xsk);
-               return -EINVAL;
-       }
-
-       res = __xsk_setup_xdp_prog(xsk, xsks_map_fd);
-
-       xsk_destroy_xsk_struct(xsk);
-
-       return res;
-}
-
-int xsk_socket__create_shared(struct xsk_socket **xsk_ptr,
-                             const char *ifname,
-                             __u32 queue_id, struct xsk_umem *umem,
-                             struct xsk_ring_cons *rx,
-                             struct xsk_ring_prod *tx,
-                             struct xsk_ring_prod *fill,
-                             struct xsk_ring_cons *comp,
-                             const struct xsk_socket_config *usr_config)
-{
-       bool unmap, rx_setup_done = false, tx_setup_done = false;
-       void *rx_map = NULL, *tx_map = NULL;
-       struct sockaddr_xdp sxdp = {};
-       struct xdp_mmap_offsets off;
-       struct xsk_socket *xsk;
-       struct xsk_ctx *ctx;
-       int err, ifindex;
-
-       if (!umem || !xsk_ptr || !(rx || tx))
-               return -EFAULT;
-
-       unmap = umem->fill_save != fill;
-
-       xsk = calloc(1, sizeof(*xsk));
-       if (!xsk)
-               return -ENOMEM;
-
-       err = xsk_set_xdp_socket_config(&xsk->config, usr_config);
-       if (err)
-               goto out_xsk_alloc;
-
-       xsk->outstanding_tx = 0;
-       ifindex = if_nametoindex(ifname);
-       if (!ifindex) {
-               err = -errno;
-               goto out_xsk_alloc;
-       }
-
-       if (umem->refcount++ > 0) {
-               xsk->fd = socket(AF_XDP, SOCK_RAW | SOCK_CLOEXEC, 0);
-               if (xsk->fd < 0) {
-                       err = -errno;
-                       goto out_xsk_alloc;
-               }
-       } else {
-               xsk->fd = umem->fd;
-               rx_setup_done = umem->rx_ring_setup_done;
-               tx_setup_done = umem->tx_ring_setup_done;
-       }
-
-       ctx = xsk_get_ctx(umem, ifindex, queue_id);
-       if (!ctx) {
-               if (!fill || !comp) {
-                       err = -EFAULT;
-                       goto out_socket;
-               }
-
-               ctx = xsk_create_ctx(xsk, umem, ifindex, ifname, queue_id,
-                                    fill, comp);
-               if (!ctx) {
-                       err = -ENOMEM;
-                       goto out_socket;
-               }
-       }
-       xsk->ctx = ctx;
-       xsk->ctx->has_bpf_link = xsk_probe_bpf_link();
-
-       if (rx && !rx_setup_done) {
-               err = setsockopt(xsk->fd, SOL_XDP, XDP_RX_RING,
-                                &xsk->config.rx_size,
-                                sizeof(xsk->config.rx_size));
-               if (err) {
-                       err = -errno;
-                       goto out_put_ctx;
-               }
-               if (xsk->fd == umem->fd)
-                       umem->rx_ring_setup_done = true;
-       }
-       if (tx && !tx_setup_done) {
-               err = setsockopt(xsk->fd, SOL_XDP, XDP_TX_RING,
-                                &xsk->config.tx_size,
-                                sizeof(xsk->config.tx_size));
-               if (err) {
-                       err = -errno;
-                       goto out_put_ctx;
-               }
-               if (xsk->fd == umem->fd)
-                       umem->tx_ring_setup_done = true;
-       }
-
-       err = xsk_get_mmap_offsets(xsk->fd, &off);
-       if (err) {
-               err = -errno;
-               goto out_put_ctx;
-       }
-
-       if (rx) {
-               rx_map = mmap(NULL, off.rx.desc +
-                             xsk->config.rx_size * sizeof(struct xdp_desc),
-                             PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE,
-                             xsk->fd, XDP_PGOFF_RX_RING);
-               if (rx_map == MAP_FAILED) {
-                       err = -errno;
-                       goto out_put_ctx;
-               }
-
-               rx->mask = xsk->config.rx_size - 1;
-               rx->size = xsk->config.rx_size;
-               rx->producer = rx_map + off.rx.producer;
-               rx->consumer = rx_map + off.rx.consumer;
-               rx->flags = rx_map + off.rx.flags;
-               rx->ring = rx_map + off.rx.desc;
-               rx->cached_prod = *rx->producer;
-               rx->cached_cons = *rx->consumer;
-       }
-       xsk->rx = rx;
-
-       if (tx) {
-               tx_map = mmap(NULL, off.tx.desc +
-                             xsk->config.tx_size * sizeof(struct xdp_desc),
-                             PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE,
-                             xsk->fd, XDP_PGOFF_TX_RING);
-               if (tx_map == MAP_FAILED) {
-                       err = -errno;
-                       goto out_mmap_rx;
-               }
-
-               tx->mask = xsk->config.tx_size - 1;
-               tx->size = xsk->config.tx_size;
-               tx->producer = tx_map + off.tx.producer;
-               tx->consumer = tx_map + off.tx.consumer;
-               tx->flags = tx_map + off.tx.flags;
-               tx->ring = tx_map + off.tx.desc;
-               tx->cached_prod = *tx->producer;
-               /* cached_cons is r->size bigger than the real consumer pointer
-                * See xsk_prod_nb_free
-                */
-               tx->cached_cons = *tx->consumer + xsk->config.tx_size;
-       }
-       xsk->tx = tx;
-
-       sxdp.sxdp_family = PF_XDP;
-       sxdp.sxdp_ifindex = ctx->ifindex;
-       sxdp.sxdp_queue_id = ctx->queue_id;
-       if (umem->refcount > 1) {
-               sxdp.sxdp_flags |= XDP_SHARED_UMEM;
-               sxdp.sxdp_shared_umem_fd = umem->fd;
-       } else {
-               sxdp.sxdp_flags = xsk->config.bind_flags;
-       }
-
-       err = bind(xsk->fd, (struct sockaddr *)&sxdp, sizeof(sxdp));
-       if (err) {
-               err = -errno;
-               goto out_mmap_tx;
-       }
-
-       ctx->prog_fd = -1;
-
-       if (!(xsk->config.libbpf_flags & XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD)) {
-               err = __xsk_setup_xdp_prog(xsk, NULL);
-               if (err)
-                       goto out_mmap_tx;
-       }
-
-       *xsk_ptr = xsk;
-       umem->fill_save = NULL;
-       umem->comp_save = NULL;
-       return 0;
-
-out_mmap_tx:
-       if (tx)
-               munmap(tx_map, off.tx.desc +
-                      xsk->config.tx_size * sizeof(struct xdp_desc));
-out_mmap_rx:
-       if (rx)
-               munmap(rx_map, off.rx.desc +
-                      xsk->config.rx_size * sizeof(struct xdp_desc));
-out_put_ctx:
-       xsk_put_ctx(ctx, unmap);
-out_socket:
-       if (--umem->refcount)
-               close(xsk->fd);
-out_xsk_alloc:
-       free(xsk);
-       return err;
-}
-
-int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
-                      __u32 queue_id, struct xsk_umem *umem,
-                      struct xsk_ring_cons *rx, struct xsk_ring_prod *tx,
-                      const struct xsk_socket_config *usr_config)
-{
-       if (!umem)
-               return -EFAULT;
-
-       return xsk_socket__create_shared(xsk_ptr, ifname, queue_id, umem,
-                                        rx, tx, umem->fill_save,
-                                        umem->comp_save, usr_config);
-}
-
-int xsk_umem__delete(struct xsk_umem *umem)
-{
-       struct xdp_mmap_offsets off;
-       int err;
-
-       if (!umem)
-               return 0;
-
-       if (umem->refcount)
-               return -EBUSY;
-
-       err = xsk_get_mmap_offsets(umem->fd, &off);
-       if (!err && umem->fill_save && umem->comp_save) {
-               munmap(umem->fill_save->ring - off.fr.desc,
-                      off.fr.desc + umem->config.fill_size * sizeof(__u64));
-               munmap(umem->comp_save->ring - off.cr.desc,
-                      off.cr.desc + umem->config.comp_size * sizeof(__u64));
-       }
-
-       close(umem->fd);
-       free(umem);
-
-       return 0;
-}
-
-void xsk_socket__delete(struct xsk_socket *xsk)
-{
-       size_t desc_sz = sizeof(struct xdp_desc);
-       struct xdp_mmap_offsets off;
-       struct xsk_umem *umem;
-       struct xsk_ctx *ctx;
-       int err;
-
-       if (!xsk)
-               return;
-
-       ctx = xsk->ctx;
-       umem = ctx->umem;
-       if (ctx->prog_fd != -1) {
-               xsk_delete_bpf_maps(xsk);
-               close(ctx->prog_fd);
-               if (ctx->has_bpf_link)
-                       close(ctx->link_fd);
-       }
-
-       err = xsk_get_mmap_offsets(xsk->fd, &off);
-       if (!err) {
-               if (xsk->rx) {
-                       munmap(xsk->rx->ring - off.rx.desc,
-                              off.rx.desc + xsk->config.rx_size * desc_sz);
-               }
-               if (xsk->tx) {
-                       munmap(xsk->tx->ring - off.tx.desc,
-                              off.tx.desc + xsk->config.tx_size * desc_sz);
-               }
-       }
-
-       xsk_put_ctx(ctx, true);
-
-       umem->refcount--;
-       /* Do not close an fd that also has an associated umem connected
-        * to it.
-        */
-       if (xsk->fd != umem->fd)
-               close(xsk->fd);
-       free(xsk);
-}
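
With xsk.c gone, AF_XDP setup is expected to go through libxdp, whose API is identical. Below is a minimal, hedged sketch of the setup sequence the deleted code implemented — UMEM creation followed by socket creation — assuming libxdp's drop-in header; the interface name, queue id, and frame count are illustrative, and in real code the ring structs must outlive the socket rather than live on the stack:

        #include <errno.h>
        #include <stdlib.h>
        #include <unistd.h>
        #include <xdp/xsk.h>    /* libxdp's drop-in replacement for <bpf/xsk.h> */

        #define NUM_FRAMES 4096 /* illustrative sizing */

        static int xsk_setup_sketch(struct xsk_umem **umem, struct xsk_socket **xsk)
        {
                struct xsk_ring_prod fill, tx;
                struct xsk_ring_cons comp, rx;
                size_t size = (size_t)NUM_FRAMES * XSK_UMEM__DEFAULT_FRAME_SIZE;
                void *bufs;
                int err;

                /* the UMEM area must be page aligned (cf. xsk_page_aligned() above) */
                if (posix_memalign(&bufs, getpagesize(), size))
                        return -ENOMEM;

                err = xsk_umem__create(umem, bufs, size, &fill, &comp,
                                       NULL /* default config */);
                if (err) {
                        free(bufs);
                        return err;
                }

                err = xsk_socket__create(xsk, "eth0", 0 /* queue */, *umem,
                                         &rx, &tx, NULL /* default config */);
                if (err) {
                        xsk_umem__delete(*umem);
                        free(bufs);
                }
                return err;
        }
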
diff --git a/tools/lib/bpf/xsk.h b/tools/lib/bpf/xsk.h
deleted file mode 100644
index 64e9c57..0000000
+++ /dev/null
@@ -1,336 +0,0 @@
-/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
-
-/*
- * AF_XDP user-space access library.
- *
- * Copyright (c) 2018 - 2019 Intel Corporation.
- * Copyright (c) 2019 Facebook
- *
- * Author(s): Magnus Karlsson <magnus.karlsson@intel.com>
- */
-
-#ifndef __LIBBPF_XSK_H
-#define __LIBBPF_XSK_H
-
-#include <stdio.h>
-#include <stdint.h>
-#include <stdbool.h>
-#include <linux/if_xdp.h>
-
-#include "libbpf.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/* This whole API has been deprecated and moved to libxdp, which can be found
- * at https://github.com/xdp-project/xdp-tools. The APIs are exactly the same,
- * so switching should only require linking with libxdp instead of libbpf. If
- * that is not the case, please submit a bug report on the aforementioned page.
- */
-
-/* Load-Acquire Store-Release barriers used by the XDP socket
- * library. The following macros should *NOT* be considered part of
- * the xsk.h API, and are subject to change at any time.
- *
- * LIBRARY INTERNAL
- */
-
-#define __XSK_READ_ONCE(x) (*(volatile typeof(x) *)&x)
-#define __XSK_WRITE_ONCE(x, v) (*(volatile typeof(x) *)&x) = (v)
-
-#if defined(__i386__) || defined(__x86_64__)
-# define libbpf_smp_store_release(p, v)                                        \
-       do {                                                            \
-               asm volatile("" : : : "memory");                        \
-               __XSK_WRITE_ONCE(*p, v);                                \
-       } while (0)
-# define libbpf_smp_load_acquire(p)                                    \
-       ({                                                              \
-               typeof(*p) ___p1 = __XSK_READ_ONCE(*p);                 \
-               asm volatile("" : : : "memory");                        \
-               ___p1;                                                  \
-       })
-#elif defined(__aarch64__)
-# define libbpf_smp_store_release(p, v)                                        \
-               asm volatile ("stlr %w1, %0" : "=Q" (*p) : "r" (v) : "memory")
-# define libbpf_smp_load_acquire(p)                                    \
-       ({                                                              \
-               typeof(*p) ___p1;                                       \
-               asm volatile ("ldar %w0, %1"                            \
-                             : "=r" (___p1) : "Q" (*p) : "memory");    \
-               ___p1;                                                  \
-       })
-#elif defined(__riscv)
-# define libbpf_smp_store_release(p, v)                                        \
-       do {                                                            \
-               asm volatile ("fence rw,w" : : : "memory");             \
-               __XSK_WRITE_ONCE(*p, v);                                \
-       } while (0)
-# define libbpf_smp_load_acquire(p)                                    \
-       ({                                                              \
-               typeof(*p) ___p1 = __XSK_READ_ONCE(*p);                 \
-               asm volatile ("fence r,rw" : : : "memory");             \
-               ___p1;                                                  \
-       })
-#endif
-
-#ifndef libbpf_smp_store_release
-#define libbpf_smp_store_release(p, v)                                 \
-       do {                                                            \
-               __sync_synchronize();                                   \
-               __XSK_WRITE_ONCE(*p, v);                                \
-       } while (0)
-#endif
-
-#ifndef libbpf_smp_load_acquire
-#define libbpf_smp_load_acquire(p)                                     \
-       ({                                                              \
-               typeof(*p) ___p1 = __XSK_READ_ONCE(*p);                 \
-               __sync_synchronize();                                   \
-               ___p1;                                                  \
-       })
-#endif
-
-/* LIBRARY INTERNAL -- END */
-
-/* Do not access these members directly. Use the functions below. */
-#define DEFINE_XSK_RING(name) \
-struct name { \
-       __u32 cached_prod; \
-       __u32 cached_cons; \
-       __u32 mask; \
-       __u32 size; \
-       __u32 *producer; \
-       __u32 *consumer; \
-       void *ring; \
-       __u32 *flags; \
-}
-
-DEFINE_XSK_RING(xsk_ring_prod);
-DEFINE_XSK_RING(xsk_ring_cons);
-
-/* For a detailed explanation on the memory barriers associated with the
- * ring, please take a look at net/xdp/xsk_queue.h.
- */
-
-struct xsk_umem;
-struct xsk_socket;
-
-static inline __u64 *xsk_ring_prod__fill_addr(struct xsk_ring_prod *fill,
-                                             __u32 idx)
-{
-       __u64 *addrs = (__u64 *)fill->ring;
-
-       return &addrs[idx & fill->mask];
-}
-
-static inline const __u64 *
-xsk_ring_cons__comp_addr(const struct xsk_ring_cons *comp, __u32 idx)
-{
-       const __u64 *addrs = (const __u64 *)comp->ring;
-
-       return &addrs[idx & comp->mask];
-}
-
-static inline struct xdp_desc *xsk_ring_prod__tx_desc(struct xsk_ring_prod *tx,
-                                                     __u32 idx)
-{
-       struct xdp_desc *descs = (struct xdp_desc *)tx->ring;
-
-       return &descs[idx & tx->mask];
-}
-
-static inline const struct xdp_desc *
-xsk_ring_cons__rx_desc(const struct xsk_ring_cons *rx, __u32 idx)
-{
-       const struct xdp_desc *descs = (const struct xdp_desc *)rx->ring;
-
-       return &descs[idx & rx->mask];
-}
-
-static inline int xsk_ring_prod__needs_wakeup(const struct xsk_ring_prod *r)
-{
-       return *r->flags & XDP_RING_NEED_WAKEUP;
-}
-
-static inline __u32 xsk_prod_nb_free(struct xsk_ring_prod *r, __u32 nb)
-{
-       __u32 free_entries = r->cached_cons - r->cached_prod;
-
-       if (free_entries >= nb)
-               return free_entries;
-
-       /* Refresh the local tail pointer.
-        * cached_cons is r->size bigger than the real consumer pointer so
-        * that this addition can be avoided in the more frequently
-        * executed code that computes free_entries at the beginning of
-        * this function. Without this optimization it would have been
-        * free_entries = r->cached_cons - r->cached_prod + r->size.
-        */
-       r->cached_cons = libbpf_smp_load_acquire(r->consumer);
-       r->cached_cons += r->size;
-
-       return r->cached_cons - r->cached_prod;
-}
-
-static inline __u32 xsk_cons_nb_avail(struct xsk_ring_cons *r, __u32 nb)
-{
-       __u32 entries = r->cached_prod - r->cached_cons;
-
-       if (entries == 0) {
-               r->cached_prod = libbpf_smp_load_acquire(r->producer);
-               entries = r->cached_prod - r->cached_cons;
-       }
-
-       return (entries > nb) ? nb : entries;
-}
-
-static inline __u32 xsk_ring_prod__reserve(struct xsk_ring_prod *prod, __u32 nb, __u32 *idx)
-{
-       if (xsk_prod_nb_free(prod, nb) < nb)
-               return 0;
-
-       *idx = prod->cached_prod;
-       prod->cached_prod += nb;
-
-       return nb;
-}
-
-static inline void xsk_ring_prod__submit(struct xsk_ring_prod *prod, __u32 nb)
-{
-       /* Make sure everything has been written to the ring before indicating
-        * this to the kernel by writing the producer pointer.
-        */
-       libbpf_smp_store_release(prod->producer, *prod->producer + nb);
-}
-
-static inline __u32 xsk_ring_cons__peek(struct xsk_ring_cons *cons, __u32 nb, __u32 *idx)
-{
-       __u32 entries = xsk_cons_nb_avail(cons, nb);
-
-       if (entries > 0) {
-               *idx = cons->cached_cons;
-               cons->cached_cons += entries;
-       }
-
-       return entries;
-}
-
-static inline void xsk_ring_cons__cancel(struct xsk_ring_cons *cons, __u32 nb)
-{
-       cons->cached_cons -= nb;
-}
-
-static inline void xsk_ring_cons__release(struct xsk_ring_cons *cons, __u32 nb)
-{
-       /* Make sure data has been read before indicating we are done
-        * with the entries by updating the consumer pointer.
-        */
-       libbpf_smp_store_release(cons->consumer, *cons->consumer + nb);
-
-}
-
-static inline void *xsk_umem__get_data(void *umem_area, __u64 addr)
-{
-       return &((char *)umem_area)[addr];
-}
-
-static inline __u64 xsk_umem__extract_addr(__u64 addr)
-{
-       return addr & XSK_UNALIGNED_BUF_ADDR_MASK;
-}
-
-static inline __u64 xsk_umem__extract_offset(__u64 addr)
-{
-       return addr >> XSK_UNALIGNED_BUF_OFFSET_SHIFT;
-}
-
-static inline __u64 xsk_umem__add_offset_to_addr(__u64 addr)
-{
-       return xsk_umem__extract_addr(addr) + xsk_umem__extract_offset(addr);
-}
-
-LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp")
-int xsk_umem__fd(const struct xsk_umem *umem);
-LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp")
-int xsk_socket__fd(const struct xsk_socket *xsk);
-
-#define XSK_RING_CONS__DEFAULT_NUM_DESCS      2048
-#define XSK_RING_PROD__DEFAULT_NUM_DESCS      2048
-#define XSK_UMEM__DEFAULT_FRAME_SHIFT    12 /* 4096 bytes */
-#define XSK_UMEM__DEFAULT_FRAME_SIZE     (1 << XSK_UMEM__DEFAULT_FRAME_SHIFT)
-#define XSK_UMEM__DEFAULT_FRAME_HEADROOM 0
-#define XSK_UMEM__DEFAULT_FLAGS 0
-
-struct xsk_umem_config {
-       __u32 fill_size;
-       __u32 comp_size;
-       __u32 frame_size;
-       __u32 frame_headroom;
-       __u32 flags;
-};
-
-LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp")
-int xsk_setup_xdp_prog(int ifindex, int *xsks_map_fd);
-LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp")
-int xsk_socket__update_xskmap(struct xsk_socket *xsk, int xsks_map_fd);
-
-/* Flags for the libbpf_flags field. */
-#define XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD (1 << 0)
-
-struct xsk_socket_config {
-       __u32 rx_size;
-       __u32 tx_size;
-       __u32 libbpf_flags;
-       __u32 xdp_flags;
-       __u16 bind_flags;
-};
-
-/* Set config to NULL to get the default configuration. */
-LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp")
-int xsk_umem__create(struct xsk_umem **umem,
-                    void *umem_area, __u64 size,
-                    struct xsk_ring_prod *fill,
-                    struct xsk_ring_cons *comp,
-                    const struct xsk_umem_config *config);
-LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp")
-int xsk_umem__create_v0_0_2(struct xsk_umem **umem,
-                           void *umem_area, __u64 size,
-                           struct xsk_ring_prod *fill,
-                           struct xsk_ring_cons *comp,
-                           const struct xsk_umem_config *config);
-LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp")
-int xsk_umem__create_v0_0_4(struct xsk_umem **umem,
-                           void *umem_area, __u64 size,
-                           struct xsk_ring_prod *fill,
-                           struct xsk_ring_cons *comp,
-                           const struct xsk_umem_config *config);
-LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp")
-int xsk_socket__create(struct xsk_socket **xsk,
-                      const char *ifname, __u32 queue_id,
-                      struct xsk_umem *umem,
-                      struct xsk_ring_cons *rx,
-                      struct xsk_ring_prod *tx,
-                      const struct xsk_socket_config *config);
-LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp")
-int xsk_socket__create_shared(struct xsk_socket **xsk_ptr,
-                             const char *ifname,
-                             __u32 queue_id, struct xsk_umem *umem,
-                             struct xsk_ring_cons *rx,
-                             struct xsk_ring_prod *tx,
-                             struct xsk_ring_prod *fill,
-                             struct xsk_ring_cons *comp,
-                             const struct xsk_socket_config *config);
-
-/* Returns 0 for success and -EBUSY if the umem is still in use. */
-LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp")
-int xsk_umem__delete(struct xsk_umem *umem);
-LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp")
-void xsk_socket__delete(struct xsk_socket *xsk);
-
-#ifdef __cplusplus
-} /* extern "C" */
-#endif
-
-#endif /* __LIBBPF_XSK_H */
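
The ring accessors above compose into the usual receive pattern: peek a batch of completed RX descriptors, translate each address into the UMEM, process, then release. A small hedged sketch using only the functions defined in this header (batch size and per-packet processing are placeholders):

        /* Drain up to 64 completed RX descriptors; returns how many were seen. */
        static unsigned int rx_drain(struct xsk_ring_cons *rx, void *umem_area)
        {
                __u32 idx = 0, i;
                __u32 rcvd = xsk_ring_cons__peek(rx, 64, &idx);

                for (i = 0; i < rcvd; i++) {
                        const struct xdp_desc *desc = xsk_ring_cons__rx_desc(rx, idx + i);
                        void *pkt = xsk_umem__get_data(umem_area, desc->addr);

                        (void)pkt;      /* ... process desc->len bytes at pkt ... */
                }
                if (rcvd)
                        xsk_ring_cons__release(rx, rcvd);
                return rcvd;
        }
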
diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c
index f8ad581..6bd7c28 100644
@@ -9,6 +9,7 @@
 #include <linux/bpf.h>
 #include <bpf/libbpf.h>
 #include <bpf/bpf.h>
+#include <linux/filter.h>
 #include <linux/err.h>
 #include <linux/kernel.h>
 #include <linux/string.h>
@@ -49,6 +50,7 @@ struct bpf_prog_priv {
        struct bpf_insn *insns_buf;
        int nr_types;
        int *type_mapping;
+       int *prologue_fds;
 };
 
 struct bpf_perf_object {
@@ -56,6 +58,11 @@ struct bpf_perf_object {
        struct bpf_object *obj;
 };
 
+struct bpf_preproc_result {
+       struct bpf_insn *new_insn_ptr;
+       int new_insn_cnt;
+};
+
 static LIST_HEAD(bpf_objects_list);
 static struct hashmap *bpf_program_hash;
 static struct hashmap *bpf_map_hash;
@@ -86,6 +93,7 @@ bpf_perf_object__next(struct bpf_perf_object *prev)
             (perf_obj) = (tmp), (tmp) = bpf_perf_object__next(tmp))
 
 static bool libbpf_initialized;
+static int libbpf_sec_handler;
 
 static int bpf_perf_object__add(struct bpf_object *obj)
 {
@@ -99,12 +107,76 @@ static int bpf_perf_object__add(struct bpf_object *obj)
        return perf_obj ? 0 : -ENOMEM;
 }
 
+static void *program_priv(const struct bpf_program *prog)
+{
+       void *priv;
+
+       if (IS_ERR_OR_NULL(bpf_program_hash))
+               return NULL;
+       if (!hashmap__find(bpf_program_hash, prog, &priv))
+               return NULL;
+       return priv;
+}
+
+static struct bpf_insn prologue_init_insn[] = {
+       BPF_MOV64_IMM(BPF_REG_2, 0),
+       BPF_MOV64_IMM(BPF_REG_3, 0),
+       BPF_MOV64_IMM(BPF_REG_4, 0),
+       BPF_MOV64_IMM(BPF_REG_5, 0),
+};
+
+static int libbpf_prog_prepare_load_fn(struct bpf_program *prog,
+                                      struct bpf_prog_load_opts *opts __maybe_unused,
+                                      long cookie __maybe_unused)
+{
+       size_t init_size_cnt = ARRAY_SIZE(prologue_init_insn);
+       size_t orig_insn_cnt, insn_cnt, init_size, orig_size;
+       struct bpf_prog_priv *priv = program_priv(prog);
+       const struct bpf_insn *orig_insn;
+       struct bpf_insn *insn;
+
+       if (IS_ERR_OR_NULL(priv)) {
+               pr_debug("bpf: failed to get private field\n");
+               return -BPF_LOADER_ERRNO__INTERNAL;
+       }
+
+       if (!priv->need_prologue)
+               return 0;
+
+       /* prepend initialization code to program instructions */
+       orig_insn = bpf_program__insns(prog);
+       orig_insn_cnt = bpf_program__insn_cnt(prog);
+       init_size = init_size_cnt * sizeof(*insn);
+       orig_size = orig_insn_cnt * sizeof(*insn);
+
+       insn_cnt = orig_insn_cnt + init_size_cnt;
+       insn = malloc(insn_cnt * sizeof(*insn));
+       if (!insn)
+               return -ENOMEM;
+
+       memcpy(insn, prologue_init_insn, init_size);
+       memcpy((char *) insn + init_size, orig_insn, orig_size);
+       bpf_program__set_insns(prog, insn, insn_cnt);
+       return 0;
+}
+
 static int libbpf_init(void)
 {
+       LIBBPF_OPTS(libbpf_prog_handler_opts, handler_opts,
+               .prog_prepare_load_fn = libbpf_prog_prepare_load_fn,
+       );
+
        if (libbpf_initialized)
                return 0;
 
        libbpf_set_print(libbpf_perf_print);
+       libbpf_sec_handler = libbpf_register_prog_handler(NULL, BPF_PROG_TYPE_KPROBE,
+                                                         0, &handler_opts);
+       if (libbpf_sec_handler < 0) {
+               pr_debug("bpf: failed to register libbpf section handler: %d\n",
+                        libbpf_sec_handler);
+               return -BPF_LOADER_ERRNO__INTERNAL;
+       }
        libbpf_initialized = true;
        return 0;
 }
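
The section-handler registration above replaces the removed bpf_program__set_prep() callback: libbpf now invokes prog_prepare_load_fn right before loading, where the instruction buffer can be rewritten via bpf_program__set_insns(). A standalone, hedged sketch of the mechanism (the callback body is a stand-in, not perf's actual prologue logic):

        #include <bpf/libbpf.h>

        static int prepare_load(struct bpf_program *prog,
                                struct bpf_prog_load_opts *opts, long cookie)
        {
                (void)prog; (void)opts; (void)cookie;
                /* inspect or rewrite instructions here, e.g. via
                 * bpf_program__insns() / bpf_program__set_insns()
                 */
                return 0;
        }

        static int register_kprobe_handler(void)
        {
                LIBBPF_OPTS(libbpf_prog_handler_opts, opts,
                        .prog_prepare_load_fn = prepare_load,
                );

                /* NULL section prefix: act as the fallback handler for this
                 * program type, as the perf code above does.
                 */
                return libbpf_register_prog_handler(NULL, BPF_PROG_TYPE_KPROBE,
                                                    0, &opts);
        }
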
@@ -188,14 +260,31 @@ struct bpf_object *bpf__prepare_load(const char *filename, bool source)
        return obj;
 }
 
+static void close_prologue_programs(struct bpf_prog_priv *priv)
+{
+       struct perf_probe_event *pev;
+       int i, fd;
+
+       if (!priv->need_prologue)
+               return;
+       pev = &priv->pev;
+       for (i = 0; i < pev->ntevs; i++) {
+               fd = priv->prologue_fds[i];
+               if (fd != -1)
+                       close(fd);
+       }
+}
+
 static void
 clear_prog_priv(const struct bpf_program *prog __maybe_unused,
                void *_priv)
 {
        struct bpf_prog_priv *priv = _priv;
 
+       close_prologue_programs(priv);
        cleanup_perf_probe_events(&priv->pev, 1);
        zfree(&priv->insns_buf);
+       zfree(&priv->prologue_fds);
        zfree(&priv->type_mapping);
        zfree(&priv->sys_name);
        zfree(&priv->evt_name);
@@ -243,17 +332,6 @@ static bool ptr_equal(const void *key1, const void *key2,
        return key1 == key2;
 }
 
-static void *program_priv(const struct bpf_program *prog)
-{
-       void *priv;
-
-       if (IS_ERR_OR_NULL(bpf_program_hash))
-               return NULL;
-       if (!hashmap__find(bpf_program_hash, prog, &priv))
-               return NULL;
-       return priv;
-}
-
 static int program_set_priv(struct bpf_program *prog, void *priv)
 {
        void *old_priv;
@@ -558,8 +636,8 @@ static int bpf__prepare_probe(void)
 
 static int
 preproc_gen_prologue(struct bpf_program *prog, int n,
-                    struct bpf_insn *orig_insns, int orig_insns_cnt,
-                    struct bpf_prog_prep_result *res)
+                    const struct bpf_insn *orig_insns, int orig_insns_cnt,
+                    struct bpf_preproc_result *res)
 {
        struct bpf_prog_priv *priv = program_priv(prog);
        struct probe_trace_event *tev;
@@ -607,7 +685,6 @@ preproc_gen_prologue(struct bpf_program *prog, int n,
 
        res->new_insn_ptr = buf;
        res->new_insn_cnt = prologue_cnt + orig_insns_cnt;
-       res->pfd = NULL;
        return 0;
 
 errout:
@@ -715,7 +792,7 @@ static int hook_load_preprocessor(struct bpf_program *prog)
        struct bpf_prog_priv *priv = program_priv(prog);
        struct perf_probe_event *pev;
        bool need_prologue = false;
-       int err, i;
+       int i;
 
        if (IS_ERR_OR_NULL(priv)) {
                pr_debug("Internal error when hook preprocessor\n");
@@ -753,6 +830,13 @@ static int hook_load_preprocessor(struct bpf_program *prog)
                return -ENOMEM;
        }
 
+       priv->prologue_fds = malloc(sizeof(int) * pev->ntevs);
+       if (!priv->prologue_fds) {
+               pr_debug("Not enough memory: alloc prologue fds failed\n");
+               return -ENOMEM;
+       }
+       memset(priv->prologue_fds, -1, sizeof(int) * pev->ntevs);
+
        priv->type_mapping = malloc(sizeof(int) * pev->ntevs);
        if (!priv->type_mapping) {
                pr_debug("Not enough memory: alloc type_mapping failed\n");
@@ -761,13 +845,7 @@ static int hook_load_preprocessor(struct bpf_program *prog)
        memset(priv->type_mapping, -1,
               sizeof(int) * pev->ntevs);
 
-       err = map_prologue(pev, priv->type_mapping, &priv->nr_types);
-       if (err)
-               return err;
-
-       err = bpf_program__set_prep(prog, priv->nr_types,
-                                   preproc_gen_prologue);
-       return err;
+       return map_prologue(pev, priv->type_mapping, &priv->nr_types);
 }
 
 int bpf__probe(struct bpf_object *obj)
@@ -874,6 +952,77 @@ int bpf__unprobe(struct bpf_object *obj)
        return ret;
 }
 
+static int bpf_object__load_prologue(struct bpf_object *obj)
+{
+       int init_cnt = ARRAY_SIZE(prologue_init_insn);
+       const struct bpf_insn *orig_insns;
+       struct bpf_preproc_result res;
+       struct perf_probe_event *pev;
+       struct bpf_program *prog;
+       int orig_insns_cnt;
+
+       bpf_object__for_each_program(prog, obj) {
+               struct bpf_prog_priv *priv = program_priv(prog);
+               int err, i, fd;
+
+               if (IS_ERR_OR_NULL(priv)) {
+                       pr_debug("bpf: failed to get private field\n");
+                       return -BPF_LOADER_ERRNO__INTERNAL;
+               }
+
+               if (!priv->need_prologue)
+                       continue;
+
+               /*
+                * For each program that needs a prologue we do the following:
+                *
+                * - take its current instructions and use them
+                *   to generate the new code with prologue
+                * - load new instructions with bpf_prog_load
+                *   and keep the fd in prologue_fds
+                * - new fd will be used in bpf__foreach_event
+                *   to connect this program with perf evsel
+                */
+               orig_insns = bpf_program__insns(prog);
+               orig_insns_cnt = bpf_program__insn_cnt(prog);
+
+               pev = &priv->pev;
+               for (i = 0; i < pev->ntevs; i++) {
+                       /*
+                        * Skip the artificial prologue_init_insn instructions
+                        * (init_cnt) so the generated prologue can take their
+                        * place.
+                        */
+                       err = preproc_gen_prologue(prog, i,
+                                                  orig_insns + init_cnt,
+                                                  orig_insns_cnt - init_cnt,
+                                                  &res);
+                       if (err)
+                               return err;
+
+                       fd = bpf_prog_load(bpf_program__get_type(prog),
+                                          bpf_program__name(prog), "GPL",
+                                          res.new_insn_ptr,
+                                          res.new_insn_cnt, NULL);
+                       if (fd < 0) {
+                               char bf[128];
+
+                               libbpf_strerror(-errno, bf, sizeof(bf));
+                               pr_debug("bpf: load objects with prologue failed: err=%d: (%s)\n",
+                                        -errno, bf);
+                               return -errno;
+                       }
+                       priv->prologue_fds[i] = fd;
+               }
+               /*
+                * We no longer need the original program,
+                * we can unload it.
+                */
+               bpf_program__unload(prog);
+       }
+       return 0;
+}
+
 int bpf__load(struct bpf_object *obj)
 {
        int err;
@@ -885,7 +1034,7 @@ int bpf__load(struct bpf_object *obj)
                pr_debug("bpf: load objects failed: err=%d: (%s)\n", err, bf);
                return err;
        }
-       return 0;
+       return bpf_object__load_prologue(obj);
 }
 
 int bpf__foreach_event(struct bpf_object *obj,
@@ -920,13 +1069,10 @@ int bpf__foreach_event(struct bpf_object *obj,
                for (i = 0; i < pev->ntevs; i++) {
                        tev = &pev->tevs[i];
 
-                       if (priv->need_prologue) {
-                               int type = priv->type_mapping[i];
-
-                               fd = bpf_program__nth_fd(prog, type);
-                       } else {
+                       if (priv->need_prologue)
+                               fd = priv->prologue_fds[i];
+                       else
                                fd = bpf_program__fd(prog);
-                       }
 
                        if (fd < 0) {
                                pr_debug("bpf: failed to get file descriptor\n");
index ca2f47f..3a8cb24 100644
@@ -41,6 +41,6 @@ test_cpp
 /bench
 *.ko
 *.tmp
-xdpxceiver
+xskxceiver
 xdp_redirect_multi
 xdp_synproxy
index cb8e552..8d59ec7 100644
@@ -82,7 +82,7 @@ TEST_PROGS_EXTENDED := with_addr.sh \
 TEST_GEN_PROGS_EXTENDED = test_sock_addr test_skb_cgroup_id_user \
        flow_dissector_load test_flow_dissector test_tcp_check_syncookie_user \
        test_lirc_mode2_user xdping test_cpp runqslower bench bpf_testmod.ko \
-       xdpxceiver xdp_redirect_multi xdp_synproxy
+       xskxceiver xdp_redirect_multi xdp_synproxy
 
 TEST_CUSTOM_PROGS = $(OUTPUT)/urandom_read
 
@@ -230,6 +230,8 @@ $(OUTPUT)/xdping: $(TESTING_HELPERS)
 $(OUTPUT)/flow_dissector_load: $(TESTING_HELPERS)
 $(OUTPUT)/test_maps: $(TESTING_HELPERS)
 $(OUTPUT)/test_verifier: $(TESTING_HELPERS) $(CAP_HELPERS)
+$(OUTPUT)/xsk.o: $(BPFOBJ)
+$(OUTPUT)/xskxceiver: $(OUTPUT)/xsk.o
 
 BPFTOOL ?= $(DEFAULT_BPFTOOL)
 $(DEFAULT_BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile)    \
@@ -571,6 +573,8 @@ $(OUTPUT)/bench_bloom_filter_map.o: $(OUTPUT)/bloom_filter_bench.skel.h
 $(OUTPUT)/bench_bpf_loop.o: $(OUTPUT)/bpf_loop_bench.skel.h
 $(OUTPUT)/bench_strncmp.o: $(OUTPUT)/strncmp_bench.skel.h
 $(OUTPUT)/bench_bpf_hashmap_full_update.o: $(OUTPUT)/bpf_hashmap_full_update_bench.skel.h
+$(OUTPUT)/bench_local_storage.o: $(OUTPUT)/local_storage_bench.skel.h
+$(OUTPUT)/bench_local_storage_rcu_tasks_trace.o: $(OUTPUT)/local_storage_rcu_tasks_trace_bench.skel.h
 $(OUTPUT)/bench.o: bench.h testing_helpers.h $(BPFOBJ)
 $(OUTPUT)/bench: LDLIBS += -lm
 $(OUTPUT)/bench: $(OUTPUT)/bench.o \
@@ -583,7 +587,9 @@ $(OUTPUT)/bench: $(OUTPUT)/bench.o \
                 $(OUTPUT)/bench_bloom_filter_map.o \
                 $(OUTPUT)/bench_bpf_loop.o \
                 $(OUTPUT)/bench_strncmp.o \
-                $(OUTPUT)/bench_bpf_hashmap_full_update.o
+                $(OUTPUT)/bench_bpf_hashmap_full_update.o \
+                $(OUTPUT)/bench_local_storage.o \
+                $(OUTPUT)/bench_local_storage_rcu_tasks_trace.o
        $(call msg,BINARY,,$@)
        $(Q)$(CC) $(CFLAGS) $(LDFLAGS) $(filter %.a %.o,$^) $(LDLIBS) -o $@
 
index d8aa62b..c1f20a1 100644
@@ -79,6 +79,43 @@ void hits_drops_report_progress(int iter, struct bench_res *res, long delta_ns)
               hits_per_sec, hits_per_prod, drops_per_sec, hits_per_sec + drops_per_sec);
 }
 
+void
+grace_period_latency_basic_stats(struct bench_res res[], int res_cnt, struct basic_stats *gp_stat)
+{
+       int i;
+
+       memset(gp_stat, 0, sizeof(struct basic_stats));
+
+       for (i = 0; i < res_cnt; i++)
+               gp_stat->mean += res[i].gp_ns / 1000.0 / (double)res[i].gp_ct / (0.0 + res_cnt);
+
+#define IT_MEAN_DIFF (res[i].gp_ns / 1000.0 / (double)res[i].gp_ct - gp_stat->mean)
+       if (res_cnt > 1) {
+               for (i = 0; i < res_cnt; i++)
+                       gp_stat->stddev += (IT_MEAN_DIFF * IT_MEAN_DIFF) / (res_cnt - 1.0);
+       }
+       gp_stat->stddev = sqrt(gp_stat->stddev);
+#undef IT_MEAN_DIFF
+}
+
+void
+grace_period_ticks_basic_stats(struct bench_res res[], int res_cnt, struct basic_stats *gp_stat)
+{
+       int i;
+
+       memset(gp_stat, 0, sizeof(struct basic_stats));
+       for (i = 0; i < res_cnt; i++)
+               gp_stat->mean += res[i].stime / (double)res[i].gp_ct / (0.0 + res_cnt);
+
+#define IT_MEAN_DIFF (res[i].stime / (double)res[i].gp_ct - gp_stat->mean)
+       if (res_cnt > 1) {
+               for (i = 0; i < res_cnt; i++)
+                       gp_stat->stddev += (IT_MEAN_DIFF * IT_MEAN_DIFF) / (res_cnt - 1.0);
+       }
+       gp_stat->stddev = sqrt(gp_stat->stddev);
+#undef IT_MEAN_DIFF
+}
+
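
In conventional notation, with x_i = gp_ns_i / 1000 / gp_ct_i the mean per-grace-period latency (in microseconds) of iteration i, the two helpers above compute the sample mean and the Bessel-corrected sample standard deviation over n = res_cnt iterations:

    \bar{x} = \frac{1}{n} \sum_{i=1}^{n} x_i,
    \qquad
    s = \sqrt{\frac{1}{n-1} \sum_{i=1}^{n} \left(x_i - \bar{x}\right)^2}

The ticks variant is identical with x_i = stime_i / gp_ct_i.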
 void hits_drops_report_final(struct bench_res res[], int res_cnt)
 {
        int i;
@@ -150,6 +187,53 @@ void ops_report_final(struct bench_res res[], int res_cnt)
        printf("latency %8.3lf ns/op\n", 1000.0 / hits_mean * env.producer_cnt);
 }
 
+void local_storage_report_progress(int iter, struct bench_res *res,
+                                  long delta_ns)
+{
+       double important_hits_per_sec, hits_per_sec;
+       double delta_sec = delta_ns / 1000000000.0;
+
+       hits_per_sec = res->hits / 1000000.0 / delta_sec;
+       important_hits_per_sec = res->important_hits / 1000000.0 / delta_sec;
+
+       printf("Iter %3d (%7.3lfus): ", iter, (delta_ns - 1000000000) / 1000.0);
+
+       printf("hits %8.3lfM/s ", hits_per_sec);
+       printf("important_hits %8.3lfM/s\n", important_hits_per_sec);
+}
+
+void local_storage_report_final(struct bench_res res[], int res_cnt)
+{
+       double important_hits_mean = 0.0, important_hits_stddev = 0.0;
+       double hits_mean = 0.0, hits_stddev = 0.0;
+       int i;
+
+       for (i = 0; i < res_cnt; i++) {
+               hits_mean += res[i].hits / 1000000.0 / (0.0 + res_cnt);
+               important_hits_mean += res[i].important_hits / 1000000.0 / (0.0 + res_cnt);
+       }
+
+       if (res_cnt > 1)  {
+               for (i = 0; i < res_cnt; i++) {
+                       hits_stddev += (hits_mean - res[i].hits / 1000000.0) *
+                                      (hits_mean - res[i].hits / 1000000.0) /
+                                      (res_cnt - 1.0);
+                       important_hits_stddev +=
+                                      (important_hits_mean - res[i].important_hits / 1000000.0) *
+                                      (important_hits_mean - res[i].important_hits / 1000000.0) /
+                                      (res_cnt - 1.0);
+               }
+
+               hits_stddev = sqrt(hits_stddev);
+               important_hits_stddev = sqrt(important_hits_stddev);
+       }
+       printf("Summary: hits throughput %8.3lf \u00B1 %5.3lf M ops/s, ",
+              hits_mean, hits_stddev);
+       printf("hits latency %8.3lf ns/op, ", 1000.0 / hits_mean);
+       printf("important_hits throughput %8.3lf \u00B1 %5.3lf M ops/s\n",
+              important_hits_mean, important_hits_stddev);
+}
+
 const char *argp_program_version = "benchmark";
 const char *argp_program_bug_address = "<bpf@vger.kernel.org>";
 const char argp_program_doc[] =
@@ -188,13 +272,18 @@ static const struct argp_option opts[] = {
 extern struct argp bench_ringbufs_argp;
 extern struct argp bench_bloom_map_argp;
 extern struct argp bench_bpf_loop_argp;
+extern struct argp bench_local_storage_argp;
+extern struct argp bench_local_storage_rcu_tasks_trace_argp;
 extern struct argp bench_strncmp_argp;
 
 static const struct argp_child bench_parsers[] = {
        { &bench_ringbufs_argp, 0, "Ring buffers benchmark", 0 },
        { &bench_bloom_map_argp, 0, "Bloom filter map benchmark", 0 },
        { &bench_bpf_loop_argp, 0, "bpf_loop helper benchmark", 0 },
+       { &bench_local_storage_argp, 0, "local_storage benchmark", 0 },
        { &bench_strncmp_argp, 0, "bpf_strncmp helper benchmark", 0 },
+       { &bench_local_storage_rcu_tasks_trace_argp, 0,
+               "local_storage RCU Tasks Trace slowdown benchmark", 0 },
        {},
 };
 
@@ -397,6 +486,10 @@ extern const struct bench bench_bpf_loop;
 extern const struct bench bench_strncmp_no_helper;
 extern const struct bench bench_strncmp_helper;
 extern const struct bench bench_bpf_hashmap_full_update;
+extern const struct bench bench_local_storage_cache_seq_get;
+extern const struct bench bench_local_storage_cache_interleaved_get;
+extern const struct bench bench_local_storage_cache_hashmap_control;
+extern const struct bench bench_local_storage_tasks_trace;
 
 static const struct bench *benchs[] = {
        &bench_count_global,
@@ -432,6 +525,10 @@ static const struct bench *benchs[] = {
        &bench_strncmp_no_helper,
        &bench_strncmp_helper,
        &bench_bpf_hashmap_full_update,
+       &bench_local_storage_cache_seq_get,
+       &bench_local_storage_cache_interleaved_get,
+       &bench_local_storage_cache_hashmap_control,
+       &bench_local_storage_tasks_trace,
 };
 
 static void setup_benchmark()
index fb3e213..d748255 100644
@@ -30,10 +30,19 @@ struct env {
        struct cpu_set cons_cpus;
 };
 
+struct basic_stats {
+       double mean;
+       double stddev;
+};
+
 struct bench_res {
        long hits;
        long drops;
        long false_hits;
+       long important_hits;
+       unsigned long gp_ns;
+       unsigned long gp_ct;
+       unsigned int stime;
 };
 
 struct bench {
@@ -61,6 +70,13 @@ void false_hits_report_progress(int iter, struct bench_res *res, long delta_ns);
 void false_hits_report_final(struct bench_res res[], int res_cnt);
 void ops_report_progress(int iter, struct bench_res *res, long delta_ns);
 void ops_report_final(struct bench_res res[], int res_cnt);
+void local_storage_report_progress(int iter, struct bench_res *res,
+                                  long delta_ns);
+void local_storage_report_final(struct bench_res res[], int res_cnt);
+void grace_period_latency_basic_stats(struct bench_res res[], int res_cnt,
+                                     struct basic_stats *gp_stat);
+void grace_period_ticks_basic_stats(struct bench_res res[], int res_cnt,
+                                   struct basic_stats *gp_stat);
 
 static inline __u64 get_time_ns(void)
 {
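
The measure() callbacks in the benchmarks below drain these bench_res counters with an atomic exchange, so producer threads never block on a lock (the atomic_swap() helper they call is a thin wrapper around the same builtin). A standalone sketch of the pattern, with hypothetical names:

    #include <pthread.h>
    #include <stdio.h>
    #include <unistd.h>

    static long hits;       /* bumped by the producer, drained by "measure" */

    static void *producer(void *arg)
    {
            for (;;)
                    __atomic_add_fetch(&hits, 1, __ATOMIC_RELAXED);
            return NULL;
    }

    int main(void)
    {
            pthread_t thr;
            int iter;

            if (pthread_create(&thr, NULL, producer, NULL))
                    return 1;
            for (iter = 0; iter < 3; iter++) {
                    sleep(1);
                    /* Swap-to-zero reads one interval's worth of hits
                     * without ever stalling the producer. */
                    printf("iter %d: %ld hits/s\n", iter,
                           __atomic_exchange_n(&hits, 0, __ATOMIC_RELAXED));
            }
            return 0;
    }

Build with -pthread.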
diff --git a/tools/testing/selftests/bpf/benchs/bench_local_storage.c b/tools/testing/selftests/bpf/benchs/bench_local_storage.c
new file mode 100644
index 0000000..5a378c8
--- /dev/null
@@ -0,0 +1,287 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#include <argp.h>
+#include <linux/btf.h>
+
+#include "local_storage_bench.skel.h"
+#include "bench.h"
+
+#include <test_btf.h>
+
+static struct {
+       __u32 nr_maps;
+       __u32 hashmap_nr_keys_used;
+} args = {
+       .nr_maps = 1000,
+       .hashmap_nr_keys_used = 1000,
+};
+
+enum {
+       ARG_NR_MAPS = 6000,
+       ARG_HASHMAP_NR_KEYS_USED = 6001,
+};
+
+static const struct argp_option opts[] = {
+       { "nr_maps", ARG_NR_MAPS, "NR_MAPS", 0,
+               "Set number of local_storage maps"},
+       { "hashmap_nr_keys_used", ARG_HASHMAP_NR_KEYS_USED, "NR_KEYS",
+               0, "When doing hashmap test, set number of hashmap keys test uses"},
+       {},
+};
+
+static error_t parse_arg(int key, char *arg, struct argp_state *state)
+{
+       long ret;
+
+       switch (key) {
+       case ARG_NR_MAPS:
+               ret = strtol(arg, NULL, 10);
+               if (ret < 1 || ret > UINT_MAX) {
+                       fprintf(stderr, "invalid nr_maps");
+                       argp_usage(state);
+               }
+               args.nr_maps = ret;
+               break;
+       case ARG_HASHMAP_NR_KEYS_USED:
+               ret = strtol(arg, NULL, 10);
+               if (ret < 1 || ret > UINT_MAX) {
+                       fprintf(stderr, "invalid hashmap_nr_keys_used");
+                       argp_usage(state);
+               }
+               args.hashmap_nr_keys_used = ret;
+               break;
+       default:
+               return ARGP_ERR_UNKNOWN;
+       }
+
+       return 0;
+}
+
+const struct argp bench_local_storage_argp = {
+       .options = opts,
+       .parser = parse_arg,
+};
+
+/* Keep in sync w/ array of maps in bpf */
+#define MAX_NR_MAPS 1000
+/* keep in sync w/ same define in bpf */
+#define HASHMAP_SZ 4194304
+
+static void validate(void)
+{
+       if (env.producer_cnt != 1) {
+               fprintf(stderr, "benchmark doesn't support multi-producer!\n");
+               exit(1);
+       }
+       if (env.consumer_cnt != 1) {
+               fprintf(stderr, "benchmark doesn't support multi-consumer!\n");
+               exit(1);
+       }
+
+       if (args.nr_maps > MAX_NR_MAPS) {
+               fprintf(stderr, "nr_maps must be <= 1000\n");
+               exit(1);
+       }
+
+       if (args.hashmap_nr_keys_used > HASHMAP_SZ) {
+               fprintf(stderr, "hashmap_nr_keys_used must be <= %u\n", HASHMAP_SZ);
+               exit(1);
+       }
+}
+
+static struct {
+       struct local_storage_bench *skel;
+       void *bpf_obj;
+       struct bpf_map *array_of_maps;
+} ctx;
+
+static void prepopulate_hashmap(int fd)
+{
+       int i, key, val;
+
+       /* local_storage gets will have the BPF_LOCAL_STORAGE_GET_F_CREATE flag
+        * set, so prepopulate the hashmap to keep the comparison fair
+        */
+       for (i = 0; i < HASHMAP_SZ; i++) {
+               key = val = i;
+               if (bpf_map_update_elem(fd, &key, &val, 0)) {
+                       fprintf(stderr, "Error prepopulating hashmap (key %d)\n", key);
+                       exit(1);
+               }
+       }
+}
+
+static void __setup(struct bpf_program *prog, bool hashmap)
+{
+       struct bpf_map *inner_map;
+       int i, fd, mim_fd, err;
+
+       LIBBPF_OPTS(bpf_map_create_opts, create_opts);
+
+       if (!hashmap)
+               create_opts.map_flags = BPF_F_NO_PREALLOC;
+
+       ctx.skel->rodata->num_maps = args.nr_maps;
+       ctx.skel->rodata->hashmap_num_keys = args.hashmap_nr_keys_used;
+       inner_map = bpf_map__inner_map(ctx.array_of_maps);
+       create_opts.btf_key_type_id = bpf_map__btf_key_type_id(inner_map);
+       create_opts.btf_value_type_id = bpf_map__btf_value_type_id(inner_map);
+
+       err = local_storage_bench__load(ctx.skel);
+       if (err) {
+               fprintf(stderr, "Error loading skeleton\n");
+               goto err_out;
+       }
+
+       create_opts.btf_fd = bpf_object__btf_fd(ctx.skel->obj);
+
+       mim_fd = bpf_map__fd(ctx.array_of_maps);
+       if (mim_fd < 0) {
+               fprintf(stderr, "Error getting map_in_map fd\n");
+               goto err_out;
+       }
+
+       for (i = 0; i < args.nr_maps; i++) {
+               if (hashmap)
+                       fd = bpf_map_create(BPF_MAP_TYPE_HASH, NULL, sizeof(int),
+                                           sizeof(int), HASHMAP_SZ, &create_opts);
+               else
+                       fd = bpf_map_create(BPF_MAP_TYPE_TASK_STORAGE, NULL, sizeof(int),
+                                           sizeof(int), 0, &create_opts);
+               if (fd < 0) {
+                       fprintf(stderr, "Error creating map %d: %d\n", i, fd);
+                       goto err_out;
+               }
+
+               if (hashmap)
+                       prepopulate_hashmap(fd);
+
+               err = bpf_map_update_elem(mim_fd, &i, &fd, 0);
+               if (err) {
+                       fprintf(stderr, "Error updating array-of-maps w/ map %d\n", i);
+                       goto err_out;
+               }
+       }
+
+       if (!bpf_program__attach(prog)) {
+               fprintf(stderr, "Error attaching bpf program\n");
+               goto err_out;
+       }
+
+       return;
+err_out:
+       exit(1);
+}
+
+static void hashmap_setup(void)
+{
+       struct local_storage_bench *skel;
+
+       setup_libbpf();
+
+       skel = local_storage_bench__open();
+       ctx.skel = skel;
+       ctx.array_of_maps = skel->maps.array_of_hash_maps;
+       skel->rodata->use_hashmap = 1;
+       skel->rodata->interleave = 0;
+
+       __setup(skel->progs.get_local, true);
+}
+
+static void local_storage_cache_get_setup(void)
+{
+       struct local_storage_bench *skel;
+
+       setup_libbpf();
+
+       skel = local_storage_bench__open();
+       ctx.skel = skel;
+       ctx.array_of_maps = skel->maps.array_of_local_storage_maps;
+       skel->rodata->use_hashmap = 0;
+       skel->rodata->interleave = 0;
+
+       __setup(skel->progs.get_local, false);
+}
+
+static void local_storage_cache_get_interleaved_setup(void)
+{
+       struct local_storage_bench *skel;
+
+       setup_libbpf();
+
+       skel = local_storage_bench__open();
+       ctx.skel = skel;
+       ctx.array_of_maps = skel->maps.array_of_local_storage_maps;
+       skel->rodata->use_hashmap = 0;
+       skel->rodata->interleave = 1;
+
+       __setup(skel->progs.get_local, false);
+}
+
+static void measure(struct bench_res *res)
+{
+       res->hits = atomic_swap(&ctx.skel->bss->hits, 0);
+       res->important_hits = atomic_swap(&ctx.skel->bss->important_hits, 0);
+}
+
+static inline void trigger_bpf_program(void)
+{
+       syscall(__NR_getpgid);
+}
+
+static void *consumer(void *input)
+{
+       return NULL;
+}
+
+static void *producer(void *input)
+{
+       while (true)
+               trigger_bpf_program();
+
+       return NULL;
+}
+
+/* The cache sequential and interleaved get benchmarks test local_storage get
+ * performance; specifically, they demonstrate the performance cliff of the
+ * current list-plus-cache local_storage model.
+ *
+ * cache sequential get: call bpf_task_storage_get on n maps in order
+ * cache interleaved get: like "sequential get", but interleave 4 calls to the
+ *     'important' map (idx 0 in array_of_maps) for every 10 calls. The goal
+ *     is to mimic an environment where many progs access their local_storage
+ *     maps, with 'our' prog needing to access its map more often than others.
+ */
+const struct bench bench_local_storage_cache_seq_get = {
+       .name = "local-storage-cache-seq-get",
+       .validate = validate,
+       .setup = local_storage_cache_get_setup,
+       .producer_thread = producer,
+       .consumer_thread = consumer,
+       .measure = measure,
+       .report_progress = local_storage_report_progress,
+       .report_final = local_storage_report_final,
+};
+
+const struct bench bench_local_storage_cache_interleaved_get = {
+       .name = "local-storage-cache-int-get",
+       .validate = validate,
+       .setup = local_storage_cache_get_interleaved_setup,
+       .producer_thread = producer,
+       .consumer_thread = consumer,
+       .measure = measure,
+       .report_progress = local_storage_report_progress,
+       .report_final = local_storage_report_final,
+};
+
+const struct bench bench_local_storage_cache_hashmap_control = {
+       .name = "local-storage-cache-hashmap-control",
+       .validate = validate,
+       .setup = hashmap_setup,
+       .producer_thread = producer,
+       .consumer_thread = consumer,
+       .measure = measure,
+       .report_progress = local_storage_report_progress,
+       .report_final = local_storage_report_final,
+};
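
For orientation, the BPF side of these benchmarks lives in the local_storage_bench object built into the skeleton above; its source is not part of this excerpt. A heavily simplified sketch of its shape, per the comments above: a task-local storage get with the CREATE flag, counted into a global. The SEC() name and single-map layout are hypothetical simplifications:

    #include "vmlinux.h"
    #include <bpf/bpf_helpers.h>

    struct {
            __uint(type, BPF_MAP_TYPE_TASK_STORAGE);
            __uint(map_flags, BPF_F_NO_PREALLOC);
            __type(key, int);
            __type(value, int);
    } task_map SEC(".maps");

    long hits;

    SEC("tp/syscalls/sys_enter_getpgid")
    int get_local(void *ctx)
    {
            struct task_struct *task = bpf_get_current_task_btf();
            int *data;

            /* BPF_LOCAL_STORAGE_GET_F_CREATE allocates storage on first
             * access, which is why the hashmap control is prepopulated. */
            data = bpf_task_storage_get(&task_map, task, NULL,
                                        BPF_LOCAL_STORAGE_GET_F_CREATE);
            if (data)
                    __sync_fetch_and_add(&hits, 1);
            return 0;
    }

    char _license[] SEC("license") = "GPL";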
diff --git a/tools/testing/selftests/bpf/benchs/bench_local_storage_rcu_tasks_trace.c b/tools/testing/selftests/bpf/benchs/bench_local_storage_rcu_tasks_trace.c
new file mode 100644
index 0000000..43f109d
--- /dev/null
@@ -0,0 +1,281 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#include <argp.h>
+
+#include <sys/prctl.h>
+#include "local_storage_rcu_tasks_trace_bench.skel.h"
+#include "bench.h"
+
+#include <signal.h>
+
+static struct {
+       __u32 nr_procs;
+       __u32 kthread_pid;
+       bool quiet;
+} args = {
+       .nr_procs = 1000,
+       .kthread_pid = 0,
+       .quiet = false,
+};
+
+enum {
+       ARG_NR_PROCS = 7000,
+       ARG_KTHREAD_PID = 7001,
+       ARG_QUIET = 7002,
+};
+
+static const struct argp_option opts[] = {
+       { "nr_procs", ARG_NR_PROCS, "NR_PROCS", 0,
+               "Set number of user processes to spin up"},
+       { "kthread_pid", ARG_KTHREAD_PID, "PID", 0,
+               "Pid of rcu_tasks_trace kthread for ticks tracking"},
+       { "quiet", ARG_QUIET, "{0,1}", 0,
+               "If true, don't report progress"},
+       {},
+};
+
+static error_t parse_arg(int key, char *arg, struct argp_state *state)
+{
+       long ret;
+
+       switch (key) {
+       case ARG_NR_PROCS:
+               ret = strtol(arg, NULL, 10);
+               if (ret < 1 || ret > UINT_MAX) {
+                       fprintf(stderr, "invalid nr_procs\n");
+                       argp_usage(state);
+               }
+               args.nr_procs = ret;
+               break;
+       case ARG_KTHREAD_PID:
+               ret = strtol(arg, NULL, 10);
+               if (ret < 1) {
+                       fprintf(stderr, "invalid kthread_pid\n");
+                       argp_usage(state);
+               }
+               args.kthread_pid = ret;
+               break;
+       case ARG_QUIET:
+               ret = strtol(arg, NULL, 10);
+               if (ret < 0 || ret > 1) {
+                       fprintf(stderr, "invalid quiet %ld\n", ret);
+                       argp_usage(state);
+               }
+               args.quiet = ret;
+               break;
+       default:
+               return ARGP_ERR_UNKNOWN;
+       }
+
+       return 0;
+}
+
+const struct argp bench_local_storage_rcu_tasks_trace_argp = {
+       .options = opts,
+       .parser = parse_arg,
+};
+
+#define MAX_SLEEP_PROCS 150000
+
+static void validate(void)
+{
+       if (env.producer_cnt != 1) {
+               fprintf(stderr, "benchmark doesn't support multi-producer!\n");
+               exit(1);
+       }
+       if (env.consumer_cnt != 1) {
+               fprintf(stderr, "benchmark doesn't support multi-consumer!\n");
+               exit(1);
+       }
+
+       if (args.nr_procs > MAX_SLEEP_PROCS) {
+               fprintf(stderr, "benchmark supports up to %u sleeper procs!\n",
+                       MAX_SLEEP_PROCS);
+               exit(1);
+       }
+}
+
+static long kthread_pid_ticks(void)
+{
+       char procfs_path[100];
+       long stime;
+       FILE *f;
+
+       if (!args.kthread_pid)
+               return -1;
+
+       sprintf(procfs_path, "/proc/%u/stat", args.kthread_pid);
+       f = fopen(procfs_path, "r");
+       if (!f) {
+               fprintf(stderr, "couldn't open %s, exiting\n", procfs_path);
+               goto err_out;
+       }
+       if (fscanf(f, "%*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %ld", &stime) != 1) {
+               fprintf(stderr, "fscanf of %s failed, exiting\n", procfs_path);
+               goto err_out;
+       }
+       fclose(f);
+       return stime;
+
+err_out:
+       if (f)
+               fclose(f);
+       exit(1);
+       return 0;
+}
+
+static struct {
+       struct local_storage_rcu_tasks_trace_bench *skel;
+       long prev_kthread_stime;
+} ctx;
+
+static void sleep_and_loop(void)
+{
+       while (true) {
+               sleep(rand() % 4);
+               syscall(__NR_getpgid);
+       }
+}
+
+static void local_storage_tasks_trace_setup(void)
+{
+       int i, err, forkret, runner_pid;
+
+       runner_pid = getpid();
+
+       for (i = 0; i < args.nr_procs; i++) {
+               forkret = fork();
+               if (forkret < 0) {
+                       fprintf(stderr, "Error forking sleeper proc %u of %u, exiting\n", i,
+                               args.nr_procs);
+                       goto err_out;
+               }
+
+               if (!forkret) {
+                       err = prctl(PR_SET_PDEATHSIG, SIGKILL);
+                       if (err < 0) {
+                               fprintf(stderr, "prctl failed with err %d, exiting\n", errno);
+                               goto err_out;
+                       }
+
+                       if (getppid() != runner_pid) {
+                               fprintf(stderr, "Runner died while spinning up procs, exiting\n");
+                               goto err_out;
+                       }
+                       sleep_and_loop();
+               }
+       }
+       printf("Spun up %u procs (our pid %d)\n", args.nr_procs, runner_pid);
+
+       setup_libbpf();
+
+       ctx.skel = local_storage_rcu_tasks_trace_bench__open_and_load();
+       if (!ctx.skel) {
+               fprintf(stderr, "Error doing open_and_load, exiting\n");
+               goto err_out;
+       }
+
+       ctx.prev_kthread_stime = kthread_pid_ticks();
+
+       if (!bpf_program__attach(ctx.skel->progs.get_local)) {
+               fprintf(stderr, "Error attaching bpf program\n");
+               goto err_out;
+       }
+
+       if (!bpf_program__attach(ctx.skel->progs.pregp_step)) {
+               fprintf(stderr, "Error attaching bpf program\n");
+               goto err_out;
+       }
+
+       if (!bpf_program__attach(ctx.skel->progs.postgp)) {
+               fprintf(stderr, "Error attaching bpf program\n");
+               goto err_out;
+       }
+
+       return;
+err_out:
+       exit(1);
+}
+
+static void measure(struct bench_res *res)
+{
+       long ticks;
+
+       res->gp_ct = atomic_swap(&ctx.skel->bss->gp_hits, 0);
+       res->gp_ns = atomic_swap(&ctx.skel->bss->gp_times, 0);
+       ticks = kthread_pid_ticks();
+       res->stime = ticks - ctx.prev_kthread_stime;
+       ctx.prev_kthread_stime = ticks;
+}
+
+static void *consumer(void *input)
+{
+       return NULL;
+}
+
+static void *producer(void *input)
+{
+       while (true)
+               syscall(__NR_getpgid);
+       return NULL;
+}
+
+static void report_progress(int iter, struct bench_res *res, long delta_ns)
+{
+       if (ctx.skel->bss->unexpected) {
+               fprintf(stderr, "Error: Unexpected order of bpf prog calls (postgp after pregp).");
+               fprintf(stderr, "Data can't be trusted, exiting\n");
+               exit(1);
+       }
+
+       if (args.quiet)
+               return;
+
+       printf("Iter %d\t avg tasks_trace grace period latency\t%lf ns\n",
+              iter, res->gp_ns / (double)res->gp_ct);
+       printf("Iter %d\t avg ticks per tasks_trace grace period\t%lf\n",
+              iter, res->stime / (double)res->gp_ct);
+}
+
+static void report_final(struct bench_res res[], int res_cnt)
+{
+       struct basic_stats gp_stat;
+
+       grace_period_latency_basic_stats(res, res_cnt, &gp_stat);
+       printf("SUMMARY tasks_trace grace period latency");
+       printf("\tavg %.3lf us\tstddev %.3lf us\n", gp_stat.mean, gp_stat.stddev);
+       grace_period_ticks_basic_stats(res, res_cnt, &gp_stat);
+       printf("SUMMARY ticks per tasks_trace grace period");
+       printf("\tavg %.3lf\tstddev %.3lf\n", gp_stat.mean, gp_stat.stddev);
+}
+
+/* local-storage-tasks-trace: Benchmark performance of BPF local_storage's use
+ * of RCU Tasks-Trace.
+ *
+ * Stress RCU Tasks Trace by forking many tasks, all of which do no work aside
+ * from a sleep() loop, and by creating/destroying BPF task-local storage on
+ * wakeup. The number of forked tasks is configurable.
+ *
+ * Exercising code paths which call call_rcu_tasks_trace() while many
+ * thousands of tasks are on the system should force RCU Tasks-Trace to do a
+ * noticeable amount of work.
+ *
+ * This should be observable by measuring rcu_tasks_trace_kthread CPU usage
+ * after the grace period has ended, or by measuring grace period latency.
+ *
+ * This benchmark uses both approaches: it attaches to the
+ * rcu_tasks_trace_pregp_step and rcu_tasks_trace_postgp functions to measure
+ * grace period latency, and it uses /proc/PID/stat to measure
+ * rcu_tasks_trace_kthread kernel ticks.
+ */
+const struct bench bench_local_storage_tasks_trace = {
+       .name = "local-storage-tasks-trace",
+       .validate = validate,
+       .setup = local_storage_tasks_trace_setup,
+       .producer_thread = producer,
+       .consumer_thread = consumer,
+       .measure = measure,
+       .report_progress = report_progress,
+       .report_final = report_final,
+};
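
kthread_pid_ticks() above reaches stime, the 15th whitespace-separated field of /proc/PID/stat (kernel-mode CPU time in clock ticks), by skipping the fourteen fields before it. A standalone sketch of the same parse, adding the ticks-to-seconds conversion the benchmark leaves implicit; like the original, it assumes the comm field contains no spaces, which holds for kthreads:

    #include <stdio.h>
    #include <unistd.h>

    /* Return kernel-mode CPU time of pid in clock ticks, or -1 on error. */
    static long stime_ticks(int pid)
    {
            char path[64];
            long stime = -1;
            FILE *f;

            snprintf(path, sizeof(path), "/proc/%d/stat", pid);
            f = fopen(path, "r");
            if (!f)
                    return -1;
            /* pid, comm, state, ppid, ... -- stime is field 15 */
            if (fscanf(f, "%*s %*s %*s %*s %*s %*s %*s %*s %*s %*s"
                          " %*s %*s %*s %*s %ld", &stime) != 1)
                    stime = -1;
            fclose(f);
            return stime;
    }

    int main(void)
    {
            long hz = sysconf(_SC_CLK_TCK);
            long ticks = stime_ticks(getpid());

            printf("stime: %ld ticks = %.2f s (at %ld ticks/s)\n",
                   ticks, ticks / (double)hz, hz);
            return 0;
    }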
diff --git a/tools/testing/selftests/bpf/benchs/run_bench_local_storage.sh b/tools/testing/selftests/bpf/benchs/run_bench_local_storage.sh
new file mode 100755
index 0000000..2eb2b51
--- /dev/null
@@ -0,0 +1,24 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source ./benchs/run_common.sh
+
+set -eufo pipefail
+
+header "Hashmap Control"
+for i in 10 1000 10000 100000 4194304; do
+subtitle "num keys: $i"
+       summarize_local_storage "hashmap (control) sequential    get: "\
+               "$(./bench --nr_maps 1 --hashmap_nr_keys_used=$i local-storage-cache-hashmap-control)"
+       printf "\n"
+done
+
+header "Local Storage"
+for i in 1 10 16 17 24 32 100 1000; do
+subtitle "num_maps: $i"
+       summarize_local_storage "local_storage cache sequential  get: "\
+               "$(./bench --nr_maps $i local-storage-cache-seq-get)"
+       summarize_local_storage "local_storage cache interleaved get: "\
+               "$(./bench --nr_maps $i local-storage-cache-int-get)"
+       printf "\n"
+done
diff --git a/tools/testing/selftests/bpf/benchs/run_bench_local_storage_rcu_tasks_trace.sh b/tools/testing/selftests/bpf/benchs/run_bench_local_storage_rcu_tasks_trace.sh
new file mode 100755
index 0000000..5dac1f0
--- /dev/null
@@ -0,0 +1,11 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+kthread_pid=`pgrep rcu_tasks_trace_kthread`
+
+if [ -z "$kthread_pid" ]; then
+       echo "error: Couldn't find rcu_tasks_trace_kthread"
+       exit 1
+fi
+
+./bench --nr_procs 15000 --kthread_pid $kthread_pid -d 600 --quiet 1 local-storage-tasks-trace
index 6c5e602..d9f40af 100644
@@ -41,6 +41,16 @@ function ops()
        echo "$*" | sed -E "s/.*latency\s+([0-9]+\.[0-9]+\sns\/op).*/\1/"
 }
 
+function local_storage()
+{
+       echo -n "hits throughput: "
+       echo -n "$*" | sed -E "s/.* hits throughput\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+\sM\sops\/s).*/\1/"
+       echo -n -e ", hits latency: "
+       echo -n "$*" | sed -E "s/.* hits latency\s+([0-9]+\.[0-9]+\sns\/op).*/\1/"
+       echo -n ", important_hits throughput: "
+       echo "$*" | sed -E "s/.*important_hits throughput\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+\sM\sops\/s).*/\1/"
+}
+
 function total()
 {
        echo "$*" | sed -E "s/.*total operations\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+M\/s).*/\1/"
@@ -67,6 +77,13 @@ function summarize_ops()
        printf "%-20s %s\n" "$bench" "$(ops $summary)"
 }
 
+function summarize_local_storage()
+{
+       bench="$1"
+       summary=$(echo $2 | tail -n1)
+       printf "%-20s %s\n" "$bench" "$(local_storage $summary)"
+}
+
 function summarize_total()
 {
        bench="$1"
index 719ab56..8452095 100644
@@ -2,15 +2,6 @@
 #ifndef __BPF_LEGACY__
 #define __BPF_LEGACY__
 
-#define BPF_ANNOTATE_KV_PAIR(name, type_key, type_val)         \
-       struct ____btf_map_##name {                             \
-               type_key key;                                   \
-               type_val value;                                 \
-       };                                                      \
-       struct ____btf_map_##name                               \
-       __attribute__ ((section(".maps." #name), used))         \
-               ____btf_map_##name = { }
-
 /* llvm builtin functions that eBPF C program may use to
  * emit BPF_LD_ABS and BPF_LD_IND instructions
  */
index 3b3edc0..c05904d 100644
@@ -57,3 +57,9 @@ CONFIG_FPROBE=y
 CONFIG_IKCONFIG=y
 CONFIG_IKCONFIG_PROC=y
 CONFIG_MPTCP=y
+CONFIG_NETFILTER_SYNPROXY=y
+CONFIG_NETFILTER_XT_TARGET_CT=y
+CONFIG_NETFILTER_XT_MATCH_STATE=y
+CONFIG_IP_NF_FILTER=y
+CONFIG_IP_NF_TARGET_SYNPROXY=y
+CONFIG_IP_NF_RAW=y
index 59cf81e..bec1555 100644
@@ -436,7 +436,7 @@ struct nstoken *open_netns(const char *name)
        int err;
        struct nstoken *token;
 
-       token = malloc(sizeof(struct nstoken));
+       token = calloc(1, sizeof(struct nstoken));
        if (!ASSERT_OK_PTR(token, "malloc token"))
                return NULL;
 
index 380d7a2..4cd8a25 100644
@@ -120,6 +120,64 @@ static void check_nested_calls(struct bpf_loop *skel)
        bpf_link__destroy(link);
 }
 
+static void check_non_constant_callback(struct bpf_loop *skel)
+{
+       struct bpf_link *link =
+               bpf_program__attach(skel->progs.prog_non_constant_callback);
+
+       if (!ASSERT_OK_PTR(link, "link"))
+               return;
+
+       skel->bss->callback_selector = 0x0F;
+       usleep(1);
+       ASSERT_EQ(skel->bss->g_output, 0x0F, "g_output #1");
+
+       skel->bss->callback_selector = 0xF0;
+       usleep(1);
+       ASSERT_EQ(skel->bss->g_output, 0xF0, "g_output #2");
+
+       bpf_link__destroy(link);
+}
+
+static void check_stack(struct bpf_loop *skel)
+{
+       struct bpf_link *link = bpf_program__attach(skel->progs.stack_check);
+       const int max_key = 12;
+       int key;
+       int map_fd;
+
+       if (!ASSERT_OK_PTR(link, "link"))
+               return;
+
+       map_fd = bpf_map__fd(skel->maps.map1);
+
+       if (!ASSERT_GE(map_fd, 0, "bpf_map__fd"))
+               goto out;
+
+       for (key = 1; key <= max_key; ++key) {
+               int val = key;
+               int err = bpf_map_update_elem(map_fd, &key, &val, BPF_NOEXIST);
+
+               if (!ASSERT_OK(err, "bpf_map_update_elem"))
+                       goto out;
+       }
+
+       usleep(1);
+
+       for (key = 1; key <= max_key; ++key) {
+               int val;
+               int err = bpf_map_lookup_elem(map_fd, &key, &val);
+
+               if (!ASSERT_OK(err, "bpf_map_lookup_elem"))
+                       goto out;
+               if (!ASSERT_EQ(val, key + 1, "bad value in the map"))
+                       goto out;
+       }
+
+out:
+       bpf_link__destroy(link);
+}
+
 void test_bpf_loop(void)
 {
        struct bpf_loop *skel;
@@ -140,6 +198,10 @@ void test_bpf_loop(void)
                check_invalid_flags(skel);
        if (test__start_subtest("check_nested_calls"))
                check_nested_calls(skel);
+       if (test__start_subtest("check_non_constant_callback"))
+               check_non_constant_callback(skel);
+       if (test__start_subtest("check_stack"))
+               check_stack(skel);
 
        bpf_loop__destroy(skel);
 }
index e9a9a31..2959a52 100644
@@ -9,6 +9,9 @@
 #include "bpf_cubic.skel.h"
 #include "bpf_tcp_nogpl.skel.h"
 #include "bpf_dctcp_release.skel.h"
+#include "tcp_ca_write_sk_pacing.skel.h"
+#include "tcp_ca_incompl_cong_ops.skel.h"
+#include "tcp_ca_unsupp_cong_op.skel.h"
 
 #ifndef ENOTSUPP
 #define ENOTSUPP 524
@@ -322,6 +325,58 @@ static void test_rel_setsockopt(void)
        bpf_dctcp_release__destroy(rel_skel);
 }
 
+static void test_write_sk_pacing(void)
+{
+       struct tcp_ca_write_sk_pacing *skel;
+       struct bpf_link *link;
+
+       skel = tcp_ca_write_sk_pacing__open_and_load();
+       if (!ASSERT_OK_PTR(skel, "open_and_load"))
+               return;
+
+       link = bpf_map__attach_struct_ops(skel->maps.write_sk_pacing);
+       ASSERT_OK_PTR(link, "attach_struct_ops");
+
+       bpf_link__destroy(link);
+       tcp_ca_write_sk_pacing__destroy(skel);
+}
+
+static void test_incompl_cong_ops(void)
+{
+       struct tcp_ca_incompl_cong_ops *skel;
+       struct bpf_link *link;
+
+       skel = tcp_ca_incompl_cong_ops__open_and_load();
+       if (!ASSERT_OK_PTR(skel, "open_and_load"))
+               return;
+
+       /* That cong_avoid() and cong_control() are missing is only reported at
+        * this point:
+        */
+       link = bpf_map__attach_struct_ops(skel->maps.incompl_cong_ops);
+       ASSERT_ERR_PTR(link, "attach_struct_ops");
+
+       bpf_link__destroy(link);
+       tcp_ca_incompl_cong_ops__destroy(skel);
+}
+
+static void test_unsupp_cong_op(void)
+{
+       libbpf_print_fn_t old_print_fn;
+       struct tcp_ca_unsupp_cong_op *skel;
+
+       err_str = "attach to unsupported member get_info";
+       found = false;
+       old_print_fn = libbpf_set_print(libbpf_debug_print);
+
+       skel = tcp_ca_unsupp_cong_op__open_and_load();
+       ASSERT_NULL(skel, "open_and_load");
+       ASSERT_EQ(found, true, "expected_err_msg");
+
+       tcp_ca_unsupp_cong_op__destroy(skel);
+       libbpf_set_print(old_print_fn);
+}
+
 void test_bpf_tcp_ca(void)
 {
        if (test__start_subtest("dctcp"))
@@ -334,4 +389,10 @@ void test_bpf_tcp_ca(void)
                test_dctcp_fallback();
        if (test__start_subtest("rel_setsockopt"))
                test_rel_setsockopt();
+       if (test__start_subtest("write_sk_pacing"))
+               test_write_sk_pacing();
+       if (test__start_subtest("incompl_cong_ops"))
+               test_incompl_cong_ops();
+       if (test__start_subtest("unsupp_cong_op"))
+               test_unsupp_cong_op();
 }
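
test_unsupp_cong_op() uses a libbpf_debug_print() helper defined earlier in this file (outside the hunks shown) that scans libbpf's log output for err_str. A self-contained sketch of that capture pattern; the names here mirror the test but are reimplemented from scratch:

    #include <stdarg.h>
    #include <stdbool.h>
    #include <stdio.h>
    #include <string.h>
    #include <bpf/libbpf.h>

    static const char *err_str;
    static bool found;

    static int capture_print(enum libbpf_print_level level,
                             const char *format, va_list args)
    {
            char buf[512];

            vsnprintf(buf, sizeof(buf), format, args);
            if (err_str && strstr(buf, err_str))
                    found = true;   /* the expected error was logged */
            return 0;               /* swallow the message */
    }

    /* Usage, mirroring the test above: */
    static void expect_load_error(void)
    {
            libbpf_print_fn_t old_print = libbpf_set_print(capture_print);

            err_str = "attach to unsupported member get_info";
            found = false;
            /* ... open_and_load the skeleton here and assert it fails ... */
            libbpf_set_print(old_print);
    }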
index edb3871..941b010 100644
@@ -34,7 +34,6 @@ static bool always_log;
 #undef CHECK
 #define CHECK(condition, format...) _CHECK(condition, "check", duration, format)
 
-#define BTF_END_RAW 0xdeadbeef
 #define NAME_TBD 0xdeadb33f
 
 #define NAME_NTH(N) (0xfffe0000 | N)
@@ -4652,7 +4651,6 @@ struct btf_file_test {
 };
 
 static struct btf_file_test file_tests[] = {
-       { .file = "test_btf_haskv.o", },
        { .file = "test_btf_newkv.o", },
        { .file = "test_btf_nokv.o", .btf_kv_notfound = true, },
 };
index 2f92feb..c8655ba 100644
@@ -543,7 +543,6 @@ static int __trigger_module_test_read(const struct core_reloc_test_case *test)
        return 0;
 }
 
-
 static const struct core_reloc_test_case test_cases[] = {
        /* validate we can find kernel image and use its BTF for relocs */
        {
@@ -556,6 +555,7 @@ static const struct core_reloc_test_case test_cases[] = {
                        .valid = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, },
                        .comm = "test_progs",
                        .comm_len = sizeof("test_progs"),
+                       .local_task_struct_matches = true,
                },
                .output_len = sizeof(struct core_reloc_kernel_output),
                .raw_tp_name = "sys_enter",
@@ -752,9 +752,10 @@ static const struct core_reloc_test_case test_cases[] = {
        SIZE_CASE(size___diff_offs),
        SIZE_ERR_CASE(size___err_ambiguous),
 
-       /* validate type existence and size relocations */
+       /* validate type existence, match, and size relocations */
        TYPE_BASED_CASE(type_based, {
                .struct_exists = 1,
+               .complex_struct_exists = 1,
                .union_exists = 1,
                .enum_exists = 1,
                .typedef_named_struct_exists = 1,
@@ -763,8 +764,24 @@ static const struct core_reloc_test_case test_cases[] = {
                .typedef_int_exists = 1,
                .typedef_enum_exists = 1,
                .typedef_void_ptr_exists = 1,
+               .typedef_restrict_ptr_exists = 1,
                .typedef_func_proto_exists = 1,
                .typedef_arr_exists = 1,
+
+               .struct_matches = 1,
+               .complex_struct_matches = 1,
+               .union_matches = 1,
+               .enum_matches = 1,
+               .typedef_named_struct_matches = 1,
+               .typedef_anon_struct_matches = 1,
+               .typedef_struct_ptr_matches = 1,
+               .typedef_int_matches = 1,
+               .typedef_enum_matches = 1,
+               .typedef_void_ptr_matches = 1,
+               .typedef_restrict_ptr_matches = 1,
+               .typedef_func_proto_matches = 1,
+               .typedef_arr_matches = 1,
+
                .struct_sz = sizeof(struct a_struct),
                .union_sz = sizeof(union a_union),
                .enum_sz = sizeof(enum an_enum),
@@ -780,6 +797,45 @@ static const struct core_reloc_test_case test_cases[] = {
        TYPE_BASED_CASE(type_based___all_missing, {
                /* all zeros */
        }),
+       TYPE_BASED_CASE(type_based___diff, {
+               .struct_exists = 1,
+               .complex_struct_exists = 1,
+               .union_exists = 1,
+               .enum_exists = 1,
+               .typedef_named_struct_exists = 1,
+               .typedef_anon_struct_exists = 1,
+               .typedef_struct_ptr_exists = 1,
+               .typedef_int_exists = 1,
+               .typedef_enum_exists = 1,
+               .typedef_void_ptr_exists = 1,
+               .typedef_func_proto_exists = 1,
+               .typedef_arr_exists = 1,
+
+               .struct_matches = 1,
+               .complex_struct_matches = 1,
+               .union_matches = 1,
+               .enum_matches = 1,
+               .typedef_named_struct_matches = 1,
+               .typedef_anon_struct_matches = 1,
+               .typedef_struct_ptr_matches = 1,
+               .typedef_int_matches = 0,
+               .typedef_enum_matches = 1,
+               .typedef_void_ptr_matches = 1,
+               .typedef_func_proto_matches = 0,
+               .typedef_arr_matches = 0,
+
+               .struct_sz = sizeof(struct a_struct___diff),
+               .union_sz = sizeof(union a_union___diff),
+               .enum_sz = sizeof(enum an_enum___diff),
+               .typedef_named_struct_sz = sizeof(named_struct_typedef___diff),
+               .typedef_anon_struct_sz = sizeof(anon_struct_typedef___diff),
+               .typedef_struct_ptr_sz = sizeof(struct_ptr_typedef___diff),
+               .typedef_int_sz = sizeof(int_typedef___diff),
+               .typedef_enum_sz = sizeof(enum_typedef___diff),
+               .typedef_void_ptr_sz = sizeof(void_ptr_typedef___diff),
+               .typedef_func_proto_sz = sizeof(func_proto_typedef___diff),
+               .typedef_arr_sz = sizeof(arr_typedef___diff),
+       }),
        TYPE_BASED_CASE(type_based___diff_sz, {
                .struct_exists = 1,
                .union_exists = 1,
@@ -792,6 +848,19 @@ static const struct core_reloc_test_case test_cases[] = {
                .typedef_void_ptr_exists = 1,
                .typedef_func_proto_exists = 1,
                .typedef_arr_exists = 1,
+
+               .struct_matches = 0,
+               .union_matches = 0,
+               .enum_matches = 0,
+               .typedef_named_struct_matches = 0,
+               .typedef_anon_struct_matches = 0,
+               .typedef_struct_ptr_matches = 1,
+               .typedef_int_matches = 0,
+               .typedef_enum_matches = 0,
+               .typedef_void_ptr_matches = 1,
+               .typedef_func_proto_matches = 0,
+               .typedef_arr_matches = 0,
+
                .struct_sz = sizeof(struct a_struct___diff_sz),
                .union_sz = sizeof(union a_union___diff_sz),
                .enum_sz = sizeof(enum an_enum___diff_sz),
@@ -806,10 +875,12 @@ static const struct core_reloc_test_case test_cases[] = {
        }),
        TYPE_BASED_CASE(type_based___incompat, {
                .enum_exists = 1,
+               .enum_matches = 1,
                .enum_sz = sizeof(enum an_enum),
        }),
        TYPE_BASED_CASE(type_based___fn_wrong_args, {
                .struct_exists = 1,
+               .struct_matches = 1,
                .struct_sz = sizeof(struct a_struct),
        }),
 
index 5b93d5d..335917d 100644
@@ -329,7 +329,7 @@ static int get_syms(char ***symsp, size_t *cntp)
        struct hashmap *map;
        char buf[256];
        FILE *f;
-       int err;
+       int err = 0;
 
        /*
         * The available_filter_functions contains many duplicates,
@@ -407,7 +407,7 @@ static void test_bench_attach(void)
        double attach_delta, detach_delta;
        struct bpf_link *link = NULL;
        char **syms = NULL;
-       size_t cnt, i;
+       size_t cnt = 0, i;
 
        if (!ASSERT_OK(get_syms(&syms, &cnt), "get_syms"))
                return;
diff --git a/tools/testing/selftests/bpf/prog_tests/lsm_cgroup.c b/tools/testing/selftests/bpf/prog_tests/lsm_cgroup.c
new file mode 100644
index 0000000..1102e4f
--- /dev/null
@@ -0,0 +1,313 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <test_progs.h>
+#include <bpf/btf.h>
+
+#include "lsm_cgroup.skel.h"
+#include "lsm_cgroup_nonvoid.skel.h"
+#include "cgroup_helpers.h"
+#include "network_helpers.h"
+
+#ifndef ENOTSUPP
+#define ENOTSUPP 524
+#endif
+
+static struct btf *btf;
+
+static __u32 query_prog_cnt(int cgroup_fd, const char *attach_func)
+{
+       LIBBPF_OPTS(bpf_prog_query_opts, p);
+       int cnt = 0;
+       int i;
+
+       ASSERT_OK(bpf_prog_query_opts(cgroup_fd, BPF_LSM_CGROUP, &p), "prog_query");
+
+       if (!attach_func)
+               return p.prog_cnt;
+
+       /* When attach_func is provided, count the number of progs that
+        * attach to the given symbol.
+        */
+
+       if (!btf)
+               btf = btf__load_vmlinux_btf();
+       if (!ASSERT_OK(libbpf_get_error(btf), "btf_vmlinux"))
+               return -1;
+
+       p.prog_ids = malloc(sizeof(u32) * p.prog_cnt);
+       p.prog_attach_flags = malloc(sizeof(u32) * p.prog_cnt);
+       ASSERT_OK(bpf_prog_query_opts(cgroup_fd, BPF_LSM_CGROUP, &p), "prog_query");
+
+       for (i = 0; i < p.prog_cnt; i++) {
+               struct bpf_prog_info info = {};
+               __u32 info_len = sizeof(info);
+               int fd;
+
+               fd = bpf_prog_get_fd_by_id(p.prog_ids[i]);
+               ASSERT_GE(fd, 0, "prog_get_fd_by_id");
+               ASSERT_OK(bpf_obj_get_info_by_fd(fd, &info, &info_len), "prog_info_by_fd");
+               close(fd);
+
+               if (info.attach_btf_id ==
+                   btf__find_by_name_kind(btf, attach_func, BTF_KIND_FUNC))
+                       cnt++;
+       }
+
+       free(p.prog_ids);
+       free(p.prog_attach_flags);
+
+       return cnt;
+}
+
+static void test_lsm_cgroup_functional(void)
+{
+       DECLARE_LIBBPF_OPTS(bpf_prog_attach_opts, attach_opts);
+       DECLARE_LIBBPF_OPTS(bpf_link_update_opts, update_opts);
+       int cgroup_fd = -1, cgroup_fd2 = -1, cgroup_fd3 = -1;
+       int listen_fd, client_fd, accepted_fd;
+       struct lsm_cgroup *skel = NULL;
+       int post_create_prog_fd2 = -1;
+       int post_create_prog_fd = -1;
+       int bind_link_fd2 = -1;
+       int bind_prog_fd2 = -1;
+       int alloc_prog_fd = -1;
+       int bind_prog_fd = -1;
+       int bind_link_fd = -1;
+       int clone_prog_fd = -1;
+       int err, fd, prio;
+       socklen_t socklen;
+
+       cgroup_fd3 = test__join_cgroup("/sock_policy_empty");
+       if (!ASSERT_GE(cgroup_fd3, 0, "create empty cgroup"))
+               goto close_cgroup;
+
+       cgroup_fd2 = test__join_cgroup("/sock_policy_reuse");
+       if (!ASSERT_GE(cgroup_fd2, 0, "create cgroup for reuse"))
+               goto close_cgroup;
+
+       cgroup_fd = test__join_cgroup("/sock_policy");
+       if (!ASSERT_GE(cgroup_fd, 0, "join_cgroup"))
+               goto close_cgroup;
+
+       skel = lsm_cgroup__open_and_load();
+       if (!ASSERT_OK_PTR(skel, "open_and_load"))
+               goto close_cgroup;
+
+       post_create_prog_fd = bpf_program__fd(skel->progs.socket_post_create);
+       post_create_prog_fd2 = bpf_program__fd(skel->progs.socket_post_create2);
+       bind_prog_fd = bpf_program__fd(skel->progs.socket_bind);
+       bind_prog_fd2 = bpf_program__fd(skel->progs.socket_bind2);
+       alloc_prog_fd = bpf_program__fd(skel->progs.socket_alloc);
+       clone_prog_fd = bpf_program__fd(skel->progs.socket_clone);
+
+       ASSERT_EQ(query_prog_cnt(cgroup_fd, "bpf_lsm_sk_alloc_security"), 0, "prog count");
+       ASSERT_EQ(query_prog_cnt(cgroup_fd, NULL), 0, "total prog count");
+       err = bpf_prog_attach(alloc_prog_fd, cgroup_fd, BPF_LSM_CGROUP, 0);
+       if (err == -ENOTSUPP) {
+               test__skip();
+               goto close_cgroup;
+       }
+       if (!ASSERT_OK(err, "attach alloc_prog_fd"))
+               goto detach_cgroup;
+       ASSERT_EQ(query_prog_cnt(cgroup_fd, "bpf_lsm_sk_alloc_security"), 1, "prog count");
+       ASSERT_EQ(query_prog_cnt(cgroup_fd, NULL), 1, "total prog count");
+
+       ASSERT_EQ(query_prog_cnt(cgroup_fd, "bpf_lsm_inet_csk_clone"), 0, "prog count");
+       err = bpf_prog_attach(clone_prog_fd, cgroup_fd, BPF_LSM_CGROUP, 0);
+       if (!ASSERT_OK(err, "attach clone_prog_fd"))
+               goto detach_cgroup;
+       ASSERT_EQ(query_prog_cnt(cgroup_fd, "bpf_lsm_inet_csk_clone"), 1, "prog count");
+       ASSERT_EQ(query_prog_cnt(cgroup_fd, NULL), 2, "total prog count");
+
+       /* Make sure replacing works. */
+
+       ASSERT_EQ(query_prog_cnt(cgroup_fd, "bpf_lsm_socket_post_create"), 0, "prog count");
+       err = bpf_prog_attach(post_create_prog_fd, cgroup_fd,
+                             BPF_LSM_CGROUP, 0);
+       if (!ASSERT_OK(err, "attach post_create_prog_fd"))
+               goto detach_cgroup;
+       ASSERT_EQ(query_prog_cnt(cgroup_fd, "bpf_lsm_socket_post_create"), 1, "prog count");
+       ASSERT_EQ(query_prog_cnt(cgroup_fd, NULL), 3, "total prog count");
+
+       attach_opts.replace_prog_fd = post_create_prog_fd;
+       err = bpf_prog_attach_opts(post_create_prog_fd2, cgroup_fd,
+                                  BPF_LSM_CGROUP, &attach_opts);
+       if (!ASSERT_OK(err, "prog replace post_create_prog_fd"))
+               goto detach_cgroup;
+       ASSERT_EQ(query_prog_cnt(cgroup_fd, "bpf_lsm_socket_post_create"), 1, "prog count");
+       ASSERT_EQ(query_prog_cnt(cgroup_fd, NULL), 3, "total prog count");
+
+       /* Try the same attach/replace via link API. */
+
+       ASSERT_EQ(query_prog_cnt(cgroup_fd, "bpf_lsm_socket_bind"), 0, "prog count");
+       bind_link_fd = bpf_link_create(bind_prog_fd, cgroup_fd,
+                                      BPF_LSM_CGROUP, NULL);
+       if (!ASSERT_GE(bind_link_fd, 0, "link create bind_prog_fd"))
+               goto detach_cgroup;
+       ASSERT_EQ(query_prog_cnt(cgroup_fd, "bpf_lsm_socket_bind"), 1, "prog count");
+       ASSERT_EQ(query_prog_cnt(cgroup_fd, NULL), 4, "total prog count");
+
+       update_opts.old_prog_fd = bind_prog_fd;
+       update_opts.flags = BPF_F_REPLACE;
+
+       err = bpf_link_update(bind_link_fd, bind_prog_fd2, &update_opts);
+       if (!ASSERT_OK(err, "link update bind_prog_fd"))
+               goto detach_cgroup;
+       ASSERT_EQ(query_prog_cnt(cgroup_fd, "bpf_lsm_socket_bind"), 1, "prog count");
+       ASSERT_EQ(query_prog_cnt(cgroup_fd, NULL), 4, "total prog count");
+
+       /* Attach another instance of bind program to another cgroup.
+        * This should trigger the reuse of the trampoline shim (two
+        * programs attaching to the same btf_id).
+        */
+
+       ASSERT_EQ(query_prog_cnt(cgroup_fd, "bpf_lsm_socket_bind"), 1, "prog count");
+       ASSERT_EQ(query_prog_cnt(cgroup_fd2, "bpf_lsm_socket_bind"), 0, "prog count");
+       bind_link_fd2 = bpf_link_create(bind_prog_fd2, cgroup_fd2,
+                                       BPF_LSM_CGROUP, NULL);
+       if (!ASSERT_GE(bind_link_fd2, 0, "link create bind_prog_fd2"))
+               goto detach_cgroup;
+       ASSERT_EQ(query_prog_cnt(cgroup_fd2, "bpf_lsm_socket_bind"), 1, "prog count");
+       ASSERT_EQ(query_prog_cnt(cgroup_fd, NULL), 4, "total prog count");
+       ASSERT_EQ(query_prog_cnt(cgroup_fd2, NULL), 1, "total prog count");
+
+       /* AF_UNIX is prohibited. */
+
+       fd = socket(AF_UNIX, SOCK_STREAM, 0);
+       ASSERT_LT(fd, 0, "socket(AF_UNIX)");
+       close(fd);
+
+       /* AF_INET6 gets default policy (sk_priority). */
+
+       fd = socket(AF_INET6, SOCK_STREAM, 0);
+       if (!ASSERT_GE(fd, 0, "socket(SOCK_STREAM)"))
+               goto detach_cgroup;
+
+       prio = 0;
+       socklen = sizeof(prio);
+       ASSERT_GE(getsockopt(fd, SOL_SOCKET, SO_PRIORITY, &prio, &socklen), 0,
+                 "getsockopt");
+       ASSERT_EQ(prio, 123, "sk_priority");
+
+       close(fd);
+
+       /* TX-only AF_PACKET is allowed. */
+
+       ASSERT_LT(socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL)), 0,
+                 "socket(AF_PACKET, ..., ETH_P_ALL)");
+
+       fd = socket(AF_PACKET, SOCK_RAW, 0);
+       ASSERT_GE(fd, 0, "socket(AF_PACKET, ..., 0)");
+
+       /* TX-only AF_PACKET cannot be rebound. */
+
+       struct sockaddr_ll sa = {
+               .sll_family = AF_PACKET,
+               .sll_protocol = htons(ETH_P_ALL),
+       };
+       ASSERT_LT(bind(fd, (struct sockaddr *)&sa, sizeof(sa)), 0,
+                 "bind(ETH_P_ALL)");
+
+       close(fd);
+
+       /* Trigger passive open. */
+
+       listen_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0, 0);
+       ASSERT_GE(listen_fd, 0, "start_server");
+       client_fd = connect_to_fd(listen_fd, 0);
+       ASSERT_GE(client_fd, 0, "connect_to_fd");
+       accepted_fd = accept(listen_fd, NULL, NULL);
+       ASSERT_GE(accepted_fd, 0, "accept");
+
+       prio = 0;
+       socklen = sizeof(prio);
+       ASSERT_GE(getsockopt(accepted_fd, SOL_SOCKET, SO_PRIORITY, &prio, &socklen), 0,
+                 "getsockopt");
+       ASSERT_EQ(prio, 234, "sk_priority");
+
+       /* These are replaced and never called. */
+       ASSERT_EQ(skel->bss->called_socket_post_create, 0, "called_create");
+       ASSERT_EQ(skel->bss->called_socket_bind, 0, "called_bind");
+
+       /* AF_INET6+SOCK_STREAM
+        * AF_PACKET+SOCK_RAW
+        * listen_fd
+        * client_fd
+        * accepted_fd
+        */
+       ASSERT_EQ(skel->bss->called_socket_post_create2, 5, "called_create2");
+
+       /* start_server
+        * bind(ETH_P_ALL)
+        */
+       ASSERT_EQ(skel->bss->called_socket_bind2, 2, "called_bind2");
+       /* Single accept(). */
+       ASSERT_EQ(skel->bss->called_socket_clone, 1, "called_clone");
+
+       /* AF_UNIX+SOCK_STREAM (failed)
+        * AF_INET6+SOCK_STREAM
+        * AF_PACKET+SOCK_RAW (failed)
+        * AF_PACKET+SOCK_RAW
+        * listen_fd
+        * client_fd
+        * accepted_fd
+        */
+       ASSERT_EQ(skel->bss->called_socket_alloc, 7, "called_alloc");
+
+       close(listen_fd);
+       close(client_fd);
+       close(accepted_fd);
+
+       /* Make sure other cgroup doesn't trigger the programs. */
+
+       if (!ASSERT_OK(join_cgroup("/sock_policy_empty"), "join empty cgroup"))
+               goto detach_cgroup;
+
+       fd = socket(AF_INET6, SOCK_STREAM, 0);
+       if (!ASSERT_GE(fd, 0, "socket(SOCK_STREAM)"))
+               goto detach_cgroup;
+
+       prio = 0;
+       socklen = sizeof(prio);
+       ASSERT_GE(getsockopt(fd, SOL_SOCKET, SO_PRIORITY, &prio, &socklen), 0,
+                 "getsockopt");
+       ASSERT_EQ(prio, 0, "sk_priority");
+
+       close(fd);
+
+detach_cgroup:
+       ASSERT_GE(bpf_prog_detach2(post_create_prog_fd2, cgroup_fd,
+                                  BPF_LSM_CGROUP), 0, "detach_create");
+       close(bind_link_fd);
+       /* Don't close bind_link_fd2, exercise cgroup release cleanup. */
+       ASSERT_GE(bpf_prog_detach2(alloc_prog_fd, cgroup_fd,
+                                  BPF_LSM_CGROUP), 0, "detach_alloc");
+       ASSERT_GE(bpf_prog_detach2(clone_prog_fd, cgroup_fd,
+                                  BPF_LSM_CGROUP), 0, "detach_clone");
+
+close_cgroup:
+       close(cgroup_fd);
+       close(cgroup_fd2);
+       close(cgroup_fd3);
+       lsm_cgroup__destroy(skel);
+}
+
+static void test_lsm_cgroup_nonvoid(void)
+{
+       struct lsm_cgroup_nonvoid *skel = NULL;
+
+       skel = lsm_cgroup_nonvoid__open_and_load();
+       ASSERT_NULL(skel, "open succeeds");
+       lsm_cgroup_nonvoid__destroy(skel);
+}
+
+void test_lsm_cgroup(void)
+{
+       if (test__start_subtest("functional"))
+               test_lsm_cgroup_functional();
+       if (test__start_subtest("nonvoid"))
+               test_lsm_cgroup_nonvoid();
+       btf__free(btf);
+}
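
For reference, the two attach flows exercised above boil down to one libbpf call each; a minimal sketch, assuming a cgroup fd and a loaded program fd (error handling elided):

	/* Legacy-style attach; undone with bpf_prog_detach2(). */
	err = bpf_prog_attach(prog_fd, cgroup_fd, BPF_LSM_CGROUP, 0);

	/* Link-based attach; detached when the link fd is closed (or on
	 * cgroup release, which the dangling bind_link_fd2 above exercises).
	 */
	link_fd = bpf_link_create(prog_fd, cgroup_fd, BPF_LSM_CGROUP, NULL);
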
index f4a13d9..c197261 100644 (file)
@@ -44,7 +44,7 @@ BTF_ID(union,   U)
 BTF_ID(func,    func)
 
 extern __u32 test_list_global[];
-BTF_ID_LIST_GLOBAL(test_list_global)
+BTF_ID_LIST_GLOBAL(test_list_global, 1)
 BTF_ID_UNUSED
 BTF_ID(typedef, S)
 BTF_ID(typedef, T)
index 9d211b5..7d23166 100644 (file)
@@ -394,7 +394,6 @@ void serial_test_sock_fields(void)
        test();
 
 done:
-       test_sock_fields__detach(skel);
        test_sock_fields__destroy(skel);
        if (child_cg_fd >= 0)
                close(child_cg_fd);
index 5f733d5..9ad9da0 100644 (file)
@@ -12,7 +12,7 @@ int lets_test_this(int);
 
 static volatile int idx = 2;
 static volatile __u64 bla = 0xFEDCBA9876543210ULL;
-static volatile short nums[] = {-1, -2, -3, };
+static volatile short nums[] = {-1, -2, -3, -4};
 
 static volatile struct {
        int x;
index fb77a12..874a846 100644 (file)
@@ -63,7 +63,7 @@ static bool expect_str(char *buf, size_t size, const char *str, const char *name
 static void test_synproxy(bool xdp)
 {
        int server_fd = -1, client_fd = -1, accept_fd = -1;
-       char *prog_id, *prog_id_end;
+       char *prog_id = NULL, *prog_id_end;
        struct nstoken *ns = NULL;
        FILE *ctrl_file = NULL;
        char buf[CMD_OUT_BUF_SIZE];
index e085652..de1fc82 100644 (file)
@@ -11,11 +11,19 @@ struct callback_ctx {
        int output;
 };
 
+struct {
+       __uint(type, BPF_MAP_TYPE_HASH);
+       __uint(max_entries, 32);
+       __type(key, int);
+       __type(value, int);
+} map1 SEC(".maps");
+
 /* These should be set by the user program */
 u32 nested_callback_nr_loops;
 u32 stop_index = -1;
 u32 nr_loops;
 int pid;
+int callback_selector;
 
 /* Making these global variables so that the userspace program
  * can verify the output through the skeleton
@@ -111,3 +119,109 @@ int prog_nested_calls(void *ctx)
 
        return 0;
 }
+
+static int callback_set_f0(int i, void *ctx)
+{
+       g_output = 0xF0;
+       return 0;
+}
+
+static int callback_set_0f(int i, void *ctx)
+{
+       g_output = 0x0F;
+       return 0;
+}
+
+/*
+ * A non-constant callback is a corner case for the bpf_loop inlining logic.
+ */
+SEC("fentry/" SYS_PREFIX "sys_nanosleep")
+int prog_non_constant_callback(void *ctx)
+{
+       struct callback_ctx data = {};
+
+       if (bpf_get_current_pid_tgid() >> 32 != pid)
+               return 0;
+
+       int (*callback)(int i, void *ctx);
+
+       g_output = 0;
+
+       if (callback_selector == 0x0F)
+               callback = callback_set_0f;
+       else
+               callback = callback_set_f0;
+
+       bpf_loop(1, callback, NULL, 0);
+
+       return 0;
+}
+
+static int stack_check_inner_callback(void *ctx)
+{
+       return 0;
+}
+
+static int map1_lookup_elem(int key)
+{
+       int *val = bpf_map_lookup_elem(&map1, &key);
+
+       return val ? *val : -1;
+}
+
+static void map1_update_elem(int key, int val)
+{
+       bpf_map_update_elem(&map1, &key, &val, BPF_ANY);
+}
+
+static int stack_check_outer_callback(void *ctx)
+{
+       int a = map1_lookup_elem(1);
+       int b = map1_lookup_elem(2);
+       int c = map1_lookup_elem(3);
+       int d = map1_lookup_elem(4);
+       int e = map1_lookup_elem(5);
+       int f = map1_lookup_elem(6);
+
+       bpf_loop(1, stack_check_inner_callback, NULL, 0);
+
+       map1_update_elem(1, a + 1);
+       map1_update_elem(2, b + 1);
+       map1_update_elem(3, c + 1);
+       map1_update_elem(4, d + 1);
+       map1_update_elem(5, e + 1);
+       map1_update_elem(6, f + 1);
+
+       return 0;
+}
+
+/* Some of the local variables in stack_check and
+ * stack_check_outer_callback would be allocated on the stack by the
+ * compiler. This test verifies that the stack content for these
+ * variables is preserved between calls to bpf_loop (it might not be
+ * if loop inlining allocates stack slots incorrectly).
+ */
+SEC("fentry/" SYS_PREFIX "sys_nanosleep")
+int stack_check(void *ctx)
+{
+       if (bpf_get_current_pid_tgid() >> 32 != pid)
+               return 0;
+
+       int a = map1_lookup_elem(7);
+       int b = map1_lookup_elem(8);
+       int c = map1_lookup_elem(9);
+       int d = map1_lookup_elem(10);
+       int e = map1_lookup_elem(11);
+       int f = map1_lookup_elem(12);
+
+       bpf_loop(1, stack_check_outer_callback, NULL, 0);
+
+       map1_update_elem(7, a + 1);
+       map1_update_elem(8, b + 1);
+       map1_update_elem(9, c + 1);
+       map1_update_elem(10, d + 1);
+       map1_update_elem(11, e + 1);
+       map1_update_elem(12, f + 1);
+
+       return 0;
+}
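
A user-space driver for these programs follows the usual skeleton pattern; a minimal sketch, assuming the generated bpf_loop.skel.h naming (error handling elided):

	struct bpf_loop *skel = bpf_loop__open_and_load();

	skel->bss->pid = getpid();
	skel->bss->callback_selector = 0x0F;	/* select callback_set_0f */
	bpf_loop__attach(skel);
	usleep(1);	/* usleep() is nanosleep-based, firing the fentry */
	/* expect skel->bss->g_output == 0x0F */
	bpf_loop__destroy(skel);
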
index 1c1289b..98dd2c4 100644 (file)
@@ -8,6 +8,7 @@
 #define SOL_SOCKET             1
 #define SO_SNDBUF              7
 #define __SO_ACCEPTCON         (1 << 16)
+#define SO_PRIORITY            12
 
 #define SOL_TCP                        6
 #define TCP_CONGESTION         13
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___diff.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___diff.c
new file mode 100644 (file)
index 0000000..57ae2c2
--- /dev/null
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_type_based___diff x) {}
index 26e1033..fd8e1b4 100644 (file)
@@ -13,6 +13,7 @@ struct core_reloc_kernel_output {
        int valid[10];
        char comm[sizeof("test_progs")];
        int comm_len;
+       bool local_task_struct_matches;
 };
 
 /*
@@ -860,10 +861,11 @@ struct core_reloc_size___err_ambiguous2 {
 };
 
 /*
- * TYPE EXISTENCE & SIZE
+ * TYPE EXISTENCE, MATCH & SIZE
  */
 struct core_reloc_type_based_output {
        bool struct_exists;
+       bool complex_struct_exists;
        bool union_exists;
        bool enum_exists;
        bool typedef_named_struct_exists;
@@ -872,9 +874,24 @@ struct core_reloc_type_based_output {
        bool typedef_int_exists;
        bool typedef_enum_exists;
        bool typedef_void_ptr_exists;
+       bool typedef_restrict_ptr_exists;
        bool typedef_func_proto_exists;
        bool typedef_arr_exists;
 
+       bool struct_matches;
+       bool complex_struct_matches;
+       bool union_matches;
+       bool enum_matches;
+       bool typedef_named_struct_matches;
+       bool typedef_anon_struct_matches;
+       bool typedef_struct_ptr_matches;
+       bool typedef_int_matches;
+       bool typedef_enum_matches;
+       bool typedef_void_ptr_matches;
+       bool typedef_restrict_ptr_matches;
+       bool typedef_func_proto_matches;
+       bool typedef_arr_matches;
+
        int struct_sz;
        int union_sz;
        int enum_sz;
@@ -892,6 +909,14 @@ struct a_struct {
        int x;
 };
 
+struct a_complex_struct {
+       union {
+               struct a_struct * restrict a;
+               void *b;
+       } x;
+       volatile long y;
+};
+
 union a_union {
        int y;
        int z;
@@ -916,6 +941,7 @@ typedef int int_typedef;
 typedef enum { TYPEDEF_ENUM_VAL1, TYPEDEF_ENUM_VAL2 } enum_typedef;
 
 typedef void *void_ptr_typedef;
+typedef int *restrict restrict_ptr_typedef;
 
 typedef int (*func_proto_typedef)(long);
 
@@ -923,22 +949,86 @@ typedef char arr_typedef[20];
 
 struct core_reloc_type_based {
        struct a_struct f1;
-       union a_union f2;
-       enum an_enum f3;
-       named_struct_typedef f4;
-       anon_struct_typedef f5;
-       struct_ptr_typedef f6;
-       int_typedef f7;
-       enum_typedef f8;
-       void_ptr_typedef f9;
-       func_proto_typedef f10;
-       arr_typedef f11;
+       struct a_complex_struct f2;
+       union a_union f3;
+       enum an_enum f4;
+       named_struct_typedef f5;
+       anon_struct_typedef f6;
+       struct_ptr_typedef f7;
+       int_typedef f8;
+       enum_typedef f9;
+       void_ptr_typedef f10;
+       restrict_ptr_typedef f11;
+       func_proto_typedef f12;
+       arr_typedef f13;
 };
 
 /* no types in target */
 struct core_reloc_type_based___all_missing {
 };
 
+/* different member orders, enum variant values, signedness, etc */
+struct a_struct___diff {
+       int x;
+       int a;
+};
+
+struct a_struct___forward;
+
+struct a_complex_struct___diff {
+       union {
+               struct a_struct___forward *a;
+               void *b;
+       } x;
+       volatile long y;
+};
+
+union a_union___diff {
+       int z;
+       int y;
+};
+
+typedef struct a_struct___diff named_struct_typedef___diff;
+
+typedef struct { int z, x, y; } anon_struct_typedef___diff;
+
+typedef struct {
+       int c;
+       int b;
+       int a;
+} *struct_ptr_typedef___diff;
+
+enum an_enum___diff {
+       AN_ENUM_VAL2___diff = 0,
+       AN_ENUM_VAL1___diff = 42,
+       AN_ENUM_VAL3___diff = 1,
+};
+
+typedef unsigned int int_typedef___diff;
+
+typedef enum { TYPEDEF_ENUM_VAL2___diff, TYPEDEF_ENUM_VAL1___diff = 50 } enum_typedef___diff;
+
+typedef const void *void_ptr_typedef___diff;
+
+typedef int_typedef___diff (*func_proto_typedef___diff)(long);
+
+typedef char arr_typedef___diff[3];
+
+struct core_reloc_type_based___diff {
+       struct a_struct___diff f1;
+       struct a_complex_struct___diff f2;
+       union a_union___diff f3;
+       enum an_enum___diff f4;
+       named_struct_typedef___diff f5;
+       anon_struct_typedef___diff f6;
+       struct_ptr_typedef___diff f7;
+       int_typedef___diff f8;
+       enum_typedef___diff f9;
+       void_ptr_typedef___diff f10;
+       func_proto_typedef___diff f11;
+       arr_typedef___diff f12;
+};
+
 /* different type sizes, extra modifiers, anon vs named enums, etc */
 struct a_struct___diff_sz {
        long x;
diff --git a/tools/testing/selftests/bpf/progs/local_storage_bench.c b/tools/testing/selftests/bpf/progs/local_storage_bench.c
new file mode 100644 (file)
index 0000000..2c3234c
--- /dev/null
@@ -0,0 +1,104 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+#define HASHMAP_SZ 4194304
+
+struct {
+       __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
+       __uint(max_entries, 1000);
+       __type(key, int);
+       __type(value, int);
+       __array(values, struct {
+               __uint(type, BPF_MAP_TYPE_TASK_STORAGE);
+               __uint(map_flags, BPF_F_NO_PREALLOC);
+               __type(key, int);
+               __type(value, int);
+       });
+} array_of_local_storage_maps SEC(".maps");
+
+struct {
+       __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
+       __uint(max_entries, 1000);
+       __type(key, int);
+       __type(value, int);
+       __array(values, struct {
+               __uint(type, BPF_MAP_TYPE_HASH);
+               __uint(max_entries, HASHMAP_SZ);
+               __type(key, int);
+               __type(value, int);
+       });
+} array_of_hash_maps SEC(".maps");
+
+long important_hits;
+long hits;
+
+/* set from user-space */
+const volatile unsigned int use_hashmap;
+const volatile unsigned int hashmap_num_keys;
+const volatile unsigned int num_maps;
+const volatile unsigned int interleave;
+
+struct loop_ctx {
+       struct task_struct *task;
+       long loop_hits;
+       long loop_important_hits;
+};
+
+static int do_lookup(unsigned int elem, struct loop_ctx *lctx)
+{
+       void *map, *inner_map;
+       int idx = 0;
+
+       if (use_hashmap)
+               map = &array_of_hash_maps;
+       else
+               map = &array_of_local_storage_maps;
+
+       inner_map = bpf_map_lookup_elem(map, &elem);
+       if (!inner_map)
+               return -1;
+
+       if (use_hashmap) {
+               idx = bpf_get_prandom_u32() % hashmap_num_keys;
+               bpf_map_lookup_elem(inner_map, &idx);
+       } else {
+               bpf_task_storage_get(inner_map, lctx->task, &idx,
+                                    BPF_LOCAL_STORAGE_GET_F_CREATE);
+       }
+
+       lctx->loop_hits++;
+       if (!elem)
+               lctx->loop_important_hits++;
+       return 0;
+}
+
+static long loop(u32 index, void *ctx)
+{
+       struct loop_ctx *lctx = (struct loop_ctx *)ctx;
+       unsigned int map_idx = index % num_maps;
+
+       do_lookup(map_idx, lctx);
+       if (interleave && map_idx % 3 == 0)
+               do_lookup(0, lctx);
+       return 0;
+}
+
+SEC("fentry/" SYS_PREFIX "sys_getpgid")
+int get_local(void *ctx)
+{
+       struct loop_ctx lctx;
+
+       lctx.task = bpf_get_current_task_btf();
+       lctx.loop_hits = 0;
+       lctx.loop_important_hits = 0;
+       bpf_loop(10000, &loop, &lctx, 0);
+       __sync_add_and_fetch(&hits, lctx.loop_hits);
+       __sync_add_and_fetch(&important_hits, lctx.loop_important_hits);
+       return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/local_storage_rcu_tasks_trace_bench.c b/tools/testing/selftests/bpf/progs/local_storage_rcu_tasks_trace_bench.c
new file mode 100644 (file)
index 0000000..03bf69f
--- /dev/null
@@ -0,0 +1,67 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+struct {
+       __uint(type, BPF_MAP_TYPE_TASK_STORAGE);
+       __uint(map_flags, BPF_F_NO_PREALLOC);
+       __type(key, int);
+       __type(value, int);
+} task_storage SEC(".maps");
+
+long hits;
+long gp_hits;
+long gp_times;
+long current_gp_start;
+long unexpected;
+bool postgp_seen;
+
+SEC("fentry/" SYS_PREFIX "sys_getpgid")
+int get_local(void *ctx)
+{
+       struct task_struct *task;
+       int idx;
+       int *s;
+
+       idx = 0;
+       task = bpf_get_current_task_btf();
+       s = bpf_task_storage_get(&task_storage, task, &idx,
+                                BPF_LOCAL_STORAGE_GET_F_CREATE);
+       if (!s)
+               return 0;
+
+       *s = 3;
+       bpf_task_storage_delete(&task_storage, task);
+       __sync_add_and_fetch(&hits, 1);
+       return 0;
+}
+
+SEC("fentry/rcu_tasks_trace_pregp_step")
+int pregp_step(struct pt_regs *ctx)
+{
+       current_gp_start = bpf_ktime_get_ns();
+       return 0;
+}
+
+SEC("fentry/rcu_tasks_trace_postgp")
+int postgp(struct pt_regs *ctx)
+{
+       if (!current_gp_start && postgp_seen) {
+               /* This will only happen if the prog tracing
+                * rcu_tasks_trace_pregp_step doesn't execute before this prog.
+                */
+               __sync_add_and_fetch(&unexpected, 1);
+               return 0;
+       }
+
+       __sync_add_and_fetch(&gp_times, bpf_ktime_get_ns() - current_gp_start);
+       __sync_add_and_fetch(&gp_hits, 1);
+       current_gp_start = 0;
+       postgp_seen = true;
+       return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/lsm_cgroup.c b/tools/testing/selftests/bpf/progs/lsm_cgroup.c
new file mode 100644 (file)
index 0000000..4f2d60b
--- /dev/null
@@ -0,0 +1,180 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+#include "bpf_tracing_net.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+#ifndef AF_PACKET
+#define AF_PACKET 17
+#endif
+
+#ifndef AF_UNIX
+#define AF_UNIX 1
+#endif
+
+#ifndef EPERM
+#define EPERM 1
+#endif
+
+struct {
+       __uint(type, BPF_MAP_TYPE_CGROUP_STORAGE);
+       __type(key, __u64);
+       __type(value, __u64);
+} cgroup_storage SEC(".maps");
+
+int called_socket_post_create;
+int called_socket_post_create2;
+int called_socket_bind;
+int called_socket_bind2;
+int called_socket_alloc;
+int called_socket_clone;
+
+static __always_inline int test_local_storage(void)
+{
+       __u64 *val;
+
+       val = bpf_get_local_storage(&cgroup_storage, 0);
+       if (!val)
+               return 0;
+       *val += 1;
+
+       return 1;
+}
+
+static __always_inline int real_create(struct socket *sock, int family,
+                                      int protocol)
+{
+       struct sock *sk;
+       int prio = 123;
+
+       /* Reject non-tx-only AF_PACKET. */
+       if (family == AF_PACKET && protocol != 0)
+               return 0; /* EPERM */
+
+       sk = sock->sk;
+       if (!sk)
+               return 1;
+
+       /* The rest of the sockets get default policy. */
+       if (bpf_setsockopt(sk, SOL_SOCKET, SO_PRIORITY, &prio, sizeof(prio)))
+               return 0; /* EPERM */
+
+       /* Make sure bpf_getsockopt is allowed and works. */
+       prio = 0;
+       if (bpf_getsockopt(sk, SOL_SOCKET, SO_PRIORITY, &prio, sizeof(prio)))
+               return 0; /* EPERM */
+       if (prio != 123)
+               return 0; /* EPERM */
+
+       /* Can access cgroup local storage. */
+       if (!test_local_storage())
+               return 0; /* EPERM */
+
+       return 1;
+}
+
+/* __cgroup_bpf_run_lsm_socket */
+SEC("lsm_cgroup/socket_post_create")
+int BPF_PROG(socket_post_create, struct socket *sock, int family,
+            int type, int protocol, int kern)
+{
+       called_socket_post_create++;
+       return real_create(sock, family, protocol);
+}
+
+/* __cgroup_bpf_run_lsm_socket */
+SEC("lsm_cgroup/socket_post_create")
+int BPF_PROG(socket_post_create2, struct socket *sock, int family,
+            int type, int protocol, int kern)
+{
+       called_socket_post_create2++;
+       return real_create(sock, family, protocol);
+}
+
+static __always_inline int real_bind(struct socket *sock,
+                                    struct sockaddr *address,
+                                    int addrlen)
+{
+       struct sockaddr_ll sa = {};
+
+       if (sock->sk->__sk_common.skc_family != AF_PACKET)
+               return 1;
+
+       if (sock->sk->sk_kern_sock)
+               return 1;
+
+       bpf_probe_read_kernel(&sa, sizeof(sa), address);
+       if (sa.sll_protocol)
+               return 0; /* EPERM */
+
+       /* Can access cgroup local storage. */
+       if (!test_local_storage())
+               return 0; /* EPERM */
+
+       return 1;
+}
+
+/* __cgroup_bpf_run_lsm_socket */
+SEC("lsm_cgroup/socket_bind")
+int BPF_PROG(socket_bind, struct socket *sock, struct sockaddr *address,
+            int addrlen)
+{
+       called_socket_bind++;
+       return real_bind(sock, address, addrlen);
+}
+
+/* __cgroup_bpf_run_lsm_socket */
+SEC("lsm_cgroup/socket_bind")
+int BPF_PROG(socket_bind2, struct socket *sock, struct sockaddr *address,
+            int addrlen)
+{
+       called_socket_bind2++;
+       return real_bind(sock, address, addrlen);
+}
+
+/* __cgroup_bpf_run_lsm_current (via bpf_lsm_current_hooks) */
+SEC("lsm_cgroup/sk_alloc_security")
+int BPF_PROG(socket_alloc, struct sock *sk, int family, gfp_t priority)
+{
+       called_socket_alloc++;
+       if (family == AF_UNIX)
+               return 0; /* EPERM */
+
+       /* Can access cgroup local storage. */
+       if (!test_local_storage())
+               return 0; /* EPERM */
+
+       return 1;
+}
+
+/* __cgroup_bpf_run_lsm_sock */
+SEC("lsm_cgroup/inet_csk_clone")
+int BPF_PROG(socket_clone, struct sock *newsk, const struct request_sock *req)
+{
+       int prio = 234;
+
+       if (!newsk)
+               return 1;
+
+       /* Accepted request sockets get a different priority. */
+       if (bpf_setsockopt(newsk, SOL_SOCKET, SO_PRIORITY, &prio, sizeof(prio)))
+               return 1;
+
+       /* Make sure bpf_getsockopt is allowed and works. */
+       prio = 0;
+       if (bpf_getsockopt(newsk, SOL_SOCKET, SO_PRIORITY, &prio, sizeof(prio)))
+               return 1;
+       if (prio != 234)
+               return 1;
+
+       /* Can access cgroup local storage. */
+       if (!test_local_storage())
+               return 1;
+
+       called_socket_clone++;
+
+       return 1;
+}
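
The return convention used throughout this program: returning 1 allows the operation, returning 0 makes the BPF_LSM_CGROUP trampoline report -EPERM. A hypothetical minimal policy (illustrative only, not part of this patch; hook name and argument order follow security_socket_create()):

	SEC("lsm_cgroup/socket_create")
	int BPF_PROG(deny_raw_sockets, int family, int type, int protocol, int kern)
	{
		return type == SOCK_RAW ? 0 /* -EPERM */ : 1 /* allow */;
	}
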
diff --git a/tools/testing/selftests/bpf/progs/lsm_cgroup_nonvoid.c b/tools/testing/selftests/bpf/progs/lsm_cgroup_nonvoid.c
new file mode 100644 (file)
index 0000000..6cb0f16
--- /dev/null
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+SEC("lsm_cgroup/inet_csk_clone")
+int BPF_PROG(nonvoid_socket_clone, struct sock *newsk, const struct request_sock *req)
+{
+       /* Cannot return any errors from void LSM hooks. */
+       return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/tcp_ca_incompl_cong_ops.c b/tools/testing/selftests/bpf/progs/tcp_ca_incompl_cong_ops.c
new file mode 100644 (file)
index 0000000..7bb872f
--- /dev/null
@@ -0,0 +1,35 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+static inline struct tcp_sock *tcp_sk(const struct sock *sk)
+{
+       return (struct tcp_sock *)sk;
+}
+
+SEC("struct_ops/incompl_cong_ops_ssthresh")
+__u32 BPF_PROG(incompl_cong_ops_ssthresh, struct sock *sk)
+{
+       return tcp_sk(sk)->snd_ssthresh;
+}
+
+SEC("struct_ops/incompl_cong_ops_undo_cwnd")
+__u32 BPF_PROG(incompl_cong_ops_undo_cwnd, struct sock *sk)
+{
+       return tcp_sk(sk)->snd_cwnd;
+}
+
+SEC(".struct_ops")
+struct tcp_congestion_ops incompl_cong_ops = {
+       /* Intentionally leave out both cong_avoid() and cong_control(),
+        * at least one of which is required.
+        */
+       .ssthresh = (void *)incompl_cong_ops_ssthresh,
+       .undo_cwnd = (void *)incompl_cong_ops_undo_cwnd,
+       .name = "bpf_incompl_ops",
+};
diff --git a/tools/testing/selftests/bpf/progs/tcp_ca_unsupp_cong_op.c b/tools/testing/selftests/bpf/progs/tcp_ca_unsupp_cong_op.c
new file mode 100644 (file)
index 0000000..c06f4a4
--- /dev/null
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+SEC("struct_ops/unsupp_cong_op_get_info")
+size_t BPF_PROG(unsupp_cong_op_get_info, struct sock *sk, u32 ext, int *attr,
+               union tcp_cc_info *info)
+{
+       return 0;
+}
+
+SEC(".struct_ops")
+struct tcp_congestion_ops unsupp_cong_op = {
+       .get_info = (void *)unsupp_cong_op_get_info,
+       .name = "bpf_unsupp_op",
+};
diff --git a/tools/testing/selftests/bpf/progs/tcp_ca_write_sk_pacing.c b/tools/testing/selftests/bpf/progs/tcp_ca_write_sk_pacing.c
new file mode 100644 (file)
index 0000000..4344770
--- /dev/null
@@ -0,0 +1,60 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+#define USEC_PER_SEC 1000000UL
+
+#define min(a, b) ((a) < (b) ? (a) : (b))
+
+static inline struct tcp_sock *tcp_sk(const struct sock *sk)
+{
+       return (struct tcp_sock *)sk;
+}
+
+SEC("struct_ops/write_sk_pacing_init")
+void BPF_PROG(write_sk_pacing_init, struct sock *sk)
+{
+#ifdef ENABLE_ATOMICS_TESTS
+       __sync_bool_compare_and_swap(&sk->sk_pacing_status, SK_PACING_NONE,
+                                    SK_PACING_NEEDED);
+#else
+       sk->sk_pacing_status = SK_PACING_NEEDED;
+#endif
+}
+
+SEC("struct_ops/write_sk_pacing_cong_control")
+void BPF_PROG(write_sk_pacing_cong_control, struct sock *sk,
+             const struct rate_sample *rs)
+{
+       const struct tcp_sock *tp = tcp_sk(sk);
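+       /* tp->srtt_us is the smoothed RTT stored left-shifted by 3 (units
+        * of usec/8), hence the << 3 scaling of the numerator and the
+        * 1 usec (1U << 3) fallback when no RTT sample exists yet.
+        */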
+       unsigned long rate =
+               ((tp->snd_cwnd * tp->mss_cache * USEC_PER_SEC) << 3) /
+               (tp->srtt_us ?: 1U << 3);
+       sk->sk_pacing_rate = min(rate, sk->sk_max_pacing_rate);
+}
+
+SEC("struct_ops/write_sk_pacing_ssthresh")
+__u32 BPF_PROG(write_sk_pacing_ssthresh, struct sock *sk)
+{
+       return tcp_sk(sk)->snd_ssthresh;
+}
+
+SEC("struct_ops/write_sk_pacing_undo_cwnd")
+__u32 BPF_PROG(write_sk_pacing_undo_cwnd, struct sock *sk)
+{
+       return tcp_sk(sk)->snd_cwnd;
+}
+
+SEC(".struct_ops")
+struct tcp_congestion_ops write_sk_pacing = {
+       .init = (void *)write_sk_pacing_init,
+       .cong_control = (void *)write_sk_pacing_cong_control,
+       .ssthresh = (void *)write_sk_pacing_ssthresh,
+       .undo_cwnd = (void *)write_sk_pacing_undo_cwnd,
+       .name = "bpf_w_sk_pacing",
+};
diff --git a/tools/testing/selftests/bpf/progs/test_btf_haskv.c b/tools/testing/selftests/bpf/progs/test_btf_haskv.c
deleted file mode 100644 (file)
index 07c94df..0000000
+++ /dev/null
@@ -1,51 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (c) 2018 Facebook */
-#include <linux/bpf.h>
-#include <bpf/bpf_helpers.h>
-#include "bpf_legacy.h"
-
-struct ipv_counts {
-       unsigned int v4;
-       unsigned int v6;
-};
-
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
-struct bpf_map_def SEC("maps") btf_map = {
-       .type = BPF_MAP_TYPE_ARRAY,
-       .key_size = sizeof(int),
-       .value_size = sizeof(struct ipv_counts),
-       .max_entries = 4,
-};
-#pragma GCC diagnostic pop
-
-BPF_ANNOTATE_KV_PAIR(btf_map, int, struct ipv_counts);
-
-__attribute__((noinline))
-int test_long_fname_2(void)
-{
-       struct ipv_counts *counts;
-       int key = 0;
-
-       counts = bpf_map_lookup_elem(&btf_map, &key);
-       if (!counts)
-               return 0;
-
-       counts->v6++;
-
-       return 0;
-}
-
-__attribute__((noinline))
-int test_long_fname_1(void)
-{
-       return test_long_fname_2();
-}
-
-SEC("dummy_tracepoint")
-int _dummy_tracepoint(void *arg)
-{
-       return test_long_fname_1();
-}
-
-char _license[] SEC("license") = "GPL";
index 762671a..251854a 100644 (file)
@@ -9,19 +9,6 @@ struct ipv_counts {
        unsigned int v6;
 };
 
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
-/* just to validate we can handle maps in multiple sections */
-struct bpf_map_def SEC("maps") btf_map_legacy = {
-       .type = BPF_MAP_TYPE_ARRAY,
-       .key_size = sizeof(int),
-       .value_size = sizeof(long long),
-       .max_entries = 4,
-};
-#pragma GCC diagnostic pop
-
-BPF_ANNOTATE_KV_PAIR(btf_map_legacy, int, struct ipv_counts);
-
 struct {
        __uint(type, BPF_MAP_TYPE_ARRAY);
        __uint(max_entries, 4);
@@ -41,11 +28,6 @@ int test_long_fname_2(void)
 
        counts->v6++;
 
-       /* just verify we can reference both maps */
-       counts = bpf_map_lookup_elem(&btf_map_legacy, &key);
-       if (!counts)
-               return 0;
-
        return 0;
 }
 
index 145028b..a17dd83 100644 (file)
@@ -21,6 +21,7 @@ struct core_reloc_kernel_output {
        /* we have test_progs[-flavor], so cut flavor part */
        char comm[sizeof("test_progs")];
        int comm_len;
+       bool local_task_struct_matches;
 };
 
 struct task_struct {
@@ -30,11 +31,25 @@ struct task_struct {
        struct task_struct *group_leader;
 };
 
+struct mm_struct___wrong {
+       int abc_whatever_should_not_exist;
+};
+
+struct task_struct___local {
+       int pid;
+       struct mm_struct___wrong *mm;
+};
+
 #define CORE_READ(dst, src) bpf_core_read(dst, sizeof(*(dst)), src)
 
 SEC("raw_tracepoint/sys_enter")
 int test_core_kernel(void *ctx)
 {
+       /* Support for the BPF_TYPE_MATCHES argument to the
+        * __builtin_preserve_type_info builtin was added at some point during
+        * development of clang 15 and it's what we require for this test.
+        */
+#if __has_builtin(__builtin_preserve_type_info) && __clang_major__ >= 15
        struct task_struct *task = (void *)bpf_get_current_task();
        struct core_reloc_kernel_output *out = (void *)&data.out;
        uint64_t pid_tgid = bpf_get_current_pid_tgid();
@@ -93,6 +108,10 @@ int test_core_kernel(void *ctx)
                group_leader, group_leader, group_leader, group_leader,
                comm);
 
+       out->local_task_struct_matches = bpf_core_type_matches(struct task_struct___local);
+#else
+       data.skip = true;
+#endif
        return 0;
 }
 
index fb60f81..2edb4df 100644 (file)
@@ -19,6 +19,14 @@ struct a_struct {
        int x;
 };
 
+struct a_complex_struct {
+       union {
+               struct a_struct *a;
+               void *b;
+       } x;
+       volatile long y;
+};
+
 union a_union {
        int y;
        int z;
@@ -43,6 +51,7 @@ typedef int int_typedef;
 typedef enum { TYPEDEF_ENUM_VAL1, TYPEDEF_ENUM_VAL2 } enum_typedef;
 
 typedef void *void_ptr_typedef;
+typedef int *restrict restrict_ptr_typedef;
 
 typedef int (*func_proto_typedef)(long);
 
@@ -50,6 +59,7 @@ typedef char arr_typedef[20];
 
 struct core_reloc_type_based_output {
        bool struct_exists;
+       bool complex_struct_exists;
        bool union_exists;
        bool enum_exists;
        bool typedef_named_struct_exists;
@@ -58,9 +68,24 @@ struct core_reloc_type_based_output {
        bool typedef_int_exists;
        bool typedef_enum_exists;
        bool typedef_void_ptr_exists;
+       bool typedef_restrict_ptr_exists;
        bool typedef_func_proto_exists;
        bool typedef_arr_exists;
 
+       bool struct_matches;
+       bool complex_struct_matches;
+       bool union_matches;
+       bool enum_matches;
+       bool typedef_named_struct_matches;
+       bool typedef_anon_struct_matches;
+       bool typedef_struct_ptr_matches;
+       bool typedef_int_matches;
+       bool typedef_enum_matches;
+       bool typedef_void_ptr_matches;
+       bool typedef_restrict_ptr_matches;
+       bool typedef_func_proto_matches;
+       bool typedef_arr_matches;
+
        int struct_sz;
        int union_sz;
        int enum_sz;
@@ -77,10 +102,17 @@ struct core_reloc_type_based_output {
 SEC("raw_tracepoint/sys_enter")
 int test_core_type_based(void *ctx)
 {
-#if __has_builtin(__builtin_preserve_type_info)
+       /* Support for the BPF_TYPE_MATCHES argument to the
+        * __builtin_preserve_type_info builtin was added at some point during
+        * development of clang 15 and it's what we require for this test. Part of it
+        * could run with merely __builtin_preserve_type_info (which could be checked
+        * separately), but we have to find an upper bound.
+        */
+#if __has_builtin(__builtin_preserve_type_info) && __clang_major__ >= 15
        struct core_reloc_type_based_output *out = (void *)&data.out;
 
        out->struct_exists = bpf_core_type_exists(struct a_struct);
+       out->complex_struct_exists = bpf_core_type_exists(struct a_complex_struct);
        out->union_exists = bpf_core_type_exists(union a_union);
        out->enum_exists = bpf_core_type_exists(enum an_enum);
        out->typedef_named_struct_exists = bpf_core_type_exists(named_struct_typedef);
@@ -89,9 +121,24 @@ int test_core_type_based(void *ctx)
        out->typedef_int_exists = bpf_core_type_exists(int_typedef);
        out->typedef_enum_exists = bpf_core_type_exists(enum_typedef);
        out->typedef_void_ptr_exists = bpf_core_type_exists(void_ptr_typedef);
+       out->typedef_restrict_ptr_exists = bpf_core_type_exists(restrict_ptr_typedef);
        out->typedef_func_proto_exists = bpf_core_type_exists(func_proto_typedef);
        out->typedef_arr_exists = bpf_core_type_exists(arr_typedef);
 
+       out->struct_matches = bpf_core_type_matches(struct a_struct);
+       out->complex_struct_matches = bpf_core_type_matches(struct a_complex_struct);
+       out->union_matches = bpf_core_type_matches(union a_union);
+       out->enum_matches = bpf_core_type_matches(enum an_enum);
+       out->typedef_named_struct_matches = bpf_core_type_matches(named_struct_typedef);
+       out->typedef_anon_struct_matches = bpf_core_type_matches(anon_struct_typedef);
+       out->typedef_struct_ptr_matches = bpf_core_type_matches(struct_ptr_typedef);
+       out->typedef_int_matches = bpf_core_type_matches(int_typedef);
+       out->typedef_enum_matches = bpf_core_type_matches(enum_typedef);
+       out->typedef_void_ptr_matches = bpf_core_type_matches(void_ptr_typedef);
+       out->typedef_restrict_ptr_matches = bpf_core_type_matches(restrict_ptr_typedef);
+       out->typedef_func_proto_matches = bpf_core_type_matches(func_proto_typedef);
+       out->typedef_arr_matches = bpf_core_type_matches(arr_typedef);
+
        out->struct_sz = bpf_core_type_size(struct a_struct);
        out->union_sz = bpf_core_type_size(union a_union);
        out->enum_sz = bpf_core_type_size(enum an_enum);
index 9fd62e9..736686e 100644 (file)
@@ -77,16 +77,30 @@ struct {
        __uint(max_entries, MAX_ALLOWED_PORTS);
 } allowed_ports SEC(".maps");
 
+/* Some symbols defined in net/netfilter/nf_conntrack_bpf.c are unavailable in
+ * vmlinux.h if CONFIG_NF_CONNTRACK=m, so they are redefined locally.
+ */
+
+struct bpf_ct_opts___local {
+       s32 netns_id;
+       s32 error;
+       u8 l4proto;
+       u8 dir;
+       u8 reserved[2];
+} __attribute__((preserve_access_index));
+
+#define BPF_F_CURRENT_NETNS (-1)
+
 extern struct nf_conn *bpf_xdp_ct_lookup(struct xdp_md *xdp_ctx,
                                         struct bpf_sock_tuple *bpf_tuple,
                                         __u32 len_tuple,
-                                        struct bpf_ct_opts *opts,
+                                        struct bpf_ct_opts___local *opts,
                                         __u32 len_opts) __ksym;
 
 extern struct nf_conn *bpf_skb_ct_lookup(struct __sk_buff *skb_ctx,
                                         struct bpf_sock_tuple *bpf_tuple,
                                         u32 len_tuple,
-                                        struct bpf_ct_opts *opts,
+                                        struct bpf_ct_opts___local *opts,
                                         u32 len_opts) __ksym;
 
 extern void bpf_ct_release(struct nf_conn *ct) __ksym;
@@ -393,7 +407,7 @@ static __always_inline int tcp_dissect(void *data, void *data_end,
 
 static __always_inline int tcp_lookup(void *ctx, struct header_pointers *hdr, bool xdp)
 {
-       struct bpf_ct_opts ct_lookup_opts = {
+       struct bpf_ct_opts___local ct_lookup_opts = {
                .netns_id = BPF_F_CURRENT_NETNS,
                .l4proto = IPPROTO_TCP,
        };
@@ -714,10 +728,6 @@ static __always_inline int syncookie_handle_ack(struct header_pointers *hdr)
 static __always_inline int syncookie_part1(void *ctx, void *data, void *data_end,
                                           struct header_pointers *hdr, bool xdp)
 {
-       struct bpf_ct_opts ct_lookup_opts = {
-               .netns_id = BPF_F_CURRENT_NETNS,
-               .l4proto = IPPROTO_TCP,
-       };
        int ret;
 
        ret = tcp_dissect(data, data_end, hdr);
index e443e65..a6410be 100755 (executable)
@@ -471,12 +471,6 @@ class BashcompExtractor(FileExtractor):
     def get_prog_attach_types(self):
         return self.get_bashcomp_list('BPFTOOL_PROG_ATTACH_TYPES')
 
-    def get_map_types(self):
-        return self.get_bashcomp_list('BPFTOOL_MAP_CREATE_TYPES')
-
-    def get_cgroup_attach_types(self):
-        return self.get_bashcomp_list('BPFTOOL_CGROUP_ATTACH_TYPES')
-
 def verify(first_set, second_set, message):
     """
     Print all values that differ between two sets.
@@ -516,17 +510,12 @@ def main():
     man_map_types = man_map_info.get_map_types()
     man_map_info.close()
 
-    bashcomp_info = BashcompExtractor()
-    bashcomp_map_types = bashcomp_info.get_map_types()
-
     verify(source_map_types, help_map_types,
             f'Comparing {BpfHeaderExtractor.filename} (bpf_map_type) and {MapFileExtractor.filename} (do_help() TYPE):')
     verify(source_map_types, man_map_types,
             f'Comparing {BpfHeaderExtractor.filename} (bpf_map_type) and {ManMapExtractor.filename} (TYPE):')
     verify(help_map_options, man_map_options,
             f'Comparing {MapFileExtractor.filename} (do_help() OPTIONS) and {ManMapExtractor.filename} (OPTIONS):')
-    verify(source_map_types, bashcomp_map_types,
-            f'Comparing {BpfHeaderExtractor.filename} (bpf_map_type) and {BashcompExtractor.filename} (BPFTOOL_MAP_CREATE_TYPES):')
 
     # Attach types (names)
 
@@ -542,8 +531,10 @@ def main():
     man_prog_attach_types = man_prog_info.get_attach_types()
     man_prog_info.close()
 
-    bashcomp_info.reset_read() # We stopped at map types, rewind
+
+    bashcomp_info = BashcompExtractor()
     bashcomp_prog_attach_types = bashcomp_info.get_prog_attach_types()
+    bashcomp_info.close()
 
     verify(source_prog_attach_types, help_prog_attach_types,
             f'Comparing {ProgFileExtractor.filename} (bpf_attach_type) and {ProgFileExtractor.filename} (do_help() ATTACH_TYPE):')
@@ -568,17 +559,12 @@ def main():
     man_cgroup_attach_types = man_cgroup_info.get_attach_types()
     man_cgroup_info.close()
 
-    bashcomp_cgroup_attach_types = bashcomp_info.get_cgroup_attach_types()
-    bashcomp_info.close()
-
     verify(source_cgroup_attach_types, help_cgroup_attach_types,
             f'Comparing {BpfHeaderExtractor.filename} (bpf_attach_type) and {CgroupFileExtractor.filename} (do_help() ATTACH_TYPE):')
     verify(source_cgroup_attach_types, man_cgroup_attach_types,
             f'Comparing {BpfHeaderExtractor.filename} (bpf_attach_type) and {ManCgroupExtractor.filename} (ATTACH_TYPE):')
     verify(help_cgroup_options, man_cgroup_options,
             f'Comparing {CgroupFileExtractor.filename} (do_help() OPTIONS) and {ManCgroupExtractor.filename} (OPTIONS):')
-    verify(source_cgroup_attach_types, bashcomp_cgroup_attach_types,
-            f'Comparing {BpfHeaderExtractor.filename} (bpf_attach_type) and {BashcompExtractor.filename} (BPFTOOL_CGROUP_ATTACH_TYPES):')
 
     # Options for remaining commands
 
index 38782bd..fb4f471 100644 (file)
@@ -4,6 +4,8 @@
 #ifndef _TEST_BTF_H
 #define _TEST_BTF_H
 
+#define BTF_END_RAW 0xdeadbeef
+
 #define BTF_INFO_ENC(kind, kind_flag, vlen)                    \
        ((!!(kind_flag) << 31) | ((kind) << 24) | ((vlen) & BTF_MAX_VLEN))
 
index 372579c..f9d553f 100644 (file)
 #endif
 
 #define MAX_INSNS      BPF_MAXINSNS
+#define MAX_EXPECTED_INSNS     32
+#define MAX_UNEXPECTED_INSNS   32
 #define MAX_TEST_INSNS 1000000
 #define MAX_FIXUPS     8
 #define MAX_NR_MAPS    23
 #define MAX_TEST_RUNS  8
 #define POINTER_VALUE  0xcafe4all
 #define TEST_DATA_LEN  64
+#define MAX_FUNC_INFOS 8
+#define MAX_BTF_STRINGS        256
+#define MAX_BTF_TYPES  256
+
+#define INSN_OFF_MASK  ((__s16)0xFFFF)
+#define INSN_IMM_MASK  ((__s32)0xFFFFFFFF)
+#define SKIP_INSNS()   BPF_RAW_INSN(0xde, 0xa, 0xd, 0xbeef, 0xdeadbeef)
+
+#define DEFAULT_LIBBPF_LOG_LEVEL       4
+#define VERBOSE_LIBBPF_LOG_LEVEL       1
 
 #define F_NEEDS_EFFICIENT_UNALIGNED_ACCESS     (1 << 0)
 #define F_LOAD_WITH_STRICT_ALIGNMENT           (1 << 1)
@@ -79,6 +91,23 @@ struct bpf_test {
        const char *descr;
        struct bpf_insn insns[MAX_INSNS];
        struct bpf_insn *fill_insns;
+       /* If specified, the test engine looks for this sequence of
+        * instructions in the BPF program after loading, which allows
+        * testing the rewrites applied by the verifier. Use the values
+        * INSN_OFF_MASK and INSN_IMM_MASK to mask the `off` and `imm`
+        * fields if their content does not matter. The test case fails
+        * if the specified instructions are not found.
+        *
+        * The sequence can be split into sub-sequences by adding a
+        * SKIP_INSNS instruction at the end of each sub-sequence. In
+        * that case the sub-sequences are searched for one after another.
+        */
+       struct bpf_insn expected_insns[MAX_EXPECTED_INSNS];
+       /* If specified, the test engine applies the same pattern-matching
+        * logic as for `expected_insns`. If the pattern is matched, the
+        * test case is marked as failed.
+        */
+       struct bpf_insn unexpected_insns[MAX_UNEXPECTED_INSNS];
        int fixup_map_hash_8b[MAX_FIXUPS];
        int fixup_map_hash_48b[MAX_FIXUPS];
        int fixup_map_hash_16b[MAX_FIXUPS];
@@ -135,6 +164,14 @@ struct bpf_test {
        };
        enum bpf_attach_type expected_attach_type;
        const char *kfunc;
+       struct bpf_func_info func_info[MAX_FUNC_INFOS];
+       int func_info_cnt;
+       char btf_strings[MAX_BTF_STRINGS];
+       /* A set of BTF types to load when specified; use the macro
+        * definitions from test_btf.h. The list must end with
+        * BTF_END_RAW.
+        */
+       __u32 btf_types[MAX_BTF_TYPES];
 };
 
 /* Note we want this to be 64 bit aligned so that the end of our array is
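
For illustration, a hypothetical test entry (not part of this patch) combining the new fields might look as follows; the name and instruction patterns are made up:

	{
		"bpf_loop inlining check (illustrative)",
		.insns = { /* ... program under test ... */ },
		.expected_insns = {
			/* 1st sub-sequence: ld_imm64 of a callback, any off/imm */
			BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, BPF_REG_2,
				     BPF_PSEUDO_FUNC, INSN_OFF_MASK, INSN_IMM_MASK),
			SKIP_INSNS(),
			/* 2nd sub-sequence: the program still exits normally */
			BPF_EXIT_INSN(),
		},
		.unexpected_insns = {
			/* fail if a direct call to the bpf_loop helper survives */
			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_loop),
		},
	},
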
@@ -388,6 +425,45 @@ static void bpf_fill_torturous_jumps(struct bpf_test *self)
        }
 }
 
+static void bpf_fill_big_prog_with_loop_1(struct bpf_test *self)
+{
+       struct bpf_insn *insn = self->fill_insns;
+       /* This test was added to catch a specific use-after-free
+        * error, which happened upon BPF program reallocation.
+        * Reallocation is handled by core.c:bpf_prog_realloc, which
+        * reuses the old memory if the page boundary is not crossed.
+        * The value of `len` is chosen to cross this boundary on
+        * bpf_loop patching.
+        */
+       const int len = getpagesize() - 25;
+       int callback_load_idx;
+       int callback_idx;
+       int i = 0;
+
+       insn[i++] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_1, 1);
+       callback_load_idx = i;
+       insn[i++] = BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW,
+                                BPF_REG_2, BPF_PSEUDO_FUNC, 0,
+                                777 /* filled below */);
+       insn[i++] = BPF_RAW_INSN(0, 0, 0, 0, 0);
+       insn[i++] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_3, 0);
+       insn[i++] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_4, 0);
+       insn[i++] = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_loop);
+
+       while (i < len - 3)
+               insn[i++] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 0);
+       insn[i++] = BPF_EXIT_INSN();
+
+       callback_idx = i;
+       insn[i++] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 0);
+       insn[i++] = BPF_EXIT_INSN();
+
+       insn[callback_load_idx].imm = callback_idx - callback_load_idx - 1;
+       self->func_info[1].insn_off = callback_idx;
+       self->prog_len = i;
+       assert(i == len);
+}
+
 /* BPF_SK_LOOKUP contains 13 instructions, if you need to fix up maps */
 #define BPF_SK_LOOKUP(func)                                            \
        /* struct bpf_sock_tuple tuple = {} */                          \
@@ -664,34 +740,66 @@ static __u32 btf_raw_types[] = {
        BTF_MEMBER_ENC(71, 13, 128), /* struct prog_test_member __kptr_ref *ptr; */
 };
 
-static int load_btf(void)
+static char bpf_vlog[UINT_MAX >> 8];
+
+static int load_btf_spec(__u32 *types, int types_len,
+                        const char *strings, int strings_len)
 {
        struct btf_header hdr = {
                .magic = BTF_MAGIC,
                .version = BTF_VERSION,
                .hdr_len = sizeof(struct btf_header),
-               .type_len = sizeof(btf_raw_types),
-               .str_off = sizeof(btf_raw_types),
-               .str_len = sizeof(btf_str_sec),
+               .type_len = types_len,
+               .str_off = types_len,
+               .str_len = strings_len,
        };
        void *ptr, *raw_btf;
        int btf_fd;
+       LIBBPF_OPTS(bpf_btf_load_opts, opts,
+                   .log_buf = bpf_vlog,
+                   .log_size = sizeof(bpf_vlog),
+                   .log_level = (verbose
+                                 ? VERBOSE_LIBBPF_LOG_LEVEL
+                                 : DEFAULT_LIBBPF_LOG_LEVEL),
+       );
 
-       ptr = raw_btf = malloc(sizeof(hdr) + sizeof(btf_raw_types) +
-                              sizeof(btf_str_sec));
+       raw_btf = malloc(sizeof(hdr) + types_len + strings_len);
 
+       ptr = raw_btf;
        memcpy(ptr, &hdr, sizeof(hdr));
        ptr += sizeof(hdr);
-       memcpy(ptr, btf_raw_types, hdr.type_len);
+       memcpy(ptr, types, hdr.type_len);
        ptr += hdr.type_len;
-       memcpy(ptr, btf_str_sec, hdr.str_len);
+       memcpy(ptr, strings, hdr.str_len);
        ptr += hdr.str_len;
 
-       btf_fd = bpf_btf_load(raw_btf, ptr - raw_btf, NULL);
-       free(raw_btf);
+       btf_fd = bpf_btf_load(raw_btf, ptr - raw_btf, &opts);
        if (btf_fd < 0)
-               return -1;
-       return btf_fd;
+               printf("Failed to load BTF spec: '%s'\n", strerror(errno));
+
+       free(raw_btf);
+
+       return btf_fd < 0 ? -1 : btf_fd;
+}
+
+static int load_btf(void)
+{
+       return load_btf_spec(btf_raw_types, sizeof(btf_raw_types),
+                            btf_str_sec, sizeof(btf_str_sec));
+}
+
+static int load_btf_for_test(struct bpf_test *test)
+{
+       int types_num = 0;
+
+       while (types_num < MAX_BTF_TYPES &&
+              test->btf_types[types_num] != BTF_END_RAW)
+               ++types_num;
+
+       int types_len = types_num * sizeof(test->btf_types[0]);
+
+       return load_btf_spec(test->btf_types, types_len,
+                            test->btf_strings, sizeof(test->btf_strings));
 }
 
 static int create_map_spin_lock(void)
@@ -770,8 +878,6 @@ static int create_map_kptr(void)
        return fd;
 }
 
-static char bpf_vlog[UINT_MAX >> 8];
-
 static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type,
                          struct bpf_insn *prog, int *map_fds)
 {
@@ -1126,10 +1232,218 @@ static bool cmp_str_seq(const char *log, const char *exp)
        return true;
 }
 
+static int get_xlated_program(int fd_prog, struct bpf_insn **buf, int *cnt)
+{
+       struct bpf_prog_info info = {};
+       __u32 info_len = sizeof(info);
+       __u32 xlated_prog_len;
+       __u32 buf_element_size = sizeof(struct bpf_insn);
+
+       if (bpf_obj_get_info_by_fd(fd_prog, &info, &info_len)) {
+               perror("bpf_obj_get_info_by_fd failed");
+               return -1;
+       }
+
+       xlated_prog_len = info.xlated_prog_len;
+       if (xlated_prog_len % buf_element_size) {
+               printf("Program length %d is not multiple of %d\n",
+                      xlated_prog_len, buf_element_size);
+               return -1;
+       }
+
+       *cnt = xlated_prog_len / buf_element_size;
+       *buf = calloc(*cnt, buf_element_size);
+       if (!*buf) {
+               perror("can't allocate xlated program buffer");
+               return -ENOMEM;
+       }
+
+       bzero(&info, sizeof(info));
+       info.xlated_prog_len = xlated_prog_len;
+       info.xlated_prog_insns = (__u64)*buf;
+       if (bpf_obj_get_info_by_fd(fd_prog, &info, &info_len)) {
+               perror("second bpf_obj_get_info_by_fd failed");
+               goto out_free_buf;
+       }
+
+       return 0;
+
+out_free_buf:
+       free(*buf);
+       return -1;
+}
+
+static bool is_null_insn(struct bpf_insn *insn)
+{
+       struct bpf_insn null_insn = {};
+
+       return memcmp(insn, &null_insn, sizeof(null_insn)) == 0;
+}
+
+static bool is_skip_insn(struct bpf_insn *insn)
+{
+       struct bpf_insn skip_insn = SKIP_INSNS();
+
+       return memcmp(insn, &skip_insn, sizeof(skip_insn)) == 0;
+}
+
+static int null_terminated_insn_len(struct bpf_insn *seq, int max_len)
+{
+       int i;
+
+       for (i = 0; i < max_len; ++i) {
+               if (is_null_insn(&seq[i]))
+                       return i;
+       }
+       return max_len;
+}
+
+static bool compare_masked_insn(struct bpf_insn *orig, struct bpf_insn *masked)
+{
+       struct bpf_insn orig_masked;
+
+       memcpy(&orig_masked, orig, sizeof(orig_masked));
+       if (masked->imm == INSN_IMM_MASK)
+               orig_masked.imm = INSN_IMM_MASK;
+       if (masked->off == INSN_OFF_MASK)
+               orig_masked.off = INSN_OFF_MASK;
+
+       return memcmp(&orig_masked, masked, sizeof(orig_masked)) == 0;
+}
+
+static int find_insn_subseq(struct bpf_insn *seq, struct bpf_insn *subseq,
+                           int seq_len, int subseq_len)
+{
+       int i, j;
+
+       if (subseq_len > seq_len)
+               return -1;
+
+       for (i = 0; i < seq_len - subseq_len + 1; ++i) {
+               bool found = true;
+
+               for (j = 0; j < subseq_len; ++j) {
+                       if (!compare_masked_insn(&seq[i + j], &subseq[j])) {
+                               found = false;
+                               break;
+                       }
+               }
+               if (found)
+                       return i;
+       }
+
+       return -1;
+}
+
+static int find_skip_insn_marker(struct bpf_insn *seq, int len)
+{
+       int i;
+
+       for (i = 0; i < len; ++i)
+               if (is_skip_insn(&seq[i]))
+                       return i;
+
+       return -1;
+}
+
+/* Return true if all sub-sequences in `subseqs` could be found in
+ * `seq` one after another. Sub-sequences are separated by a single
+ * SKIP_INSNS() marker instruction.
+ */
+static bool find_all_insn_subseqs(struct bpf_insn *seq, struct bpf_insn *subseqs,
+                                 int seq_len, int max_subseqs_len)
+{
+       int subseqs_len = null_terminated_insn_len(subseqs, max_subseqs_len);
+
+       while (subseqs_len > 0) {
+               int skip_idx = find_skip_insn_marker(subseqs, subseqs_len);
+               int cur_subseq_len = skip_idx < 0 ? subseqs_len : skip_idx;
+               int subseq_idx = find_insn_subseq(seq, subseqs,
+                                                 seq_len, cur_subseq_len);
+
+               if (subseq_idx < 0)
+                       return false;
+               seq += subseq_idx + cur_subseq_len;
+               seq_len -= subseq_idx + cur_subseq_len;
+               subseqs += cur_subseq_len + 1;
+               subseqs_len -= cur_subseq_len + 1;
+       }
+
+       return true;
+}
+
+static void print_insn(struct bpf_insn *buf, int cnt)
+{
+       int i;
+
+       printf("  addr  op d s off  imm\n");
+       for (i = 0; i < cnt; ++i) {
+               struct bpf_insn *insn = &buf[i];
+
+               if (is_null_insn(insn))
+                       break;
+
+               if (is_skip_insn(insn))
+                       printf("  ...\n");
+               else
+                       printf("  %04x: %02x %1x %x %04hx %08x\n",
+                              i, insn->code, insn->dst_reg,
+                              insn->src_reg, insn->off, insn->imm);
+       }
+}
+
+static bool check_xlated_program(struct bpf_test *test, int fd_prog)
+{
+       struct bpf_insn *buf;
+       int cnt;
+       bool result = true;
+       bool check_expected = !is_null_insn(test->expected_insns);
+       bool check_unexpected = !is_null_insn(test->unexpected_insns);
+
+       if (!check_expected && !check_unexpected)
+               goto out;
+
+       if (get_xlated_program(fd_prog, &buf, &cnt)) {
+               printf("FAIL: can't get xlated program\n");
+               result = false;
+               goto out;
+       }
+
+       if (check_expected &&
+           !find_all_insn_subseqs(buf, test->expected_insns,
+                                  cnt, MAX_EXPECTED_INSNS)) {
+               printf("FAIL: can't find expected subsequence of instructions\n");
+               result = false;
+               if (verbose) {
+                       printf("Program:\n");
+                       print_insn(buf, cnt);
+                       printf("Expected subsequence:\n");
+                       print_insn(test->expected_insns, MAX_EXPECTED_INSNS);
+               }
+       }
+
+       if (check_unexpected &&
+           find_all_insn_subseqs(buf, test->unexpected_insns,
+                                 cnt, MAX_UNEXPECTED_INSNS)) {
+               printf("FAIL: found unexpected subsequence of instructions\n");
+               result = false;
+               if (verbose) {
+                       printf("Program:\n");
+                       print_insn(buf, cnt);
+                       printf("Un-expected subsequence:\n");
+                       print_insn(test->unexpected_insns, MAX_UNEXPECTED_INSNS);
+               }
+       }
+
+       free(buf);
+ out:
+       return result;
+}
+
 static void do_test_single(struct bpf_test *test, bool unpriv,
                           int *passes, int *errors)
 {
-       int fd_prog, expected_ret, alignment_prevented_execution;
+       int fd_prog, btf_fd, expected_ret, alignment_prevented_execution;
        int prog_len, prog_type = test->prog_type;
        struct bpf_insn *prog = test->insns;
        LIBBPF_OPTS(bpf_prog_load_opts, opts);
@@ -1141,8 +1455,10 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
        __u32 pflags;
        int i, err;
 
+       fd_prog = -1;
        for (i = 0; i < MAX_NR_MAPS; i++)
                map_fds[i] = -1;
+       btf_fd = -1;
 
        if (!prog_type)
                prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
@@ -1175,11 +1491,11 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
 
        opts.expected_attach_type = test->expected_attach_type;
        if (verbose)
-               opts.log_level = 1;
+               opts.log_level = VERBOSE_LIBBPF_LOG_LEVEL;
        else if (expected_ret == VERBOSE_ACCEPT)
                opts.log_level = 2;
        else
-               opts.log_level = 4;
+               opts.log_level = DEFAULT_LIBBPF_LOG_LEVEL;
        opts.prog_flags = pflags;
 
        if (prog_type == BPF_PROG_TYPE_TRACING && test->kfunc) {
@@ -1197,6 +1513,19 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
                opts.attach_btf_id = attach_btf_id;
        }
 
+       if (test->btf_types[0] != 0) {
+               btf_fd = load_btf_for_test(test);
+               if (btf_fd < 0)
+                       goto fail_log;
+               opts.prog_btf_fd = btf_fd;
+       }
+
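+       /* func_info maps instruction offsets to BTF function types, letting
+        * the verifier see subprogs (e.g. bpf_loop callbacks) as typed
+        * functions; func_info_rec_size tells the kernel the record size.
+        */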
+       if (test->func_info_cnt != 0) {
+               opts.func_info = test->func_info;
+               opts.func_info_cnt = test->func_info_cnt;
+               opts.func_info_rec_size = sizeof(test->func_info[0]);
+       }
+
        opts.log_buf = bpf_vlog;
        opts.log_size = sizeof(bpf_vlog);
        fd_prog = bpf_prog_load(prog_type, NULL, "GPL", prog, prog_len, &opts);
@@ -1262,6 +1591,9 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
        if (verbose)
                printf(", verifier log:\n%s", bpf_vlog);
 
+       if (!check_xlated_program(test, fd_prog))
+               goto fail_log;
+
        run_errs = 0;
        run_successes = 0;
        if (!alignment_prevented_execution && fd_prog >= 0 && test->runs >= 0) {
@@ -1305,6 +1637,7 @@ close_fds:
        if (test->fill_insns)
                free(test->fill_insns);
        close(fd_prog);
+       close(btf_fd);
        for (i = 0; i < MAX_NR_MAPS; i++)
                close(map_fds[i]);
        sched_yield();
index 5675002..096a957 100755 (executable)
@@ -47,7 +47,7 @@
 #       conflict with any existing interface
 #   * tests the veth and xsk layers of the topology
 #
-# See the source xdpxceiver.c for information on each test
+# See the source xskxceiver.c for information on each test
 #
 # Kernel configuration:
 # ---------------------
@@ -160,14 +160,14 @@ statusList=()
 
 TEST_NAME="XSK_SELFTESTS_SOFTIRQ"
 
-execxdpxceiver
+exec_xskxceiver
 
 cleanup_exit ${VETH0} ${VETH1} ${NS1}
 TEST_NAME="XSK_SELFTESTS_BUSY_POLL"
 busy_poll=1
 
 setup_vethPairs
-execxdpxceiver
+exec_xskxceiver
 
 ## END TESTS
 
diff --git a/tools/testing/selftests/bpf/verifier/bpf_loop_inline.c b/tools/testing/selftests/bpf/verifier/bpf_loop_inline.c
new file mode 100644 (file)
index 0000000..2d00236
--- /dev/null
@@ -0,0 +1,263 @@
+#define BTF_TYPES \
+       .btf_strings = "\0int\0i\0ctx\0callback\0main\0", \
+       .btf_types = { \
+       /* 1: int   */ BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), \
+       /* 2: int*  */ BTF_PTR_ENC(1), \
+       /* 3: void* */ BTF_PTR_ENC(0), \
+       /* 4: int __(void*) */ BTF_FUNC_PROTO_ENC(1, 1), \
+               BTF_FUNC_PROTO_ARG_ENC(7, 3), \
+       /* 5: int __(int, int*) */ BTF_FUNC_PROTO_ENC(1, 2), \
+               BTF_FUNC_PROTO_ARG_ENC(5, 1), \
+               BTF_FUNC_PROTO_ARG_ENC(7, 2), \
+       /* 6: main      */ BTF_FUNC_ENC(20, 4), \
+       /* 7: callback  */ BTF_FUNC_ENC(11, 5), \
+       BTF_END_RAW \
+       }
+
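+/* Offsets into .btf_strings above, for reference: 1 -> "int", 5 -> "i",
+ * 7 -> "ctx", 11 -> "callback", 20 -> "main"; e.g. BTF_FUNC_ENC(20, 4)
+ * declares "main" with prototype type #4.
+ */
+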
+#define MAIN_TYPE      6
+#define CALLBACK_TYPE  7
+
+/* can't use BPF_CALL_REL, jit_subprogs adjusts IMM & OFF
+ * fields for pseudo calls
+ */
+#define PSEUDO_CALL_INSN() \
+       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_CALL, \
+                    INSN_OFF_MASK, INSN_IMM_MASK)
+
+/* can't use BPF_FUNC_loop constant,
+ * do_misc_fixups adjusts the IMM field
+ */
+#define HELPER_CALL_INSN() \
+       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, INSN_OFF_MASK, INSN_IMM_MASK)
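+
+/* In both macros above, INSN_OFF_MASK and INSN_IMM_MASK act as wildcards:
+ * the masked instruction comparison in test_verifier.c ignores these
+ * fields, so the patterns match the call regardless of the final off/imm
+ * values written by the rewrite passes.
+ */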
+
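+/* All tests below follow bpf_loop's calling convention:
+ * R1 = nr_loops, R2 = callback, R3 = callback_ctx, R4 = flags.
+ * Returning 1 from the callback stops the iteration.
+ */
+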
+{
+       "inline simple bpf_loop call",
+       .insns = {
+       /* main */
+       /* force verifier state branching to verify logic on first and
+        * subsequent bpf_loop insn processing steps
+        */
+       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_jiffies64),
+       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 777, 2),
+       BPF_ALU64_IMM(BPF_MOV, BPF_REG_1, 1),
+       BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+       BPF_ALU64_IMM(BPF_MOV, BPF_REG_1, 2),
+
+       BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, BPF_REG_2, BPF_PSEUDO_FUNC, 0, 6),
+       BPF_RAW_INSN(0, 0, 0, 0, 0),
+       BPF_ALU64_IMM(BPF_MOV, BPF_REG_3, 0),
+       BPF_ALU64_IMM(BPF_MOV, BPF_REG_4, 0),
+       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_loop),
+       BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 0),
+       BPF_EXIT_INSN(),
+       /* callback */
+       BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 1),
+       BPF_EXIT_INSN(),
+       },
+       .expected_insns = { PSEUDO_CALL_INSN() },
+       .unexpected_insns = { HELPER_CALL_INSN() },
+       .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+       .result = ACCEPT,
+       .runs = 0,
+       .func_info = { { 0, MAIN_TYPE }, { 12, CALLBACK_TYPE } },
+       .func_info_cnt = 2,
+       BTF_TYPES
+},
+{
+       "don't inline bpf_loop call, flags non-zero",
+       .insns = {
+       /* main */
+       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_jiffies64),
+       BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_0),
+       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_jiffies64),
+       BPF_ALU64_REG(BPF_MOV, BPF_REG_7, BPF_REG_0),
+       BPF_JMP_IMM(BPF_JNE, BPF_REG_6, 0, 9),
+       BPF_ALU64_IMM(BPF_MOV, BPF_REG_4, 0),
+       BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0, 0),
+       BPF_ALU64_IMM(BPF_MOV, BPF_REG_1, 1),
+       BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, BPF_REG_2, BPF_PSEUDO_FUNC, 0, 7),
+       BPF_RAW_INSN(0, 0, 0, 0, 0),
+       BPF_ALU64_IMM(BPF_MOV, BPF_REG_3, 0),
+       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_loop),
+       BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 0),
+       BPF_EXIT_INSN(),
+       BPF_ALU64_IMM(BPF_MOV, BPF_REG_4, 1),
+       BPF_JMP_IMM(BPF_JA, 0, 0, -10),
+       /* callback */
+       BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 1),
+       BPF_EXIT_INSN(),
+       },
+       .expected_insns = { HELPER_CALL_INSN() },
+       .unexpected_insns = { PSEUDO_CALL_INSN() },
+       .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+       .result = ACCEPT,
+       .runs = 0,
+       .func_info = { { 0, MAIN_TYPE }, { 16, CALLBACK_TYPE } },
+       .func_info_cnt = 2,
+       BTF_TYPES
+},
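+/* Not inlined above: one path reaches the call with R4 (flags) set to 1,
+ * so the verifier cannot prove flags == 0 at the bpf_loop call site.
+ */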
+{
+       "don't inline bpf_loop call, callback non-constant",
+       .insns = {
+       /* main */
+       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_jiffies64),
+       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 777, 4), /* pick a random callback */
+
+       BPF_ALU64_IMM(BPF_MOV, BPF_REG_1, 1),
+       BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, BPF_REG_2, BPF_PSEUDO_FUNC, 0, 10),
+       BPF_RAW_INSN(0, 0, 0, 0, 0),
+       BPF_JMP_IMM(BPF_JA, 0, 0, 3),
+
+       BPF_ALU64_IMM(BPF_MOV, BPF_REG_1, 1),
+       BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, BPF_REG_2, BPF_PSEUDO_FUNC, 0, 8),
+       BPF_RAW_INSN(0, 0, 0, 0, 0),
+
+       BPF_ALU64_IMM(BPF_MOV, BPF_REG_3, 0),
+       BPF_ALU64_IMM(BPF_MOV, BPF_REG_4, 0),
+       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_loop),
+       BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 0),
+       BPF_EXIT_INSN(),
+       /* callback */
+       BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 1),
+       BPF_EXIT_INSN(),
+       /* callback #2 */
+       BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 1),
+       BPF_EXIT_INSN(),
+       },
+       .expected_insns = { HELPER_CALL_INSN() },
+       .unexpected_insns = { PSEUDO_CALL_INSN() },
+       .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+       .result = ACCEPT,
+       .runs = 0,
+       .func_info = {
+               { 0, MAIN_TYPE },
+               { 14, CALLBACK_TYPE },
+               { 16, CALLBACK_TYPE }
+       },
+       .func_info_cnt = 3,
+       BTF_TYPES
+},
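+/* Not inlined above: two different BPF_PSEUDO_FUNC loads can reach the
+ * same call site, so R2 does not hold a single statically known callback.
+ */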
+{
+       "bpf_loop_inline and a dead func",
+       .insns = {
+       /* main */
+
+       /* A reference to callback #1 to make verifier count it as a func.
+        * This reference is overwritten below and callback #1 is dead.
+        */
+       BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, BPF_REG_2, BPF_PSEUDO_FUNC, 0, 9),
+       BPF_RAW_INSN(0, 0, 0, 0, 0),
+       BPF_ALU64_IMM(BPF_MOV, BPF_REG_1, 1),
+       BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, BPF_REG_2, BPF_PSEUDO_FUNC, 0, 8),
+       BPF_RAW_INSN(0, 0, 0, 0, 0),
+       BPF_ALU64_IMM(BPF_MOV, BPF_REG_3, 0),
+       BPF_ALU64_IMM(BPF_MOV, BPF_REG_4, 0),
+       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_loop),
+       BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 0),
+       BPF_EXIT_INSN(),
+       /* callback */
+       BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 1),
+       BPF_EXIT_INSN(),
+       /* callback #2 */
+       BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 1),
+       BPF_EXIT_INSN(),
+       },
+       .expected_insns = { PSEUDO_CALL_INSN() },
+       .unexpected_insns = { HELPER_CALL_INSN() },
+       .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+       .result = ACCEPT,
+       .runs = 0,
+       .func_info = {
+               { 0, MAIN_TYPE },
+               { 10, CALLBACK_TYPE },
+               { 12, CALLBACK_TYPE }
+       },
+       .func_info_cnt = 3,
+       BTF_TYPES
+},
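+/* Callback #1 above is referenced only by the overwritten load, i.e. it is
+ * dead code; the test checks that inlining copes with a dead subprog in
+ * the func_info table.
+ */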
+{
+       "bpf_loop_inline stack locations for loop vars",
+       .insns = {
+       /* main */
+       BPF_ST_MEM(BPF_W, BPF_REG_10, -12, 0x77),
+       /* bpf_loop call #1 */
+       BPF_ALU64_IMM(BPF_MOV, BPF_REG_1, 1),
+       BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, BPF_REG_2, BPF_PSEUDO_FUNC, 0, 22),
+       BPF_RAW_INSN(0, 0, 0, 0, 0),
+       BPF_ALU64_IMM(BPF_MOV, BPF_REG_3, 0),
+       BPF_ALU64_IMM(BPF_MOV, BPF_REG_4, 0),
+       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_loop),
+       /* bpf_loop call #2 */
+       BPF_ALU64_IMM(BPF_MOV, BPF_REG_1, 2),
+       BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, BPF_REG_2, BPF_PSEUDO_FUNC, 0, 16),
+       BPF_RAW_INSN(0, 0, 0, 0, 0),
+       BPF_ALU64_IMM(BPF_MOV, BPF_REG_3, 0),
+       BPF_ALU64_IMM(BPF_MOV, BPF_REG_4, 0),
+       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_loop),
+       /* call func and exit */
+       BPF_CALL_REL(2),
+       BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 0),
+       BPF_EXIT_INSN(),
+       /* func */
+       BPF_ST_MEM(BPF_DW, BPF_REG_10, -32, 0x55),
+       BPF_ALU64_IMM(BPF_MOV, BPF_REG_1, 2),
+       BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, BPF_REG_2, BPF_PSEUDO_FUNC, 0, 6),
+       BPF_RAW_INSN(0, 0, 0, 0, 0),
+       BPF_ALU64_IMM(BPF_MOV, BPF_REG_3, 0),
+       BPF_ALU64_IMM(BPF_MOV, BPF_REG_4, 0),
+       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_loop),
+       BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 0),
+       BPF_EXIT_INSN(),
+       /* callback */
+       BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 1),
+       BPF_EXIT_INSN(),
+       },
+       .expected_insns = {
+       BPF_ST_MEM(BPF_W, BPF_REG_10, -12, 0x77),
+       SKIP_INSNS(),
+       BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, -40),
+       BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_7, -32),
+       BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_8, -24),
+       SKIP_INSNS(),
+       /* offsets are the same as in the first call */
+       BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, -40),
+       BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_7, -32),
+       BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_8, -24),
+       SKIP_INSNS(),
+       BPF_ST_MEM(BPF_DW, BPF_REG_10, -32, 0x55),
+       SKIP_INSNS(),
+       /* offsets differ from main because of the different offset used in
+        * the BPF_ST_MEM instruction above
+        */
+       BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, -56),
+       BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_7, -48),
+       BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_8, -40),
+       },
+       .unexpected_insns = { HELPER_CALL_INSN() },
+       .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+       .result = ACCEPT,
+       .func_info = {
+               { 0, MAIN_TYPE },
+               { 16, MAIN_TYPE },
+               { 25, CALLBACK_TYPE },
+       },
+       .func_info_cnt = 3,
+       BTF_TYPES
+},
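+/* The inline expansion spills R6-R8 (used by the generated loop as bound,
+ * counter and context) just below each frame's own stack usage, which is
+ * why the expected offsets differ between main and the subfunction.
+ */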
+{
+       "inline bpf_loop call in a big program",
+       .insns = {},
+       .fill_helper = bpf_fill_big_prog_with_loop_1,
+       .expected_insns = { PSEUDO_CALL_INSN() },
+       .unexpected_insns = { HELPER_CALL_INSN() },
+       .result = ACCEPT,
+       .func_info = { { 0, MAIN_TYPE }, { 16, CALLBACK_TYPE } },
+       .func_info_cnt = 2,
+       BTF_TYPES
+},
+
+#undef HELPER_CALL_INSN
+#undef PSEUDO_CALL_INSN
+#undef CALLBACK_TYPE
+#undef MAIN_TYPE
+#undef BTF_TYPES
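
For readers less familiar with the helper, the raw instructions in the tests
above encode roughly what the following C fragment would express. This is an
illustrative sketch, not part of the patch; the section name and includes are
assumed from a typical libbpf program setup:

	#include <linux/bpf.h>
	#include <bpf/bpf_helpers.h>

	static long callback(__u32 index, void *ctx)
	{
		return 1;	/* returning 1 stops the loop early */
	}

	SEC("tracepoint")
	int main_prog(void *ctx)
	{
		/* nr_loops = 1, no context; flags must be 0 for inlining */
		bpf_loop(1, callback, NULL, 0);
		return 0;
	}

	char LICENSE[] SEC("license") = "GPL";
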
diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xdpxceiver.c
deleted file mode 100644 (file)
index e5992a6..0000000
+++ /dev/null
@@ -1,1665 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/* Copyright(c) 2020 Intel Corporation. */
-
-/*
- * Some functions in this program are taken from
- * Linux kernel samples/bpf/xdpsock* and modified
- * for use.
- *
- * See test_xsk.sh for detailed information on test topology
- * and prerequisite network setup.
- *
- * This test program contains two threads; each thread is a single socket
- * with a unique UMEM. It validates in-order packet delivery and packet
- * content by sending packets between the two sockets.
- *
- * Tests Information:
- * ------------------
- * These selftests test AF_XDP SKB and Native/DRV modes using veth
- * Virtual Ethernet interfaces.
- *
- * For each mode, the following tests are run:
- *    a. nopoll - soft-irq processing in run-to-completion mode
- *    b. poll - using poll() syscall
- *    c. Socket Teardown
- *       Create a Tx and a Rx socket, Tx from one socket, Rx on another. Destroy
- *       both sockets, then repeat multiple times. Only nopoll mode is used
- *    d. Bi-directional sockets
- *       Configure sockets as bi-directional tx/rx sockets, sets up fill and
- *       completion rings on each socket, tx/rx in both directions. Only nopoll
- *       mode is used
- *    e. Statistics
- *       Trigger some error conditions and ensure that the appropriate statistics
- *       are incremented. Within this test, the following statistics are tested:
- *       i.   rx dropped
- *            Increase the UMEM frame headroom to a value which results in
- *            insufficient space in the rx buffer for both the packet and the headroom.
- *       ii.  tx invalid
- *            Set the 'len' field of tx descriptors to an invalid value (umem frame
- *            size + 1).
- *       iii. rx ring full
- *            Reduce the size of the RX ring to a fraction of the fill ring size.
- *       iv.  fill queue empty
- *            Do not populate the fill queue and then try to receive pkts.
- *    f. bpf_link resource persistence
- *       Configure sockets at indexes 0 and 1, run traffic on queue id 0,
- *       then remove the xsk sockets from queue 0 on both veth interfaces and
- *       finally run traffic on queue id 1
- *    g. unaligned mode
- *    h. tests for invalid and corner-case Tx descriptors, checking that they
- *       are discarded and let through, respectively.
- *    i. 2K frame size tests
- *
- * Total tests: 12
- *
- * Flow:
- * -----
- * - Single process spawns two threads: Tx and Rx
- * - Each of these two threads attaches to a veth interface within its
- *   assigned namespace
- * - Each thread creates one AF_XDP socket connected to a unique UMEM for
- *   each veth interface
- * - The Tx thread transmits 10k packets from veth<xxxx> to veth<yyyy>
- * - The Rx thread verifies that all 10k packets were received in order and
- *   have the right content
- *
- * Enable/disable packet dump mode:
- * --------------------------------
- * To enable L2 - L4 headers and payload dump of each packet on STDOUT, add
- * parameter -D to params array in test_xsk.sh, i.e. params=("-S" "-D")
- */
-
-#define _GNU_SOURCE
-#include <fcntl.h>
-#include <errno.h>
-#include <getopt.h>
-#include <asm/barrier.h>
-#include <linux/if_link.h>
-#include <linux/if_ether.h>
-#include <linux/ip.h>
-#include <linux/udp.h>
-#include <arpa/inet.h>
-#include <net/if.h>
-#include <locale.h>
-#include <poll.h>
-#include <pthread.h>
-#include <signal.h>
-#include <stdbool.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <stddef.h>
-#include <sys/mman.h>
-#include <sys/socket.h>
-#include <sys/time.h>
-#include <sys/types.h>
-#include <sys/queue.h>
-#include <time.h>
-#include <unistd.h>
-#include <stdatomic.h>
-#include <bpf/xsk.h>
-#include "xdpxceiver.h"
-#include "../kselftest.h"
-
-/* AF_XDP APIs were moved into libxdp and marked as deprecated in libbpf.
- * Until xdpxceiver is either moved into libxdp or rewritten to use it,
- * suppress deprecation warnings in this file
- */
-#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
-
-static const char *MAC1 = "\x00\x0A\x56\x9E\xEE\x62";
-static const char *MAC2 = "\x00\x0A\x56\x9E\xEE\x61";
-static const char *IP1 = "192.168.100.162";
-static const char *IP2 = "192.168.100.161";
-static const u16 UDP_PORT1 = 2020;
-static const u16 UDP_PORT2 = 2121;
-
-static void __exit_with_error(int error, const char *file, const char *func, int line)
-{
-       ksft_test_result_fail("[%s:%s:%i]: ERROR: %d/\"%s\"\n", file, func, line, error,
-                             strerror(error));
-       ksft_exit_xfail();
-}
-
-#define exit_with_error(error) __exit_with_error(error, __FILE__, __func__, __LINE__)
-
-#define mode_string(test) (test)->ifobj_tx->xdp_flags & XDP_FLAGS_SKB_MODE ? "SKB" : "DRV"
-#define busy_poll_string(test) (test)->ifobj_tx->busy_poll ? "BUSY-POLL " : ""
-
-static void report_failure(struct test_spec *test)
-{
-       if (test->fail)
-               return;
-
-       ksft_test_result_fail("FAIL: %s %s%s\n", mode_string(test), busy_poll_string(test),
-                             test->name);
-       test->fail = true;
-}
-
-static void memset32_htonl(void *dest, u32 val, u32 size)
-{
-       u32 *ptr = (u32 *)dest;
-       int i;
-
-       val = htonl(val);
-
-       for (i = 0; i < (size & (~0x3)); i += 4)
-               ptr[i >> 2] = val;
-}
-
-/*
- * Fold a partial checksum
- * This function code has been taken from
- * Linux kernel include/asm-generic/checksum.h
- */
-static __u16 csum_fold(__u32 csum)
-{
-       u32 sum = (__force u32)csum;
-
-       sum = (sum & 0xffff) + (sum >> 16);
-       sum = (sum & 0xffff) + (sum >> 16);
-       return (__force __u16)~sum;
-}
-
-/*
- * This function code has been taken from
- * Linux kernel lib/checksum.c
- */
-static u32 from64to32(u64 x)
-{
-       /* add up 32-bit and 32-bit for 32+c bit */
-       x = (x & 0xffffffff) + (x >> 32);
-       /* add up carry.. */
-       x = (x & 0xffffffff) + (x >> 32);
-       return (u32)x;
-}
-
-/*
- * This function code has been taken from
- * Linux kernel lib/checksum.c
- */
-static __u32 csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len, __u8 proto, __u32 sum)
-{
-       unsigned long long s = (__force u32)sum;
-
-       s += (__force u32)saddr;
-       s += (__force u32)daddr;
-#ifdef __BIG_ENDIAN__
-       s += proto + len;
-#else
-       s += (proto + len) << 8;
-#endif
-       return (__force __u32)from64to32(s);
-}
-
-/*
- * This function has been taken from
- * Linux kernel include/asm-generic/checksum.h
- */
-static __u16 csum_tcpudp_magic(__be32 saddr, __be32 daddr, __u32 len, __u8 proto, __u32 sum)
-{
-       return csum_fold(csum_tcpudp_nofold(saddr, daddr, len, proto, sum));
-}
-
-static u16 udp_csum(u32 saddr, u32 daddr, u32 len, u8 proto, u16 *udp_pkt)
-{
-       u32 csum = 0;
-       u32 cnt = 0;
-
-       /* udp hdr and data */
-       for (; cnt < len; cnt += 2)
-               csum += udp_pkt[cnt >> 1];
-
-       return csum_tcpudp_magic(saddr, daddr, len, proto, csum);
-}
-
-static void gen_eth_hdr(struct ifobject *ifobject, struct ethhdr *eth_hdr)
-{
-       memcpy(eth_hdr->h_dest, ifobject->dst_mac, ETH_ALEN);
-       memcpy(eth_hdr->h_source, ifobject->src_mac, ETH_ALEN);
-       eth_hdr->h_proto = htons(ETH_P_IP);
-}
-
-static void gen_ip_hdr(struct ifobject *ifobject, struct iphdr *ip_hdr)
-{
-       ip_hdr->version = IP_PKT_VER;
-       ip_hdr->ihl = 0x5;
-       ip_hdr->tos = IP_PKT_TOS;
-       ip_hdr->tot_len = htons(IP_PKT_SIZE);
-       ip_hdr->id = 0;
-       ip_hdr->frag_off = 0;
-       ip_hdr->ttl = IPDEFTTL;
-       ip_hdr->protocol = IPPROTO_UDP;
-       ip_hdr->saddr = ifobject->src_ip;
-       ip_hdr->daddr = ifobject->dst_ip;
-       ip_hdr->check = 0;
-}
-
-static void gen_udp_hdr(u32 payload, void *pkt, struct ifobject *ifobject,
-                       struct udphdr *udp_hdr)
-{
-       udp_hdr->source = htons(ifobject->src_port);
-       udp_hdr->dest = htons(ifobject->dst_port);
-       udp_hdr->len = htons(UDP_PKT_SIZE);
-       memset32_htonl(pkt + PKT_HDR_SIZE, payload, UDP_PKT_DATA_SIZE);
-}
-
-static void gen_udp_csum(struct udphdr *udp_hdr, struct iphdr *ip_hdr)
-{
-       udp_hdr->check = 0;
-       udp_hdr->check =
-           udp_csum(ip_hdr->saddr, ip_hdr->daddr, UDP_PKT_SIZE, IPPROTO_UDP, (u16 *)udp_hdr);
-}
-
-static int xsk_configure_umem(struct xsk_umem_info *umem, void *buffer, u64 size)
-{
-       struct xsk_umem_config cfg = {
-               .fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
-               .comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS,
-               .frame_size = umem->frame_size,
-               .frame_headroom = umem->frame_headroom,
-               .flags = XSK_UMEM__DEFAULT_FLAGS
-       };
-       int ret;
-
-       if (umem->unaligned_mode)
-               cfg.flags |= XDP_UMEM_UNALIGNED_CHUNK_FLAG;
-
-       ret = xsk_umem__create(&umem->umem, buffer, size,
-                              &umem->fq, &umem->cq, &cfg);
-       if (ret)
-               return ret;
-
-       umem->buffer = buffer;
-       return 0;
-}
-
-static void enable_busy_poll(struct xsk_socket_info *xsk)
-{
-       int sock_opt;
-
-       sock_opt = 1;
-       if (setsockopt(xsk_socket__fd(xsk->xsk), SOL_SOCKET, SO_PREFER_BUSY_POLL,
-                      (void *)&sock_opt, sizeof(sock_opt)) < 0)
-               exit_with_error(errno);
-
-       sock_opt = 20;
-       if (setsockopt(xsk_socket__fd(xsk->xsk), SOL_SOCKET, SO_BUSY_POLL,
-                      (void *)&sock_opt, sizeof(sock_opt)) < 0)
-               exit_with_error(errno);
-
-       sock_opt = BATCH_SIZE;
-       if (setsockopt(xsk_socket__fd(xsk->xsk), SOL_SOCKET, SO_BUSY_POLL_BUDGET,
-                      (void *)&sock_opt, sizeof(sock_opt)) < 0)
-               exit_with_error(errno);
-}
-
-static int xsk_configure_socket(struct xsk_socket_info *xsk, struct xsk_umem_info *umem,
-                               struct ifobject *ifobject, bool shared)
-{
-       struct xsk_socket_config cfg = {};
-       struct xsk_ring_cons *rxr;
-       struct xsk_ring_prod *txr;
-
-       xsk->umem = umem;
-       cfg.rx_size = xsk->rxqsize;
-       cfg.tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS;
-       cfg.libbpf_flags = XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD;
-       cfg.xdp_flags = ifobject->xdp_flags;
-       cfg.bind_flags = ifobject->bind_flags;
-       if (shared)
-               cfg.bind_flags |= XDP_SHARED_UMEM;
-
-       txr = ifobject->tx_on ? &xsk->tx : NULL;
-       rxr = ifobject->rx_on ? &xsk->rx : NULL;
-       return xsk_socket__create(&xsk->xsk, ifobject->ifname, 0, umem->umem, rxr, txr, &cfg);
-}
-
-static struct option long_options[] = {
-       {"interface", required_argument, 0, 'i'},
-       {"busy-poll", no_argument, 0, 'b'},
-       {"dump-pkts", no_argument, 0, 'D'},
-       {"verbose", no_argument, 0, 'v'},
-       {0, 0, 0, 0}
-};
-
-static void usage(const char *prog)
-{
-       const char *str =
-               "  Usage: %s [OPTIONS]\n"
-               "  Options:\n"
-               "  -i, --interface      Use interface\n"
-               "  -D, --dump-pkts      Dump packets L2 - L5\n"
-               "  -v, --verbose        Verbose output\n"
-               "  -b, --busy-poll      Enable busy poll\n";
-
-       ksft_print_msg(str, prog);
-}
-
-static int switch_namespace(const char *nsname)
-{
-       char fqns[26] = "/var/run/netns/";
-       int nsfd;
-
-       if (!nsname || strlen(nsname) == 0)
-               return -1;
-
-       strncat(fqns, nsname, sizeof(fqns) - strlen(fqns) - 1);
-       nsfd = open(fqns, O_RDONLY);
-
-       if (nsfd == -1)
-               exit_with_error(errno);
-
-       if (setns(nsfd, 0) == -1)
-               exit_with_error(errno);
-
-       print_verbose("NS switched: %s\n", nsname);
-
-       return nsfd;
-}
-
-static bool validate_interface(struct ifobject *ifobj)
-{
-       if (!strcmp(ifobj->ifname, ""))
-               return false;
-       return true;
-}
-
-static void parse_command_line(struct ifobject *ifobj_tx, struct ifobject *ifobj_rx, int argc,
-                              char **argv)
-{
-       struct ifobject *ifobj;
-       u32 interface_nb = 0;
-       int option_index, c;
-
-       opterr = 0;
-
-       for (;;) {
-               char *sptr, *token;
-
-               c = getopt_long(argc, argv, "i:Dvb", long_options, &option_index);
-               if (c == -1)
-                       break;
-
-               switch (c) {
-               case 'i':
-                       if (interface_nb == 0)
-                               ifobj = ifobj_tx;
-                       else if (interface_nb == 1)
-                               ifobj = ifobj_rx;
-                       else
-                               break;
-
-                       sptr = strndupa(optarg, strlen(optarg));
-                       memcpy(ifobj->ifname, strsep(&sptr, ","), MAX_INTERFACE_NAME_CHARS);
-                       token = strsep(&sptr, ",");
-                       if (token)
-                               memcpy(ifobj->nsname, token, MAX_INTERFACES_NAMESPACE_CHARS);
-                       interface_nb++;
-                       break;
-               case 'D':
-                       opt_pkt_dump = true;
-                       break;
-               case 'v':
-                       opt_verbose = true;
-                       break;
-               case 'b':
-                       ifobj_tx->busy_poll = true;
-                       ifobj_rx->busy_poll = true;
-                       break;
-               default:
-                       usage(basename(argv[0]));
-                       ksft_exit_xfail();
-               }
-       }
-}
-
-static void __test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx,
-                            struct ifobject *ifobj_rx)
-{
-       u32 i, j;
-
-       for (i = 0; i < MAX_INTERFACES; i++) {
-               struct ifobject *ifobj = i ? ifobj_rx : ifobj_tx;
-
-               ifobj->xsk = &ifobj->xsk_arr[0];
-               ifobj->use_poll = false;
-               ifobj->use_fill_ring = true;
-               ifobj->release_rx = true;
-               ifobj->pkt_stream = test->pkt_stream_default;
-               ifobj->validation_func = NULL;
-
-               if (i == 0) {
-                       ifobj->rx_on = false;
-                       ifobj->tx_on = true;
-               } else {
-                       ifobj->rx_on = true;
-                       ifobj->tx_on = false;
-               }
-
-               memset(ifobj->umem, 0, sizeof(*ifobj->umem));
-               ifobj->umem->num_frames = DEFAULT_UMEM_BUFFERS;
-               ifobj->umem->frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE;
-
-               for (j = 0; j < MAX_SOCKETS; j++) {
-                       memset(&ifobj->xsk_arr[j], 0, sizeof(ifobj->xsk_arr[j]));
-                       ifobj->xsk_arr[j].rxqsize = XSK_RING_CONS__DEFAULT_NUM_DESCS;
-               }
-       }
-
-       test->ifobj_tx = ifobj_tx;
-       test->ifobj_rx = ifobj_rx;
-       test->current_step = 0;
-       test->total_steps = 1;
-       test->nb_sockets = 1;
-       test->fail = false;
-}
-
-static void test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx,
-                          struct ifobject *ifobj_rx, enum test_mode mode)
-{
-       struct pkt_stream *pkt_stream;
-       u32 i;
-
-       pkt_stream = test->pkt_stream_default;
-       memset(test, 0, sizeof(*test));
-       test->pkt_stream_default = pkt_stream;
-
-       for (i = 0; i < MAX_INTERFACES; i++) {
-               struct ifobject *ifobj = i ? ifobj_rx : ifobj_tx;
-
-               ifobj->xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
-               if (mode == TEST_MODE_SKB)
-                       ifobj->xdp_flags |= XDP_FLAGS_SKB_MODE;
-               else
-                       ifobj->xdp_flags |= XDP_FLAGS_DRV_MODE;
-
-               ifobj->bind_flags = XDP_USE_NEED_WAKEUP | XDP_COPY;
-       }
-
-       __test_spec_init(test, ifobj_tx, ifobj_rx);
-}
-
-static void test_spec_reset(struct test_spec *test)
-{
-       __test_spec_init(test, test->ifobj_tx, test->ifobj_rx);
-}
-
-static void test_spec_set_name(struct test_spec *test, const char *name)
-{
-       strncpy(test->name, name, MAX_TEST_NAME_SIZE);
-}
-
-static void pkt_stream_reset(struct pkt_stream *pkt_stream)
-{
-       if (pkt_stream)
-               pkt_stream->rx_pkt_nb = 0;
-}
-
-static struct pkt *pkt_stream_get_pkt(struct pkt_stream *pkt_stream, u32 pkt_nb)
-{
-       if (pkt_nb >= pkt_stream->nb_pkts)
-               return NULL;
-
-       return &pkt_stream->pkts[pkt_nb];
-}
-
-static struct pkt *pkt_stream_get_next_rx_pkt(struct pkt_stream *pkt_stream, u32 *pkts_sent)
-{
-       while (pkt_stream->rx_pkt_nb < pkt_stream->nb_pkts) {
-               (*pkts_sent)++;
-               if (pkt_stream->pkts[pkt_stream->rx_pkt_nb].valid)
-                       return &pkt_stream->pkts[pkt_stream->rx_pkt_nb++];
-               pkt_stream->rx_pkt_nb++;
-       }
-       return NULL;
-}
-
-static void pkt_stream_delete(struct pkt_stream *pkt_stream)
-{
-       free(pkt_stream->pkts);
-       free(pkt_stream);
-}
-
-static void pkt_stream_restore_default(struct test_spec *test)
-{
-       struct pkt_stream *tx_pkt_stream = test->ifobj_tx->pkt_stream;
-
-       if (tx_pkt_stream != test->pkt_stream_default) {
-               pkt_stream_delete(test->ifobj_tx->pkt_stream);
-               test->ifobj_tx->pkt_stream = test->pkt_stream_default;
-       }
-
-       if (test->ifobj_rx->pkt_stream != test->pkt_stream_default &&
-           test->ifobj_rx->pkt_stream != tx_pkt_stream)
-               pkt_stream_delete(test->ifobj_rx->pkt_stream);
-       test->ifobj_rx->pkt_stream = test->pkt_stream_default;
-}
-
-static struct pkt_stream *__pkt_stream_alloc(u32 nb_pkts)
-{
-       struct pkt_stream *pkt_stream;
-
-       pkt_stream = calloc(1, sizeof(*pkt_stream));
-       if (!pkt_stream)
-               return NULL;
-
-       pkt_stream->pkts = calloc(nb_pkts, sizeof(*pkt_stream->pkts));
-       if (!pkt_stream->pkts) {
-               free(pkt_stream);
-               return NULL;
-       }
-
-       pkt_stream->nb_pkts = nb_pkts;
-       return pkt_stream;
-}
-
-static void pkt_set(struct xsk_umem_info *umem, struct pkt *pkt, u64 addr, u32 len)
-{
-       pkt->addr = addr;
-       pkt->len = len;
-       if (len > umem->frame_size - XDP_PACKET_HEADROOM - MIN_PKT_SIZE * 2 - umem->frame_headroom)
-               pkt->valid = false;
-       else
-               pkt->valid = true;
-}
-
-static struct pkt_stream *pkt_stream_generate(struct xsk_umem_info *umem, u32 nb_pkts, u32 pkt_len)
-{
-       struct pkt_stream *pkt_stream;
-       u32 i;
-
-       pkt_stream = __pkt_stream_alloc(nb_pkts);
-       if (!pkt_stream)
-               exit_with_error(ENOMEM);
-
-       pkt_stream->nb_pkts = nb_pkts;
-       for (i = 0; i < nb_pkts; i++) {
-               pkt_set(umem, &pkt_stream->pkts[i], (i % umem->num_frames) * umem->frame_size,
-                       pkt_len);
-               pkt_stream->pkts[i].payload = i;
-       }
-
-       return pkt_stream;
-}
-
-static struct pkt_stream *pkt_stream_clone(struct xsk_umem_info *umem,
-                                          struct pkt_stream *pkt_stream)
-{
-       return pkt_stream_generate(umem, pkt_stream->nb_pkts, pkt_stream->pkts[0].len);
-}
-
-static void pkt_stream_replace(struct test_spec *test, u32 nb_pkts, u32 pkt_len)
-{
-       struct pkt_stream *pkt_stream;
-
-       pkt_stream = pkt_stream_generate(test->ifobj_tx->umem, nb_pkts, pkt_len);
-       test->ifobj_tx->pkt_stream = pkt_stream;
-       test->ifobj_rx->pkt_stream = pkt_stream;
-}
-
-static void pkt_stream_replace_half(struct test_spec *test, u32 pkt_len, int offset)
-{
-       struct xsk_umem_info *umem = test->ifobj_tx->umem;
-       struct pkt_stream *pkt_stream;
-       u32 i;
-
-       pkt_stream = pkt_stream_clone(umem, test->pkt_stream_default);
-       for (i = 1; i < test->pkt_stream_default->nb_pkts; i += 2)
-               pkt_set(umem, &pkt_stream->pkts[i],
-                       (i % umem->num_frames) * umem->frame_size + offset, pkt_len);
-
-       test->ifobj_tx->pkt_stream = pkt_stream;
-       test->ifobj_rx->pkt_stream = pkt_stream;
-}
-
-static void pkt_stream_receive_half(struct test_spec *test)
-{
-       struct xsk_umem_info *umem = test->ifobj_rx->umem;
-       struct pkt_stream *pkt_stream = test->ifobj_tx->pkt_stream;
-       u32 i;
-
-       test->ifobj_rx->pkt_stream = pkt_stream_generate(umem, pkt_stream->nb_pkts,
-                                                        pkt_stream->pkts[0].len);
-       pkt_stream = test->ifobj_rx->pkt_stream;
-       for (i = 1; i < pkt_stream->nb_pkts; i += 2)
-               pkt_stream->pkts[i].valid = false;
-}
-
-static struct pkt *pkt_generate(struct ifobject *ifobject, u32 pkt_nb)
-{
-       struct pkt *pkt = pkt_stream_get_pkt(ifobject->pkt_stream, pkt_nb);
-       struct udphdr *udp_hdr;
-       struct ethhdr *eth_hdr;
-       struct iphdr *ip_hdr;
-       void *data;
-
-       if (!pkt)
-               return NULL;
-       if (!pkt->valid || pkt->len < MIN_PKT_SIZE)
-               return pkt;
-
-       data = xsk_umem__get_data(ifobject->umem->buffer, pkt->addr);
-       udp_hdr = (struct udphdr *)(data + sizeof(struct ethhdr) + sizeof(struct iphdr));
-       ip_hdr = (struct iphdr *)(data + sizeof(struct ethhdr));
-       eth_hdr = (struct ethhdr *)data;
-
-       gen_udp_hdr(pkt_nb, data, ifobject, udp_hdr);
-       gen_ip_hdr(ifobject, ip_hdr);
-       gen_udp_csum(udp_hdr, ip_hdr);
-       gen_eth_hdr(ifobject, eth_hdr);
-
-       return pkt;
-}
-
-static void pkt_stream_generate_custom(struct test_spec *test, struct pkt *pkts, u32 nb_pkts)
-{
-       struct pkt_stream *pkt_stream;
-       u32 i;
-
-       pkt_stream = __pkt_stream_alloc(nb_pkts);
-       if (!pkt_stream)
-               exit_with_error(ENOMEM);
-
-       test->ifobj_tx->pkt_stream = pkt_stream;
-       test->ifobj_rx->pkt_stream = pkt_stream;
-
-       for (i = 0; i < nb_pkts; i++) {
-               pkt_stream->pkts[i].addr = pkts[i].addr;
-               pkt_stream->pkts[i].len = pkts[i].len;
-               pkt_stream->pkts[i].payload = i;
-               pkt_stream->pkts[i].valid = pkts[i].valid;
-       }
-}
-
-static void pkt_dump(void *pkt, u32 len)
-{
-       char s[INET_ADDRSTRLEN];
-       struct ethhdr *ethhdr;
-       struct udphdr *udphdr;
-       struct iphdr *iphdr;
-       int payload, i;
-
-       ethhdr = pkt;
-       iphdr = pkt + sizeof(*ethhdr);
-       udphdr = pkt + sizeof(*ethhdr) + sizeof(*iphdr);
-
-       /* extract L2 frame */
-       fprintf(stdout, "DEBUG>> L2: dst mac: ");
-       for (i = 0; i < ETH_ALEN; i++)
-               fprintf(stdout, "%02X", ethhdr->h_dest[i]);
-
-       fprintf(stdout, "\nDEBUG>> L2: src mac: ");
-       for (i = 0; i < ETH_ALEN; i++)
-               fprintf(stdout, "%02X", ethhdr->h_source[i]);
-
-       /* extract L3 frame */
-       fprintf(stdout, "\nDEBUG>> L3: ip_hdr->ihl: %02X\n", iphdr->ihl);
-       fprintf(stdout, "DEBUG>> L3: ip_hdr->saddr: %s\n",
-               inet_ntop(AF_INET, &iphdr->saddr, s, sizeof(s)));
-       fprintf(stdout, "DEBUG>> L3: ip_hdr->daddr: %s\n",
-               inet_ntop(AF_INET, &iphdr->daddr, s, sizeof(s)));
-       /* extract L4 frame */
-       fprintf(stdout, "DEBUG>> L4: udp_hdr->src: %d\n", ntohs(udphdr->source));
-       fprintf(stdout, "DEBUG>> L4: udp_hdr->dst: %d\n", ntohs(udphdr->dest));
-       /* extract L5 frame */
-       payload = *((uint32_t *)(pkt + PKT_HDR_SIZE));
-
-       fprintf(stdout, "DEBUG>> L5: payload: %d\n", payload);
-       fprintf(stdout, "---------------------------------------\n");
-}
-
-static bool is_offset_correct(struct xsk_umem_info *umem, struct pkt_stream *pkt_stream, u64 addr,
-                             u64 pkt_stream_addr)
-{
-       u32 headroom = umem->unaligned_mode ? 0 : umem->frame_headroom;
-       u32 offset = addr % umem->frame_size, expected_offset = 0;
-
-       if (!pkt_stream->use_addr_for_fill)
-               pkt_stream_addr = 0;
-
-       expected_offset += (pkt_stream_addr + headroom + XDP_PACKET_HEADROOM) % umem->frame_size;
-
-       if (offset == expected_offset)
-               return true;
-
-       ksft_print_msg("[%s] expected [%u], got [%u]\n", __func__, expected_offset, offset);
-       return false;
-}
-
-static bool is_pkt_valid(struct pkt *pkt, void *buffer, u64 addr, u32 len)
-{
-       void *data = xsk_umem__get_data(buffer, addr);
-       struct iphdr *iphdr = (struct iphdr *)(data + sizeof(struct ethhdr));
-
-       if (!pkt) {
-               ksft_print_msg("[%s] too many packets received\n", __func__);
-               return false;
-       }
-
-       if (len < MIN_PKT_SIZE || pkt->len < MIN_PKT_SIZE) {
-               /* Do not try to verify packets that are smaller than minimum size. */
-               return true;
-       }
-
-       if (pkt->len != len) {
-               ksft_print_msg("[%s] expected length [%d], got length [%d]\n",
-                              __func__, pkt->len, len);
-               return false;
-       }
-
-       if (iphdr->version == IP_PKT_VER && iphdr->tos == IP_PKT_TOS) {
-               u32 seqnum = ntohl(*((u32 *)(data + PKT_HDR_SIZE)));
-
-               if (opt_pkt_dump)
-                       pkt_dump(data, PKT_SIZE);
-
-               if (pkt->payload != seqnum) {
-                       ksft_print_msg("[%s] expected seqnum [%d], got seqnum [%d]\n",
-                                      __func__, pkt->payload, seqnum);
-                       return false;
-               }
-       } else {
-               ksft_print_msg("Invalid frame received: ");
-               ksft_print_msg("[IP_PKT_VER: %02X], [IP_PKT_TOS: %02X]\n", iphdr->version,
-                              iphdr->tos);
-               return false;
-       }
-
-       return true;
-}
-
-static void kick_tx(struct xsk_socket_info *xsk)
-{
-       int ret;
-
-       ret = sendto(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, 0);
-       if (ret >= 0)
-               return;
-       if (errno == ENOBUFS || errno == EAGAIN || errno == EBUSY || errno == ENETDOWN) {
-               usleep(100);
-               return;
-       }
-       exit_with_error(errno);
-}
-
-static void kick_rx(struct xsk_socket_info *xsk)
-{
-       int ret;
-
-       ret = recvfrom(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, NULL);
-       if (ret < 0)
-               exit_with_error(errno);
-}
-
-static int complete_pkts(struct xsk_socket_info *xsk, int batch_size)
-{
-       unsigned int rcvd;
-       u32 idx;
-
-       if (xsk_ring_prod__needs_wakeup(&xsk->tx))
-               kick_tx(xsk);
-
-       rcvd = xsk_ring_cons__peek(&xsk->umem->cq, batch_size, &idx);
-       if (rcvd) {
-               if (rcvd > xsk->outstanding_tx) {
-                       u64 addr = *xsk_ring_cons__comp_addr(&xsk->umem->cq, idx + rcvd - 1);
-
-                       ksft_print_msg("[%s] Too many packets completed\n", __func__);
-                       ksft_print_msg("Last completion address: %llx\n", addr);
-                       return TEST_FAILURE;
-               }
-
-               xsk_ring_cons__release(&xsk->umem->cq, rcvd);
-               xsk->outstanding_tx -= rcvd;
-       }
-
-       return TEST_PASS;
-}
-
-static int receive_pkts(struct ifobject *ifobj, struct pollfd *fds)
-{
-       struct timeval tv_end, tv_now, tv_timeout = {RECV_TMOUT, 0};
-       u32 idx_rx = 0, idx_fq = 0, rcvd, i, pkts_sent = 0;
-       struct pkt_stream *pkt_stream = ifobj->pkt_stream;
-       struct xsk_socket_info *xsk = ifobj->xsk;
-       struct xsk_umem_info *umem = xsk->umem;
-       struct pkt *pkt;
-       int ret;
-
-       ret = gettimeofday(&tv_now, NULL);
-       if (ret)
-               exit_with_error(errno);
-       timeradd(&tv_now, &tv_timeout, &tv_end);
-
-       pkt = pkt_stream_get_next_rx_pkt(pkt_stream, &pkts_sent);
-       while (pkt) {
-               ret = gettimeofday(&tv_now, NULL);
-               if (ret)
-                       exit_with_error(errno);
-               if (timercmp(&tv_now, &tv_end, >)) {
-                       ksft_print_msg("ERROR: [%s] Receive loop timed out\n", __func__);
-                       return TEST_FAILURE;
-               }
-
-               kick_rx(xsk);
-
-               rcvd = xsk_ring_cons__peek(&xsk->rx, BATCH_SIZE, &idx_rx);
-               if (!rcvd) {
-                       if (xsk_ring_prod__needs_wakeup(&umem->fq)) {
-                               ret = poll(fds, 1, POLL_TMOUT);
-                               if (ret < 0)
-                                       exit_with_error(-ret);
-                       }
-                       continue;
-               }
-
-               if (ifobj->use_fill_ring) {
-                       ret = xsk_ring_prod__reserve(&umem->fq, rcvd, &idx_fq);
-                       while (ret != rcvd) {
-                               if (ret < 0)
-                                       exit_with_error(-ret);
-                               if (xsk_ring_prod__needs_wakeup(&umem->fq)) {
-                                       ret = poll(fds, 1, POLL_TMOUT);
-                                       if (ret < 0)
-                                               exit_with_error(-ret);
-                               }
-                               ret = xsk_ring_prod__reserve(&umem->fq, rcvd, &idx_fq);
-                       }
-               }
-
-               for (i = 0; i < rcvd; i++) {
-                       const struct xdp_desc *desc = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx++);
-                       u64 addr = desc->addr, orig;
-
-                       orig = xsk_umem__extract_addr(addr);
-                       addr = xsk_umem__add_offset_to_addr(addr);
-
-                       if (!is_pkt_valid(pkt, umem->buffer, addr, desc->len) ||
-                           !is_offset_correct(umem, pkt_stream, addr, pkt->addr))
-                               return TEST_FAILURE;
-
-                       if (ifobj->use_fill_ring)
-                               *xsk_ring_prod__fill_addr(&umem->fq, idx_fq++) = orig;
-                       pkt = pkt_stream_get_next_rx_pkt(pkt_stream, &pkts_sent);
-               }
-
-               if (ifobj->use_fill_ring)
-                       xsk_ring_prod__submit(&umem->fq, rcvd);
-               if (ifobj->release_rx)
-                       xsk_ring_cons__release(&xsk->rx, rcvd);
-
-               pthread_mutex_lock(&pacing_mutex);
-               pkts_in_flight -= pkts_sent;
-               if (pkts_in_flight < umem->num_frames)
-                       pthread_cond_signal(&pacing_cond);
-               pthread_mutex_unlock(&pacing_mutex);
-               pkts_sent = 0;
-       }
-
-       return TEST_PASS;
-}
-
-static int __send_pkts(struct ifobject *ifobject, u32 *pkt_nb)
-{
-       struct xsk_socket_info *xsk = ifobject->xsk;
-       u32 i, idx, valid_pkts = 0;
-
-       while (xsk_ring_prod__reserve(&xsk->tx, BATCH_SIZE, &idx) < BATCH_SIZE)
-               complete_pkts(xsk, BATCH_SIZE);
-
-       for (i = 0; i < BATCH_SIZE; i++) {
-               struct xdp_desc *tx_desc = xsk_ring_prod__tx_desc(&xsk->tx, idx + i);
-               struct pkt *pkt = pkt_generate(ifobject, *pkt_nb);
-
-               if (!pkt)
-                       break;
-
-               tx_desc->addr = pkt->addr;
-               tx_desc->len = pkt->len;
-               (*pkt_nb)++;
-               if (pkt->valid)
-                       valid_pkts++;
-       }
-
-       pthread_mutex_lock(&pacing_mutex);
-       pkts_in_flight += valid_pkts;
-       /* pkts_in_flight might be negative if many invalid packets are sent */
-       if (pkts_in_flight >= (int)(ifobject->umem->num_frames - BATCH_SIZE)) {
-               kick_tx(xsk);
-               pthread_cond_wait(&pacing_cond, &pacing_mutex);
-       }
-       pthread_mutex_unlock(&pacing_mutex);
-
-       xsk_ring_prod__submit(&xsk->tx, i);
-       xsk->outstanding_tx += valid_pkts;
-       if (complete_pkts(xsk, i))
-               return TEST_FAILURE;
-
-       usleep(10);
-       return TEST_PASS;
-}
-
-static void wait_for_tx_completion(struct xsk_socket_info *xsk)
-{
-       while (xsk->outstanding_tx)
-               complete_pkts(xsk, BATCH_SIZE);
-}
-
-static int send_pkts(struct test_spec *test, struct ifobject *ifobject)
-{
-       struct pollfd fds = { };
-       u32 pkt_cnt = 0;
-
-       fds.fd = xsk_socket__fd(ifobject->xsk->xsk);
-       fds.events = POLLOUT;
-
-       while (pkt_cnt < ifobject->pkt_stream->nb_pkts) {
-               int err;
-
-               if (ifobject->use_poll) {
-                       int ret;
-
-                       ret = poll(&fds, 1, POLL_TMOUT);
-                       if (ret <= 0)
-                               continue;
-
-                       if (!(fds.revents & POLLOUT))
-                               continue;
-               }
-
-               err = __send_pkts(ifobject, &pkt_cnt);
-               if (err || test->fail)
-                       return TEST_FAILURE;
-       }
-
-       wait_for_tx_completion(ifobject->xsk);
-       return TEST_PASS;
-}
-
-static int get_xsk_stats(struct xsk_socket *xsk, struct xdp_statistics *stats)
-{
-       int fd = xsk_socket__fd(xsk), err;
-       socklen_t optlen, expected_len;
-
-       optlen = sizeof(*stats);
-       err = getsockopt(fd, SOL_XDP, XDP_STATISTICS, stats, &optlen);
-       if (err) {
-               ksft_print_msg("[%s] getsockopt(XDP_STATISTICS) error %u %s\n",
-                              __func__, -err, strerror(-err));
-               return TEST_FAILURE;
-       }
-
-       expected_len = sizeof(struct xdp_statistics);
-       if (optlen != expected_len) {
-               ksft_print_msg("[%s] getsockopt optlen error. Expected: %u got: %u\n",
-                              __func__, expected_len, optlen);
-               return TEST_FAILURE;
-       }
-
-       return TEST_PASS;
-}
-
-static int validate_rx_dropped(struct ifobject *ifobject)
-{
-       struct xsk_socket *xsk = ifobject->xsk->xsk;
-       struct xdp_statistics stats;
-       int err;
-
-       kick_rx(ifobject->xsk);
-
-       err = get_xsk_stats(xsk, &stats);
-       if (err)
-               return TEST_FAILURE;
-
-       if (stats.rx_dropped == ifobject->pkt_stream->nb_pkts / 2)
-               return TEST_PASS;
-
-       return TEST_FAILURE;
-}
-
-static int validate_rx_full(struct ifobject *ifobject)
-{
-       struct xsk_socket *xsk = ifobject->xsk->xsk;
-       struct xdp_statistics stats;
-       int err;
-
-       usleep(1000);
-       kick_rx(ifobject->xsk);
-
-       err = get_xsk_stats(xsk, &stats);
-       if (err)
-               return TEST_FAILURE;
-
-       if (stats.rx_ring_full)
-               return TEST_PASS;
-
-       return TEST_FAILURE;
-}
-
-static int validate_fill_empty(struct ifobject *ifobject)
-{
-       struct xsk_socket *xsk = ifobject->xsk->xsk;
-       struct xdp_statistics stats;
-       int err;
-
-       usleep(1000);
-       kick_rx(ifobject->xsk);
-
-       err = get_xsk_stats(xsk, &stats);
-       if (err)
-               return TEST_FAILURE;
-
-       if (stats.rx_fill_ring_empty_descs)
-               return TEST_PASS;
-
-       return TEST_FAILURE;
-}
-
-static int validate_tx_invalid_descs(struct ifobject *ifobject)
-{
-       struct xsk_socket *xsk = ifobject->xsk->xsk;
-       int fd = xsk_socket__fd(xsk);
-       struct xdp_statistics stats;
-       socklen_t optlen;
-       int err;
-
-       optlen = sizeof(stats);
-       err = getsockopt(fd, SOL_XDP, XDP_STATISTICS, &stats, &optlen);
-       if (err) {
-               ksft_print_msg("[%s] getsockopt(XDP_STATISTICS) error %u %s\n",
-                              __func__, -err, strerror(-err));
-               return TEST_FAILURE;
-       }
-
-       if (stats.tx_invalid_descs != ifobject->pkt_stream->nb_pkts / 2) {
-               ksft_print_msg("[%s] tx_invalid_descs incorrect. Got [%u] expected [%u]\n",
-                              __func__, stats.tx_invalid_descs, ifobject->pkt_stream->nb_pkts / 2);
-               return TEST_FAILURE;
-       }
-
-       return TEST_PASS;
-}
-
-static void thread_common_ops(struct test_spec *test, struct ifobject *ifobject)
-{
-       u64 umem_sz = ifobject->umem->num_frames * ifobject->umem->frame_size;
-       int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE;
-       int ret, ifindex;
-       void *bufs;
-       u32 i;
-
-       ifobject->ns_fd = switch_namespace(ifobject->nsname);
-
-       if (ifobject->umem->unaligned_mode)
-               mmap_flags |= MAP_HUGETLB;
-
-       bufs = mmap(NULL, umem_sz, PROT_READ | PROT_WRITE, mmap_flags, -1, 0);
-       if (bufs == MAP_FAILED)
-               exit_with_error(errno);
-
-       ret = xsk_configure_umem(ifobject->umem, bufs, umem_sz);
-       if (ret)
-               exit_with_error(-ret);
-
-       for (i = 0; i < test->nb_sockets; i++) {
-               u32 ctr = 0;
-
-               while (ctr++ < SOCK_RECONF_CTR) {
-                       ret = xsk_configure_socket(&ifobject->xsk_arr[i], ifobject->umem,
-                                                  ifobject, !!i);
-                       if (!ret)
-                               break;
-
-                       /* Retry if it fails as xsk_socket__create() is asynchronous */
-                       if (ctr >= SOCK_RECONF_CTR)
-                               exit_with_error(-ret);
-                       usleep(USLEEP_MAX);
-               }
-
-               if (ifobject->busy_poll)
-                       enable_busy_poll(&ifobject->xsk_arr[i]);
-       }
-
-       ifobject->xsk = &ifobject->xsk_arr[0];
-
-       if (!ifobject->rx_on)
-               return;
-
-       ifindex = if_nametoindex(ifobject->ifname);
-       if (!ifindex)
-               exit_with_error(errno);
-
-       ret = xsk_setup_xdp_prog(ifindex, &ifobject->xsk_map_fd);
-       if (ret)
-               exit_with_error(-ret);
-
-       ret = xsk_socket__update_xskmap(ifobject->xsk->xsk, ifobject->xsk_map_fd);
-       if (ret)
-               exit_with_error(-ret);
-}
-
-static void testapp_cleanup_xsk_res(struct ifobject *ifobj)
-{
-       print_verbose("Destroying socket\n");
-       xsk_socket__delete(ifobj->xsk->xsk);
-       munmap(ifobj->umem->buffer, ifobj->umem->num_frames * ifobj->umem->frame_size);
-       xsk_umem__delete(ifobj->umem->umem);
-}
-
-static void *worker_testapp_validate_tx(void *arg)
-{
-       struct test_spec *test = (struct test_spec *)arg;
-       struct ifobject *ifobject = test->ifobj_tx;
-       int err;
-
-       if (test->current_step == 1)
-               thread_common_ops(test, ifobject);
-
-       print_verbose("Sending %d packets on interface %s\n", ifobject->pkt_stream->nb_pkts,
-                     ifobject->ifname);
-       err = send_pkts(test, ifobject);
-
-       if (!err && ifobject->validation_func)
-               err = ifobject->validation_func(ifobject);
-       if (err)
-               report_failure(test);
-
-       if (test->total_steps == test->current_step || err)
-               testapp_cleanup_xsk_res(ifobject);
-       pthread_exit(NULL);
-}
-
-static void xsk_populate_fill_ring(struct xsk_umem_info *umem, struct pkt_stream *pkt_stream)
-{
-       u32 idx = 0, i, buffers_to_fill;
-       int ret;
-
-       if (umem->num_frames < XSK_RING_PROD__DEFAULT_NUM_DESCS)
-               buffers_to_fill = umem->num_frames;
-       else
-               buffers_to_fill = XSK_RING_PROD__DEFAULT_NUM_DESCS;
-
-       ret = xsk_ring_prod__reserve(&umem->fq, buffers_to_fill, &idx);
-       if (ret != buffers_to_fill)
-               exit_with_error(ENOSPC);
-       for (i = 0; i < buffers_to_fill; i++) {
-               u64 addr;
-
-               if (pkt_stream->use_addr_for_fill) {
-                       struct pkt *pkt = pkt_stream_get_pkt(pkt_stream, i);
-
-                       if (!pkt)
-                               break;
-                       addr = pkt->addr;
-               } else {
-                       addr = i * umem->frame_size;
-               }
-
-               *xsk_ring_prod__fill_addr(&umem->fq, idx++) = addr;
-       }
-       xsk_ring_prod__submit(&umem->fq, buffers_to_fill);
-}
-
-static void *worker_testapp_validate_rx(void *arg)
-{
-       struct test_spec *test = (struct test_spec *)arg;
-       struct ifobject *ifobject = test->ifobj_rx;
-       struct pollfd fds = { };
-       int err;
-
-       if (test->current_step == 1)
-               thread_common_ops(test, ifobject);
-
-       xsk_populate_fill_ring(ifobject->umem, ifobject->pkt_stream);
-
-       fds.fd = xsk_socket__fd(ifobject->xsk->xsk);
-       fds.events = POLLIN;
-
-       pthread_barrier_wait(&barr);
-
-       err = receive_pkts(ifobject, &fds);
-
-       if (!err && ifobject->validation_func)
-               err = ifobject->validation_func(ifobject);
-       if (err) {
-               report_failure(test);
-               pthread_mutex_lock(&pacing_mutex);
-               pthread_cond_signal(&pacing_cond);
-               pthread_mutex_unlock(&pacing_mutex);
-       }
-
-       if (test->total_steps == test->current_step || err)
-               testapp_cleanup_xsk_res(ifobject);
-       pthread_exit(NULL);
-}
-
-static int testapp_validate_traffic(struct test_spec *test)
-{
-       struct ifobject *ifobj_tx = test->ifobj_tx;
-       struct ifobject *ifobj_rx = test->ifobj_rx;
-       pthread_t t0, t1;
-
-       if (pthread_barrier_init(&barr, NULL, 2))
-               exit_with_error(errno);
-
-       test->current_step++;
-       pkt_stream_reset(ifobj_rx->pkt_stream);
-       pkts_in_flight = 0;
-
-       /*Spawn RX thread */
-       pthread_create(&t0, NULL, ifobj_rx->func_ptr, test);
-
-       pthread_barrier_wait(&barr);
-       if (pthread_barrier_destroy(&barr))
-               exit_with_error(errno);
-
-       /*Spawn TX thread */
-       pthread_create(&t1, NULL, ifobj_tx->func_ptr, test);
-
-       pthread_join(t1, NULL);
-       pthread_join(t0, NULL);
-
-       return !!test->fail;
-}
-
-static void testapp_teardown(struct test_spec *test)
-{
-       int i;
-
-       test_spec_set_name(test, "TEARDOWN");
-       for (i = 0; i < MAX_TEARDOWN_ITER; i++) {
-               if (testapp_validate_traffic(test))
-                       return;
-               test_spec_reset(test);
-       }
-}
-
-static void swap_directions(struct ifobject **ifobj1, struct ifobject **ifobj2)
-{
-       thread_func_t tmp_func_ptr = (*ifobj1)->func_ptr;
-       struct ifobject *tmp_ifobj = (*ifobj1);
-
-       (*ifobj1)->func_ptr = (*ifobj2)->func_ptr;
-       (*ifobj2)->func_ptr = tmp_func_ptr;
-
-       *ifobj1 = *ifobj2;
-       *ifobj2 = tmp_ifobj;
-}
-
-static void testapp_bidi(struct test_spec *test)
-{
-       test_spec_set_name(test, "BIDIRECTIONAL");
-       test->ifobj_tx->rx_on = true;
-       test->ifobj_rx->tx_on = true;
-       test->total_steps = 2;
-       if (testapp_validate_traffic(test))
-               return;
-
-       print_verbose("Switching Tx/Rx vectors\n");
-       swap_directions(&test->ifobj_rx, &test->ifobj_tx);
-       testapp_validate_traffic(test);
-
-       swap_directions(&test->ifobj_rx, &test->ifobj_tx);
-}
-
-static void swap_xsk_resources(struct ifobject *ifobj_tx, struct ifobject *ifobj_rx)
-{
-       int ret;
-
-       xsk_socket__delete(ifobj_tx->xsk->xsk);
-       xsk_socket__delete(ifobj_rx->xsk->xsk);
-       ifobj_tx->xsk = &ifobj_tx->xsk_arr[1];
-       ifobj_rx->xsk = &ifobj_rx->xsk_arr[1];
-
-       ret = xsk_socket__update_xskmap(ifobj_rx->xsk->xsk, ifobj_rx->xsk_map_fd);
-       if (ret)
-               exit_with_error(-ret);
-}
-
-static void testapp_bpf_res(struct test_spec *test)
-{
-       test_spec_set_name(test, "BPF_RES");
-       test->total_steps = 2;
-       test->nb_sockets = 2;
-       if (testapp_validate_traffic(test))
-               return;
-
-       swap_xsk_resources(test->ifobj_tx, test->ifobj_rx);
-       testapp_validate_traffic(test);
-}
-
-static void testapp_headroom(struct test_spec *test)
-{
-       test_spec_set_name(test, "UMEM_HEADROOM");
-       test->ifobj_rx->umem->frame_headroom = UMEM_HEADROOM_TEST_SIZE;
-       testapp_validate_traffic(test);
-}
-
-static void testapp_stats_rx_dropped(struct test_spec *test)
-{
-       test_spec_set_name(test, "STAT_RX_DROPPED");
-       test->ifobj_rx->umem->frame_headroom = test->ifobj_rx->umem->frame_size -
-               XDP_PACKET_HEADROOM - MIN_PKT_SIZE * 3;
-       pkt_stream_replace_half(test, MIN_PKT_SIZE * 4, 0);
-       pkt_stream_receive_half(test);
-       test->ifobj_rx->validation_func = validate_rx_dropped;
-       testapp_validate_traffic(test);
-}
-
-static void testapp_stats_tx_invalid_descs(struct test_spec *test)
-{
-       test_spec_set_name(test, "STAT_TX_INVALID");
-       pkt_stream_replace_half(test, XSK_UMEM__INVALID_FRAME_SIZE, 0);
-       test->ifobj_tx->validation_func = validate_tx_invalid_descs;
-       testapp_validate_traffic(test);
-
-       pkt_stream_restore_default(test);
-}
-
-static void testapp_stats_rx_full(struct test_spec *test)
-{
-       test_spec_set_name(test, "STAT_RX_FULL");
-       pkt_stream_replace(test, DEFAULT_UMEM_BUFFERS + DEFAULT_UMEM_BUFFERS / 2, PKT_SIZE);
-       test->ifobj_rx->pkt_stream = pkt_stream_generate(test->ifobj_rx->umem,
-                                                        DEFAULT_UMEM_BUFFERS, PKT_SIZE);
-       if (!test->ifobj_rx->pkt_stream)
-               exit_with_error(ENOMEM);
-
-       test->ifobj_rx->xsk->rxqsize = DEFAULT_UMEM_BUFFERS;
-       test->ifobj_rx->release_rx = false;
-       test->ifobj_rx->validation_func = validate_rx_full;
-       testapp_validate_traffic(test);
-
-       pkt_stream_restore_default(test);
-}
-
-static void testapp_stats_fill_empty(struct test_spec *test)
-{
-       test_spec_set_name(test, "STAT_RX_FILL_EMPTY");
-       pkt_stream_replace(test, DEFAULT_UMEM_BUFFERS + DEFAULT_UMEM_BUFFERS / 2, PKT_SIZE);
-       test->ifobj_rx->pkt_stream = pkt_stream_generate(test->ifobj_rx->umem,
-                                                        DEFAULT_UMEM_BUFFERS, PKT_SIZE);
-       if (!test->ifobj_rx->pkt_stream)
-               exit_with_error(ENOMEM);
-
-       test->ifobj_rx->use_fill_ring = false;
-       test->ifobj_rx->validation_func = validate_fill_empty;
-       testapp_validate_traffic(test);
-
-       pkt_stream_restore_default(test);
-}
-
-/* Simple test */
-static bool hugepages_present(struct ifobject *ifobject)
-{
-       const size_t mmap_sz = 2 * ifobject->umem->num_frames * ifobject->umem->frame_size;
-       void *bufs;
-
-       bufs = mmap(NULL, mmap_sz, PROT_READ | PROT_WRITE,
-                   MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);
-       if (bufs == MAP_FAILED)
-               return false;
-
-       munmap(bufs, mmap_sz);
-       return true;
-}
-
-static bool testapp_unaligned(struct test_spec *test)
-{
-       if (!hugepages_present(test->ifobj_tx)) {
-               ksft_test_result_skip("No 2M huge pages present.\n");
-               return false;
-       }
-
-       test_spec_set_name(test, "UNALIGNED_MODE");
-       test->ifobj_tx->umem->unaligned_mode = true;
-       test->ifobj_rx->umem->unaligned_mode = true;
-       /* Let half of the packets straddle a buffer boundary */
-       pkt_stream_replace_half(test, PKT_SIZE, -PKT_SIZE / 2);
-       test->ifobj_rx->pkt_stream->use_addr_for_fill = true;
-       testapp_validate_traffic(test);
-
-       pkt_stream_restore_default(test);
-       return true;
-}
-
-static void testapp_single_pkt(struct test_spec *test)
-{
-       struct pkt pkts[] = {{0x1000, PKT_SIZE, 0, true}};
-
-       pkt_stream_generate_custom(test, pkts, ARRAY_SIZE(pkts));
-       testapp_validate_traffic(test);
-       pkt_stream_restore_default(test);
-}
-
-static void testapp_invalid_desc(struct test_spec *test)
-{
-       struct pkt pkts[] = {
-               /* Zero packet address allowed */
-               {0, PKT_SIZE, 0, true},
-               /* Allowed packet */
-               {0x1000, PKT_SIZE, 0, true},
-               /* Straddling the start of umem */
-               {-2, PKT_SIZE, 0, false},
-               /* Packet too large */
-               {0x2000, XSK_UMEM__INVALID_FRAME_SIZE, 0, false},
-               /* After umem ends */
-               {UMEM_SIZE, PKT_SIZE, 0, false},
-               /* Straddle the end of umem */
-               {UMEM_SIZE - PKT_SIZE / 2, PKT_SIZE, 0, false},
-               /* Straddle a page boundary */
-               {0x3000 - PKT_SIZE / 2, PKT_SIZE, 0, false},
-               /* Straddle a 2K boundary */
-               {0x3800 - PKT_SIZE / 2, PKT_SIZE, 0, true},
-               /* Valid packet for sync so that something is received */
-               {0x4000, PKT_SIZE, 0, true}};
-
-       if (test->ifobj_tx->umem->unaligned_mode) {
-               /* Crossing a page boundary allowed */
-               pkts[6].valid = true;
-       }
-       if (test->ifobj_tx->umem->frame_size == XSK_UMEM__DEFAULT_FRAME_SIZE / 2) {
-               /* Crossing a 2K frame size boundary not allowed */
-               pkts[7].valid = false;
-       }
-
-       pkt_stream_generate_custom(test, pkts, ARRAY_SIZE(pkts));
-       testapp_validate_traffic(test);
-       pkt_stream_restore_default(test);
-}
-
-static void init_iface(struct ifobject *ifobj, const char *dst_mac, const char *src_mac,
-                      const char *dst_ip, const char *src_ip, const u16 dst_port,
-                      const u16 src_port, thread_func_t func_ptr)
-{
-       struct in_addr ip;
-
-       memcpy(ifobj->dst_mac, dst_mac, ETH_ALEN);
-       memcpy(ifobj->src_mac, src_mac, ETH_ALEN);
-
-       inet_aton(dst_ip, &ip);
-       ifobj->dst_ip = ip.s_addr;
-
-       inet_aton(src_ip, &ip);
-       ifobj->src_ip = ip.s_addr;
-
-       ifobj->dst_port = dst_port;
-       ifobj->src_port = src_port;
-
-       ifobj->func_ptr = func_ptr;
-}
-
-static void run_pkt_test(struct test_spec *test, enum test_mode mode, enum test_type type)
-{
-       switch (type) {
-       case TEST_TYPE_STATS_RX_DROPPED:
-               testapp_stats_rx_dropped(test);
-               break;
-       case TEST_TYPE_STATS_TX_INVALID_DESCS:
-               testapp_stats_tx_invalid_descs(test);
-               break;
-       case TEST_TYPE_STATS_RX_FULL:
-               testapp_stats_rx_full(test);
-               break;
-       case TEST_TYPE_STATS_FILL_EMPTY:
-               testapp_stats_fill_empty(test);
-               break;
-       case TEST_TYPE_TEARDOWN:
-               testapp_teardown(test);
-               break;
-       case TEST_TYPE_BIDI:
-               testapp_bidi(test);
-               break;
-       case TEST_TYPE_BPF_RES:
-               testapp_bpf_res(test);
-               break;
-       case TEST_TYPE_RUN_TO_COMPLETION:
-               test_spec_set_name(test, "RUN_TO_COMPLETION");
-               testapp_validate_traffic(test);
-               break;
-       case TEST_TYPE_RUN_TO_COMPLETION_SINGLE_PKT:
-               test_spec_set_name(test, "RUN_TO_COMPLETION_SINGLE_PKT");
-               testapp_single_pkt(test);
-               break;
-       case TEST_TYPE_RUN_TO_COMPLETION_2K_FRAME:
-               test_spec_set_name(test, "RUN_TO_COMPLETION_2K_FRAME_SIZE");
-               test->ifobj_tx->umem->frame_size = 2048;
-               test->ifobj_rx->umem->frame_size = 2048;
-               pkt_stream_replace(test, DEFAULT_PKT_CNT, PKT_SIZE);
-               testapp_validate_traffic(test);
-
-               pkt_stream_restore_default(test);
-               break;
-       case TEST_TYPE_POLL:
-               test->ifobj_tx->use_poll = true;
-               test->ifobj_rx->use_poll = true;
-               test_spec_set_name(test, "POLL");
-               testapp_validate_traffic(test);
-               break;
-       case TEST_TYPE_ALIGNED_INV_DESC:
-               test_spec_set_name(test, "ALIGNED_INV_DESC");
-               testapp_invalid_desc(test);
-               break;
-       case TEST_TYPE_ALIGNED_INV_DESC_2K_FRAME:
-               test_spec_set_name(test, "ALIGNED_INV_DESC_2K_FRAME_SIZE");
-               test->ifobj_tx->umem->frame_size = 2048;
-               test->ifobj_rx->umem->frame_size = 2048;
-               testapp_invalid_desc(test);
-               break;
-       case TEST_TYPE_UNALIGNED_INV_DESC:
-               if (!hugepages_present(test->ifobj_tx)) {
-                       ksft_test_result_skip("No 2M huge pages present.\n");
-                       return;
-               }
-               test_spec_set_name(test, "UNALIGNED_INV_DESC");
-               test->ifobj_tx->umem->unaligned_mode = true;
-               test->ifobj_rx->umem->unaligned_mode = true;
-               testapp_invalid_desc(test);
-               break;
-       case TEST_TYPE_UNALIGNED:
-               if (!testapp_unaligned(test))
-                       return;
-               break;
-       case TEST_TYPE_HEADROOM:
-               testapp_headroom(test);
-               break;
-       default:
-               break;
-       }
-
-       if (!test->fail)
-               ksft_test_result_pass("PASS: %s %s%s\n", mode_string(test), busy_poll_string(test),
-                                     test->name);
-}
-
-static struct ifobject *ifobject_create(void)
-{
-       struct ifobject *ifobj;
-
-       ifobj = calloc(1, sizeof(struct ifobject));
-       if (!ifobj)
-               return NULL;
-
-       ifobj->xsk_arr = calloc(MAX_SOCKETS, sizeof(*ifobj->xsk_arr));
-       if (!ifobj->xsk_arr)
-               goto out_xsk_arr;
-
-       ifobj->umem = calloc(1, sizeof(*ifobj->umem));
-       if (!ifobj->umem)
-               goto out_umem;
-
-       return ifobj;
-
-out_umem:
-       free(ifobj->xsk_arr);
-out_xsk_arr:
-       free(ifobj);
-       return NULL;
-}
-
-static void ifobject_delete(struct ifobject *ifobj)
-{
-       free(ifobj->umem);
-       free(ifobj->xsk_arr);
-       free(ifobj);
-}
-
-int main(int argc, char **argv)
-{
-       struct pkt_stream *pkt_stream_default;
-       struct ifobject *ifobj_tx, *ifobj_rx;
-       u32 i, j, failed_tests = 0;
-       struct test_spec test;
-
-       /* Use libbpf 1.0 API mode */
-       libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
-
-       ifobj_tx = ifobject_create();
-       if (!ifobj_tx)
-               exit_with_error(ENOMEM);
-       ifobj_rx = ifobject_create();
-       if (!ifobj_rx)
-               exit_with_error(ENOMEM);
-
-       setlocale(LC_ALL, "");
-
-       parse_command_line(ifobj_tx, ifobj_rx, argc, argv);
-
-       if (!validate_interface(ifobj_tx) || !validate_interface(ifobj_rx)) {
-               usage(basename(argv[0]));
-               ksft_exit_xfail();
-       }
-
-       init_iface(ifobj_tx, MAC1, MAC2, IP1, IP2, UDP_PORT1, UDP_PORT2,
-                  worker_testapp_validate_tx);
-       init_iface(ifobj_rx, MAC2, MAC1, IP2, IP1, UDP_PORT2, UDP_PORT1,
-                  worker_testapp_validate_rx);
-
-       test_spec_init(&test, ifobj_tx, ifobj_rx, 0);
-       pkt_stream_default = pkt_stream_generate(ifobj_tx->umem, DEFAULT_PKT_CNT, PKT_SIZE);
-       if (!pkt_stream_default)
-               exit_with_error(ENOMEM);
-       test.pkt_stream_default = pkt_stream_default;
-
-       ksft_set_plan(TEST_MODE_MAX * TEST_TYPE_MAX);
-
-       for (i = 0; i < TEST_MODE_MAX; i++)
-               for (j = 0; j < TEST_TYPE_MAX; j++) {
-                       test_spec_init(&test, ifobj_tx, ifobj_rx, i);
-                       run_pkt_test(&test, i, j);
-                       usleep(USLEEP_MAX);
-
-                       if (test.fail)
-                               failed_tests++;
-               }
-
-       pkt_stream_delete(pkt_stream_default);
-       ifobject_delete(ifobj_tx);
-       ifobject_delete(ifobj_rx);
-
-       if (failed_tests)
-               ksft_exit_fail();
-       else
-               ksft_exit_pass();
-}
diff --git a/tools/testing/selftests/bpf/xdpxceiver.h b/tools/testing/selftests/bpf/xdpxceiver.h
deleted file mode 100644 (file)
index 8f672b0..0000000
+++ /dev/null
@@ -1,172 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0
- * Copyright(c) 2020 Intel Corporation.
- */
-
-#ifndef XDPXCEIVER_H_
-#define XDPXCEIVER_H_
-
-#ifndef SOL_XDP
-#define SOL_XDP 283
-#endif
-
-#ifndef AF_XDP
-#define AF_XDP 44
-#endif
-
-#ifndef PF_XDP
-#define PF_XDP AF_XDP
-#endif
-
-#ifndef SO_BUSY_POLL_BUDGET
-#define SO_BUSY_POLL_BUDGET 70
-#endif
-
-#ifndef SO_PREFER_BUSY_POLL
-#define SO_PREFER_BUSY_POLL 69
-#endif
-
-#define TEST_PASS 0
-#define TEST_FAILURE -1
-#define MAX_INTERFACES 2
-#define MAX_INTERFACE_NAME_CHARS 7
-#define MAX_INTERFACES_NAMESPACE_CHARS 10
-#define MAX_SOCKETS 2
-#define MAX_TEST_NAME_SIZE 32
-#define MAX_TEARDOWN_ITER 10
-#define PKT_HDR_SIZE (sizeof(struct ethhdr) + sizeof(struct iphdr) + \
-                       sizeof(struct udphdr))
-#define MIN_ETH_PKT_SIZE 64
-#define ETH_FCS_SIZE 4
-#define MIN_PKT_SIZE (MIN_ETH_PKT_SIZE - ETH_FCS_SIZE)
-#define PKT_SIZE (MIN_PKT_SIZE)
-#define IP_PKT_SIZE (PKT_SIZE - sizeof(struct ethhdr))
-#define IP_PKT_VER 0x4
-#define IP_PKT_TOS 0x9
-#define UDP_PKT_SIZE (IP_PKT_SIZE - sizeof(struct iphdr))
-#define UDP_PKT_DATA_SIZE (UDP_PKT_SIZE - sizeof(struct udphdr))
-#define USLEEP_MAX 10000
-#define SOCK_RECONF_CTR 10
-#define BATCH_SIZE 64
-#define POLL_TMOUT 1000
-#define RECV_TMOUT 3
-#define DEFAULT_PKT_CNT (4 * 1024)
-#define DEFAULT_UMEM_BUFFERS (DEFAULT_PKT_CNT / 4)
-#define UMEM_SIZE (DEFAULT_UMEM_BUFFERS * XSK_UMEM__DEFAULT_FRAME_SIZE)
-#define RX_FULL_RXQSIZE 32
-#define UMEM_HEADROOM_TEST_SIZE 128
-#define XSK_UMEM__INVALID_FRAME_SIZE (XSK_UMEM__DEFAULT_FRAME_SIZE + 1)
-
-#define print_verbose(x...) do { if (opt_verbose) ksft_print_msg(x); } while (0)
-
-enum test_mode {
-       TEST_MODE_SKB,
-       TEST_MODE_DRV,
-       TEST_MODE_MAX
-};
-
-enum test_type {
-       TEST_TYPE_RUN_TO_COMPLETION,
-       TEST_TYPE_RUN_TO_COMPLETION_2K_FRAME,
-       TEST_TYPE_RUN_TO_COMPLETION_SINGLE_PKT,
-       TEST_TYPE_POLL,
-       TEST_TYPE_UNALIGNED,
-       TEST_TYPE_ALIGNED_INV_DESC,
-       TEST_TYPE_ALIGNED_INV_DESC_2K_FRAME,
-       TEST_TYPE_UNALIGNED_INV_DESC,
-       TEST_TYPE_HEADROOM,
-       TEST_TYPE_TEARDOWN,
-       TEST_TYPE_BIDI,
-       TEST_TYPE_STATS_RX_DROPPED,
-       TEST_TYPE_STATS_TX_INVALID_DESCS,
-       TEST_TYPE_STATS_RX_FULL,
-       TEST_TYPE_STATS_FILL_EMPTY,
-       TEST_TYPE_BPF_RES,
-       TEST_TYPE_MAX
-};
-
-static bool opt_pkt_dump;
-static bool opt_verbose;
-
-struct xsk_umem_info {
-       struct xsk_ring_prod fq;
-       struct xsk_ring_cons cq;
-       struct xsk_umem *umem;
-       u32 num_frames;
-       u32 frame_headroom;
-       void *buffer;
-       u32 frame_size;
-       bool unaligned_mode;
-};
-
-struct xsk_socket_info {
-       struct xsk_ring_cons rx;
-       struct xsk_ring_prod tx;
-       struct xsk_umem_info *umem;
-       struct xsk_socket *xsk;
-       u32 outstanding_tx;
-       u32 rxqsize;
-};
-
-struct pkt {
-       u64 addr;
-       u32 len;
-       u32 payload;
-       bool valid;
-};
-
-struct pkt_stream {
-       u32 nb_pkts;
-       u32 rx_pkt_nb;
-       struct pkt *pkts;
-       bool use_addr_for_fill;
-};
-
-struct ifobject;
-typedef int (*validation_func_t)(struct ifobject *ifobj);
-typedef void *(*thread_func_t)(void *arg);
-
-struct ifobject {
-       char ifname[MAX_INTERFACE_NAME_CHARS];
-       char nsname[MAX_INTERFACES_NAMESPACE_CHARS];
-       struct xsk_socket_info *xsk;
-       struct xsk_socket_info *xsk_arr;
-       struct xsk_umem_info *umem;
-       thread_func_t func_ptr;
-       validation_func_t validation_func;
-       struct pkt_stream *pkt_stream;
-       int ns_fd;
-       int xsk_map_fd;
-       u32 dst_ip;
-       u32 src_ip;
-       u32 xdp_flags;
-       u32 bind_flags;
-       u16 src_port;
-       u16 dst_port;
-       bool tx_on;
-       bool rx_on;
-       bool use_poll;
-       bool busy_poll;
-       bool use_fill_ring;
-       bool release_rx;
-       u8 dst_mac[ETH_ALEN];
-       u8 src_mac[ETH_ALEN];
-};
-
-struct test_spec {
-       struct ifobject *ifobj_tx;
-       struct ifobject *ifobj_rx;
-       struct pkt_stream *pkt_stream_default;
-       u16 total_steps;
-       u16 current_step;
-       u16 nb_sockets;
-       bool fail;
-       char name[MAX_TEST_NAME_SIZE];
-};
-
-pthread_barrier_t barr;
-pthread_mutex_t pacing_mutex = PTHREAD_MUTEX_INITIALIZER;
-pthread_cond_t pacing_cond = PTHREAD_COND_INITIALIZER;
-
-int pkts_in_flight;
-
-#endif                         /* XDPXCEIVER_H_ */
diff --git a/tools/testing/selftests/bpf/xsk.c b/tools/testing/selftests/bpf/xsk.c
new file mode 100644 (file)
index 0000000..f2721a4
--- /dev/null
@@ -0,0 +1,1268 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+/*
+ * AF_XDP user-space access library.
+ *
+ * Copyright(c) 2018 - 2019 Intel Corporation.
+ *
+ * Author(s): Magnus Karlsson <magnus.karlsson@intel.com>
+ */
+
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <arpa/inet.h>
+#include <asm/barrier.h>
+#include <linux/compiler.h>
+#include <linux/ethtool.h>
+#include <linux/filter.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/if_xdp.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/sockios.h>
+#include <net/if.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <linux/if_link.h>
+
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+#include "xsk.h"
+
+#ifndef SOL_XDP
+ #define SOL_XDP 283
+#endif
+
+#ifndef AF_XDP
+ #define AF_XDP 44
+#endif
+
+#ifndef PF_XDP
+ #define PF_XDP AF_XDP
+#endif
+
+#define pr_warn(fmt, ...) fprintf(stderr, fmt, ##__VA_ARGS__)
+
+enum xsk_prog {
+       XSK_PROG_FALLBACK,
+       XSK_PROG_REDIRECT_FLAGS,
+};
+
+struct xsk_umem {
+       struct xsk_ring_prod *fill_save;
+       struct xsk_ring_cons *comp_save;
+       char *umem_area;
+       struct xsk_umem_config config;
+       int fd;
+       int refcount;
+       struct list_head ctx_list;
+       bool rx_ring_setup_done;
+       bool tx_ring_setup_done;
+};
+
+struct xsk_ctx {
+       struct xsk_ring_prod *fill;
+       struct xsk_ring_cons *comp;
+       __u32 queue_id;
+       struct xsk_umem *umem;
+       int refcount;
+       int ifindex;
+       struct list_head list;
+       int prog_fd;
+       int link_fd;
+       int xsks_map_fd;
+       char ifname[IFNAMSIZ];
+       bool has_bpf_link;
+};
+
+struct xsk_socket {
+       struct xsk_ring_cons *rx;
+       struct xsk_ring_prod *tx;
+       __u64 outstanding_tx;
+       struct xsk_ctx *ctx;
+       struct xsk_socket_config config;
+       int fd;
+};
+
+struct xsk_nl_info {
+       bool xdp_prog_attached;
+       int ifindex;
+       int fd;
+};
+
+/* Up until and including Linux 5.3 */
+struct xdp_ring_offset_v1 {
+       __u64 producer;
+       __u64 consumer;
+       __u64 desc;
+};
+
+/* Up until and including Linux 5.3 */
+struct xdp_mmap_offsets_v1 {
+       struct xdp_ring_offset_v1 rx;
+       struct xdp_ring_offset_v1 tx;
+       struct xdp_ring_offset_v1 fr;
+       struct xdp_ring_offset_v1 cr;
+};
+
+int xsk_umem__fd(const struct xsk_umem *umem)
+{
+       return umem ? umem->fd : -EINVAL;
+}
+
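+/* The fd returned below is a plain socket fd, so it can be used with
+ * poll(2)/select(2). A minimal Rx wait sketch, with error handling elided
+ * and handle_rx() standing in for the caller's Rx processing:
+ *
+ *     struct pollfd fds = { .fd = xsk_socket__fd(xsk), .events = POLLIN };
+ *
+ *     if (poll(&fds, 1, 1000) > 0 && (fds.revents & POLLIN))
+ *             handle_rx(xsk);
+ */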
+int xsk_socket__fd(const struct xsk_socket *xsk)
+{
+       return xsk ? xsk->fd : -EINVAL;
+}
+
+static bool xsk_page_aligned(void *buffer)
+{
+       unsigned long addr = (unsigned long)buffer;
+
+       return !(addr & (getpagesize() - 1));
+}
+
+static void xsk_set_umem_config(struct xsk_umem_config *cfg,
+                               const struct xsk_umem_config *usr_cfg)
+{
+       if (!usr_cfg) {
+               cfg->fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS;
+               cfg->comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS;
+               cfg->frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE;
+               cfg->frame_headroom = XSK_UMEM__DEFAULT_FRAME_HEADROOM;
+               cfg->flags = XSK_UMEM__DEFAULT_FLAGS;
+               return;
+       }
+
+       cfg->fill_size = usr_cfg->fill_size;
+       cfg->comp_size = usr_cfg->comp_size;
+       cfg->frame_size = usr_cfg->frame_size;
+       cfg->frame_headroom = usr_cfg->frame_headroom;
+       cfg->flags = usr_cfg->flags;
+}
+
+static int xsk_set_xdp_socket_config(struct xsk_socket_config *cfg,
+                                    const struct xsk_socket_config *usr_cfg)
+{
+       if (!usr_cfg) {
+               cfg->rx_size = XSK_RING_CONS__DEFAULT_NUM_DESCS;
+               cfg->tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS;
+               cfg->libbpf_flags = 0;
+               cfg->xdp_flags = 0;
+               cfg->bind_flags = 0;
+               return 0;
+       }
+
+       if (usr_cfg->libbpf_flags & ~XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD)
+               return -EINVAL;
+
+       cfg->rx_size = usr_cfg->rx_size;
+       cfg->tx_size = usr_cfg->tx_size;
+       cfg->libbpf_flags = usr_cfg->libbpf_flags;
+       cfg->xdp_flags = usr_cfg->xdp_flags;
+       cfg->bind_flags = usr_cfg->bind_flags;
+
+       return 0;
+}
+
+static void xsk_mmap_offsets_v1(struct xdp_mmap_offsets *off)
+{
+       struct xdp_mmap_offsets_v1 off_v1;
+
+       /* getsockopt on a kernel <= 5.3 has no flags fields.
+        * Copy over the offsets to the correct places in the >=5.4 format
+        * and put the flags where they would have been on that kernel.
+        */
+       memcpy(&off_v1, off, sizeof(off_v1));
+
+       off->rx.producer = off_v1.rx.producer;
+       off->rx.consumer = off_v1.rx.consumer;
+       off->rx.desc = off_v1.rx.desc;
+       off->rx.flags = off_v1.rx.consumer + sizeof(__u32);
+
+       off->tx.producer = off_v1.tx.producer;
+       off->tx.consumer = off_v1.tx.consumer;
+       off->tx.desc = off_v1.tx.desc;
+       off->tx.flags = off_v1.tx.consumer + sizeof(__u32);
+
+       off->fr.producer = off_v1.fr.producer;
+       off->fr.consumer = off_v1.fr.consumer;
+       off->fr.desc = off_v1.fr.desc;
+       off->fr.flags = off_v1.fr.consumer + sizeof(__u32);
+
+       off->cr.producer = off_v1.cr.producer;
+       off->cr.consumer = off_v1.cr.consumer;
+       off->cr.desc = off_v1.cr.desc;
+       off->cr.flags = off_v1.cr.consumer + sizeof(__u32);
+}
+
+static int xsk_get_mmap_offsets(int fd, struct xdp_mmap_offsets *off)
+{
+       socklen_t optlen;
+       int err;
+
+       optlen = sizeof(*off);
+       err = getsockopt(fd, SOL_XDP, XDP_MMAP_OFFSETS, off, &optlen);
+       if (err)
+               return err;
+
+       if (optlen == sizeof(*off))
+               return 0;
+
+       if (optlen == sizeof(struct xdp_mmap_offsets_v1)) {
+               xsk_mmap_offsets_v1(off);
+               return 0;
+       }
+
+       return -EINVAL;
+}
+
+static int xsk_create_umem_rings(struct xsk_umem *umem, int fd,
+                                struct xsk_ring_prod *fill,
+                                struct xsk_ring_cons *comp)
+{
+       struct xdp_mmap_offsets off;
+       void *map;
+       int err;
+
+       err = setsockopt(fd, SOL_XDP, XDP_UMEM_FILL_RING,
+                        &umem->config.fill_size,
+                        sizeof(umem->config.fill_size));
+       if (err)
+               return -errno;
+
+       err = setsockopt(fd, SOL_XDP, XDP_UMEM_COMPLETION_RING,
+                        &umem->config.comp_size,
+                        sizeof(umem->config.comp_size));
+       if (err)
+               return -errno;
+
+       err = xsk_get_mmap_offsets(fd, &off);
+       if (err)
+               return -errno;
+
+       map = mmap(NULL, off.fr.desc + umem->config.fill_size * sizeof(__u64),
+                  PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd,
+                  XDP_UMEM_PGOFF_FILL_RING);
+       if (map == MAP_FAILED)
+               return -errno;
+
+       fill->mask = umem->config.fill_size - 1;
+       fill->size = umem->config.fill_size;
+       fill->producer = map + off.fr.producer;
+       fill->consumer = map + off.fr.consumer;
+       fill->flags = map + off.fr.flags;
+       fill->ring = map + off.fr.desc;
+       fill->cached_cons = umem->config.fill_size;
+
+       map = mmap(NULL, off.cr.desc + umem->config.comp_size * sizeof(__u64),
+                  PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd,
+                  XDP_UMEM_PGOFF_COMPLETION_RING);
+       if (map == MAP_FAILED) {
+               err = -errno;
+               goto out_mmap;
+       }
+
+       comp->mask = umem->config.comp_size - 1;
+       comp->size = umem->config.comp_size;
+       comp->producer = map + off.cr.producer;
+       comp->consumer = map + off.cr.consumer;
+       comp->flags = map + off.cr.flags;
+       comp->ring = map + off.cr.desc;
+
+       return 0;
+
+out_mmap:
+       munmap(map, off.fr.desc + umem->config.fill_size * sizeof(__u64));
+       return err;
+}
+
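+/* Typical usage of xsk_umem__create() below, as a minimal sketch with
+ * error handling elided; NUM_FRAMES is a placeholder chosen by the caller
+ * and passing NULL as the last argument selects the default configuration.
+ * The buffer must be page aligned, which mmap() guarantees:
+ *
+ *     struct xsk_ring_prod fill;
+ *     struct xsk_ring_cons comp;
+ *     struct xsk_umem *umem;
+ *     __u64 size = NUM_FRAMES * XSK_UMEM__DEFAULT_FRAME_SIZE;
+ *     void *bufs = mmap(NULL, size, PROT_READ | PROT_WRITE,
+ *                       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ *
+ *     if (xsk_umem__create(&umem, bufs, size, &fill, &comp, NULL))
+ *             exit(EXIT_FAILURE);
+ */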
+int xsk_umem__create(struct xsk_umem **umem_ptr, void *umem_area,
+                    __u64 size, struct xsk_ring_prod *fill,
+                    struct xsk_ring_cons *comp,
+                    const struct xsk_umem_config *usr_config)
+{
+       struct xdp_umem_reg mr;
+       struct xsk_umem *umem;
+       int err;
+
+       if (!umem_area || !umem_ptr || !fill || !comp)
+               return -EFAULT;
+       if (!size && !xsk_page_aligned(umem_area))
+               return -EINVAL;
+
+       umem = calloc(1, sizeof(*umem));
+       if (!umem)
+               return -ENOMEM;
+
+       umem->fd = socket(AF_XDP, SOCK_RAW | SOCK_CLOEXEC, 0);
+       if (umem->fd < 0) {
+               err = -errno;
+               goto out_umem_alloc;
+       }
+
+       umem->umem_area = umem_area;
+       INIT_LIST_HEAD(&umem->ctx_list);
+       xsk_set_umem_config(&umem->config, usr_config);
+
+       memset(&mr, 0, sizeof(mr));
+       mr.addr = (uintptr_t)umem_area;
+       mr.len = size;
+       mr.chunk_size = umem->config.frame_size;
+       mr.headroom = umem->config.frame_headroom;
+       mr.flags = umem->config.flags;
+
+       err = setsockopt(umem->fd, SOL_XDP, XDP_UMEM_REG, &mr, sizeof(mr));
+       if (err) {
+               err = -errno;
+               goto out_socket;
+       }
+
+       err = xsk_create_umem_rings(umem, umem->fd, fill, comp);
+       if (err)
+               goto out_socket;
+
+       umem->fill_save = fill;
+       umem->comp_save = comp;
+       *umem_ptr = umem;
+       return 0;
+
+out_socket:
+       close(umem->fd);
+out_umem_alloc:
+       free(umem);
+       return err;
+}
+
+struct xsk_umem_config_v1 {
+       __u32 fill_size;
+       __u32 comp_size;
+       __u32 frame_size;
+       __u32 frame_headroom;
+};
+
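+/* Probe which redirect flavor the running kernel supports: load a minimal
+ * XDP program that calls bpf_redirect_map() on an empty XSKMAP with
+ * XDP_PASS as the flags argument and test-run it. Kernels that support a
+ * default action in the flags argument (v5.3+) return XDP_PASS on the
+ * lookup miss; on older kernels the fallback program with an explicit
+ * bpf_map_lookup_elem() must be used instead.
+ */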
+static enum xsk_prog get_xsk_prog(void)
+{
+       enum xsk_prog detected = XSK_PROG_FALLBACK;
+       char data_in = 0, data_out;
+       struct bpf_insn insns[] = {
+               BPF_LD_MAP_FD(BPF_REG_1, 0),
+               BPF_MOV64_IMM(BPF_REG_2, 0),
+               BPF_MOV64_IMM(BPF_REG_3, XDP_PASS),
+               BPF_EMIT_CALL(BPF_FUNC_redirect_map),
+               BPF_EXIT_INSN(),
+       };
+       LIBBPF_OPTS(bpf_test_run_opts, opts,
+               .data_in = &data_in,
+               .data_size_in = 1,
+               .data_out = &data_out,
+       );
+
+       int prog_fd, map_fd, ret, insn_cnt = ARRAY_SIZE(insns);
+
+       map_fd = bpf_map_create(BPF_MAP_TYPE_XSKMAP, NULL, sizeof(int), sizeof(int), 1, NULL);
+       if (map_fd < 0)
+               return detected;
+
+       insns[0].imm = map_fd;
+
+       prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, NULL, "GPL", insns, insn_cnt, NULL);
+       if (prog_fd < 0) {
+               close(map_fd);
+               return detected;
+       }
+
+       ret = bpf_prog_test_run_opts(prog_fd, &opts);
+       if (!ret && opts.retval == XDP_PASS)
+               detected = XSK_PROG_REDIRECT_FLAGS;
+       close(prog_fd);
+       close(map_fd);
+       return detected;
+}
+
+static int xsk_load_xdp_prog(struct xsk_socket *xsk)
+{
+       static const int log_buf_size = 16 * 1024;
+       struct xsk_ctx *ctx = xsk->ctx;
+       char log_buf[log_buf_size];
+       int prog_fd;
+
+       /* This is the fallback C-program:
+        * SEC("xdp_sock") int xdp_sock_prog(struct xdp_md *ctx)
+        * {
+        *     int ret, index = ctx->rx_queue_index;
+        *
+        *     // A set entry here means that the corresponding queue_id
+        *     // has an active AF_XDP socket bound to it.
+        *     ret = bpf_redirect_map(&xsks_map, index, XDP_PASS);
+        *     if (ret > 0)
+        *         return ret;
+        *
+        *     // Fallback for pre-5.3 kernels, which do not support a
+        *     // default action in the flags parameter.
+        *     if (bpf_map_lookup_elem(&xsks_map, &index))
+        *         return bpf_redirect_map(&xsks_map, index, 0);
+        *     return XDP_PASS;
+        * }
+        */
+       struct bpf_insn prog[] = {
+               /* r2 = *(u32 *)(r1 + 16) */
+               BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 16),
+               /* *(u32 *)(r10 - 4) = r2 */
+               BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_2, -4),
+               /* r1 = xskmap[] */
+               BPF_LD_MAP_FD(BPF_REG_1, ctx->xsks_map_fd),
+               /* r3 = XDP_PASS */
+               BPF_MOV64_IMM(BPF_REG_3, 2),
+               /* call bpf_redirect_map */
+               BPF_EMIT_CALL(BPF_FUNC_redirect_map),
+               /* if w0 != 0 goto pc+13 */
+               BPF_JMP32_IMM(BPF_JSGT, BPF_REG_0, 0, 13),
+               /* r2 = r10 */
+               BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+               /* r2 += -4 */
+               BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+               /* r1 = xskmap[] */
+               BPF_LD_MAP_FD(BPF_REG_1, ctx->xsks_map_fd),
+               /* call bpf_map_lookup_elem */
+               BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+               /* r1 = r0 */
+               BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+               /* r0 = XDP_PASS */
+               BPF_MOV64_IMM(BPF_REG_0, 2),
+               /* if r1 == 0 goto pc+5 */
+               BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 5),
+               /* r2 = *(u32 *)(r10 - 4) */
+               BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_10, -4),
+               /* r1 = xskmap[] */
+               BPF_LD_MAP_FD(BPF_REG_1, ctx->xsks_map_fd),
+               /* r3 = 0 */
+               BPF_MOV64_IMM(BPF_REG_3, 0),
+               /* call bpf_redirect_map */
+               BPF_EMIT_CALL(BPF_FUNC_redirect_map),
+               /* The jumps are to this instruction */
+               BPF_EXIT_INSN(),
+       };
+
+       /* This is the post-5.3 kernel C-program:
+        * SEC("xdp_sock") int xdp_sock_prog(struct xdp_md *ctx)
+        * {
+        *     return bpf_redirect_map(&xsks_map, ctx->rx_queue_index, XDP_PASS);
+        * }
+        */
+       struct bpf_insn prog_redirect_flags[] = {
+               /* r2 = *(u32 *)(r1 + 16) */
+               BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 16),
+               /* r1 = xskmap[] */
+               BPF_LD_MAP_FD(BPF_REG_1, ctx->xsks_map_fd),
+               /* r3 = XDP_PASS */
+               BPF_MOV64_IMM(BPF_REG_3, 2),
+               /* call bpf_redirect_map */
+               BPF_EMIT_CALL(BPF_FUNC_redirect_map),
+               BPF_EXIT_INSN(),
+       };
+       size_t insns_cnt[] = {ARRAY_SIZE(prog),
+                             ARRAY_SIZE(prog_redirect_flags),
+       };
+       struct bpf_insn *progs[] = {prog, prog_redirect_flags};
+       enum xsk_prog option = get_xsk_prog();
+       LIBBPF_OPTS(bpf_prog_load_opts, opts,
+               .log_buf = log_buf,
+               .log_size = log_buf_size,
+       );
+
+       prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, NULL, "LGPL-2.1 or BSD-2-Clause",
+                               progs[option], insns_cnt[option], &opts);
+       if (prog_fd < 0) {
+               pr_warn("BPF log buffer:\n%s", log_buf);
+               return prog_fd;
+       }
+
+       ctx->prog_fd = prog_fd;
+       return 0;
+}
+
+static int xsk_create_bpf_link(struct xsk_socket *xsk)
+{
+       DECLARE_LIBBPF_OPTS(bpf_link_create_opts, opts);
+       struct xsk_ctx *ctx = xsk->ctx;
+       __u32 prog_id = 0;
+       int link_fd;
+       int err;
+
+       err = bpf_xdp_query_id(ctx->ifindex, xsk->config.xdp_flags, &prog_id);
+       if (err) {
+               pr_warn("getting XDP prog id failed\n");
+               return err;
+       }
+
+       /* If there's a netlink-based XDP prog loaded on the interface, bail
+        * out and ask the user to remove it themselves.
+        */
+       if (prog_id) {
+               pr_warn("Netlink-based XDP prog detected, please unload it in order to launch AF_XDP prog\n");
+               return -EINVAL;
+       }
+
+       opts.flags = xsk->config.xdp_flags & ~(XDP_FLAGS_UPDATE_IF_NOEXIST | XDP_FLAGS_REPLACE);
+
+       link_fd = bpf_link_create(ctx->prog_fd, ctx->ifindex, BPF_XDP, &opts);
+       if (link_fd < 0) {
+               pr_warn("bpf_link_create failed: %s\n", strerror(errno));
+               return link_fd;
+       }
+
+       ctx->link_fd = link_fd;
+       return 0;
+}
+
+/* Copy up to sz - 1 bytes from the zero-terminated src string and ensure that
+ * dst is a zero-terminated string no matter what (unless sz == 0, in which
+ * case it's a no-op). It's conceptually close to FreeBSD's strlcpy(), but
+ * differs in what is returned. Given this is an internal helper, it's trivial
+ * to extend when necessary. Use this instead of strncpy inside libbpf source
+ * code.
+ */
+static inline void libbpf_strlcpy(char *dst, const char *src, size_t sz)
+{
+       size_t i;
+
+       if (sz == 0)
+               return;
+
+       sz--;
+       for (i = 0; i < sz && src[i]; i++)
+               dst[i] = src[i];
+       dst[i] = '\0';
+}
+
+static int xsk_get_max_queues(struct xsk_socket *xsk)
+{
+       struct ethtool_channels channels = { .cmd = ETHTOOL_GCHANNELS };
+       struct xsk_ctx *ctx = xsk->ctx;
+       struct ifreq ifr = {};
+       int fd, err, ret;
+
+       fd = socket(AF_LOCAL, SOCK_DGRAM | SOCK_CLOEXEC, 0);
+       if (fd < 0)
+               return -errno;
+
+       ifr.ifr_data = (void *)&channels;
+       libbpf_strlcpy(ifr.ifr_name, ctx->ifname, IFNAMSIZ);
+       err = ioctl(fd, SIOCETHTOOL, &ifr);
+       if (err && errno != EOPNOTSUPP) {
+               ret = -errno;
+               goto out;
+       }
+
+       if (err) {
+               /* If the device says it has no channels, then all traffic
+                * is sent to a single stream, so max queues = 1.
+                */
+               ret = 1;
+       } else {
+               /* Take the max of rx, tx, combined. Drivers return
+                * the number of channels in different ways.
+                */
+               ret = max(channels.max_rx, channels.max_tx);
+               ret = max(ret, (int)channels.max_combined);
+       }
+
+out:
+       close(fd);
+       return ret;
+}
+
+static int xsk_create_bpf_maps(struct xsk_socket *xsk)
+{
+       struct xsk_ctx *ctx = xsk->ctx;
+       int max_queues;
+       int fd;
+
+       max_queues = xsk_get_max_queues(xsk);
+       if (max_queues < 0)
+               return max_queues;
+
+       fd = bpf_map_create(BPF_MAP_TYPE_XSKMAP, "xsks_map",
+                           sizeof(int), sizeof(int), max_queues, NULL);
+       if (fd < 0)
+               return fd;
+
+       ctx->xsks_map_fd = fd;
+
+       return 0;
+}
+
+static void xsk_delete_bpf_maps(struct xsk_socket *xsk)
+{
+       struct xsk_ctx *ctx = xsk->ctx;
+
+       bpf_map_delete_elem(ctx->xsks_map_fd, &ctx->queue_id);
+       close(ctx->xsks_map_fd);
+}
+
+static int xsk_lookup_bpf_maps(struct xsk_socket *xsk)
+{
+       __u32 i, *map_ids, num_maps, prog_len = sizeof(struct bpf_prog_info);
+       __u32 map_len = sizeof(struct bpf_map_info);
+       struct bpf_prog_info prog_info = {};
+       struct xsk_ctx *ctx = xsk->ctx;
+       struct bpf_map_info map_info;
+       int fd, err;
+
+       err = bpf_obj_get_info_by_fd(ctx->prog_fd, &prog_info, &prog_len);
+       if (err)
+               return err;
+
+       num_maps = prog_info.nr_map_ids;
+
+       map_ids = calloc(prog_info.nr_map_ids, sizeof(*map_ids));
+       if (!map_ids)
+               return -ENOMEM;
+
+       memset(&prog_info, 0, prog_len);
+       prog_info.nr_map_ids = num_maps;
+       prog_info.map_ids = (__u64)(unsigned long)map_ids;
+
+       err = bpf_obj_get_info_by_fd(ctx->prog_fd, &prog_info, &prog_len);
+       if (err)
+               goto out_map_ids;
+
+       ctx->xsks_map_fd = -1;
+
+       for (i = 0; i < prog_info.nr_map_ids; i++) {
+               fd = bpf_map_get_fd_by_id(map_ids[i]);
+               if (fd < 0)
+                       continue;
+
+               memset(&map_info, 0, map_len);
+               err = bpf_obj_get_info_by_fd(fd, &map_info, &map_len);
+               if (err) {
+                       close(fd);
+                       continue;
+               }
+
+               if (!strncmp(map_info.name, "xsks_map", sizeof(map_info.name))) {
+                       ctx->xsks_map_fd = fd;
+                       break;
+               }
+
+               close(fd);
+       }
+
+       if (ctx->xsks_map_fd == -1)
+               err = -ENOENT;
+
+out_map_ids:
+       free(map_ids);
+       return err;
+}
+
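+/* Register this socket in the XSKMAP under its queue id, so that the XDP
+ * program's bpf_redirect_map() call steers packets arriving on that queue
+ * to this socket.
+ */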
+static int xsk_set_bpf_maps(struct xsk_socket *xsk)
+{
+       struct xsk_ctx *ctx = xsk->ctx;
+
+       return bpf_map_update_elem(ctx->xsks_map_fd, &ctx->queue_id,
+                                  &xsk->fd, 0);
+}
+
+static int xsk_link_lookup(int ifindex, __u32 *prog_id, int *link_fd)
+{
+       struct bpf_link_info link_info;
+       __u32 link_len;
+       __u32 id = 0;
+       int err;
+       int fd;
+
+       while (true) {
+               err = bpf_link_get_next_id(id, &id);
+               if (err) {
+                       if (errno == ENOENT) {
+                               err = 0;
+                               break;
+                       }
+                       pr_warn("can't get next link: %s\n", strerror(errno));
+                       break;
+               }
+
+               fd = bpf_link_get_fd_by_id(id);
+               if (fd < 0) {
+                       if (errno == ENOENT)
+                               continue;
+                       pr_warn("can't get link by id (%u): %s\n", id, strerror(errno));
+                       err = -errno;
+                       break;
+               }
+
+               link_len = sizeof(struct bpf_link_info);
+               memset(&link_info, 0, link_len);
+               err = bpf_obj_get_info_by_fd(fd, &link_info, &link_len);
+               if (err) {
+                       pr_warn("can't get link info: %s\n", strerror(errno));
+                       close(fd);
+                       break;
+               }
+               if (link_info.type == BPF_LINK_TYPE_XDP) {
+                       if (link_info.xdp.ifindex == ifindex) {
+                               *link_fd = fd;
+                               if (prog_id)
+                                       *prog_id = link_info.prog_id;
+                               break;
+                       }
+               }
+               close(fd);
+       }
+
+       return err;
+}
+
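+/* Check whether the running kernel supports attaching XDP programs via
+ * bpf_link: report true if an XDP link already exists on loopback,
+ * otherwise try to create a throwaway link there with a minimal XDP_PASS
+ * program.
+ */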
+static bool xsk_probe_bpf_link(void)
+{
+       LIBBPF_OPTS(bpf_link_create_opts, opts, .flags = XDP_FLAGS_SKB_MODE);
+       struct bpf_insn insns[2] = {
+               BPF_MOV64_IMM(BPF_REG_0, XDP_PASS),
+               BPF_EXIT_INSN()
+       };
+       int prog_fd, link_fd = -1, insn_cnt = ARRAY_SIZE(insns);
+       int ifindex_lo = 1;
+       bool ret = false;
+       int err;
+
+       err = xsk_link_lookup(ifindex_lo, NULL, &link_fd);
+       if (err)
+               return ret;
+
+       if (link_fd >= 0)
+               return true;
+
+       prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, NULL, "GPL", insns, insn_cnt, NULL);
+       if (prog_fd < 0)
+               return ret;
+
+       link_fd = bpf_link_create(prog_fd, ifindex_lo, BPF_XDP, &opts);
+       close(prog_fd);
+
+       if (link_fd >= 0) {
+               ret = true;
+               close(link_fd);
+       }
+
+       return ret;
+}
+
+static int xsk_create_xsk_struct(int ifindex, struct xsk_socket *xsk)
+{
+       char ifname[IFNAMSIZ];
+       struct xsk_ctx *ctx;
+       char *interface;
+
+       ctx = calloc(1, sizeof(*ctx));
+       if (!ctx)
+               return -ENOMEM;
+
+       interface = if_indextoname(ifindex, &ifname[0]);
+       if (!interface) {
+               free(ctx);
+               return -errno;
+       }
+
+       ctx->ifindex = ifindex;
+       libbpf_strlcpy(ctx->ifname, ifname, IFNAMSIZ);
+
+       xsk->ctx = ctx;
+       xsk->ctx->has_bpf_link = xsk_probe_bpf_link();
+
+       return 0;
+}
+
+static int xsk_init_xdp_res(struct xsk_socket *xsk,
+                           int *xsks_map_fd)
+{
+       struct xsk_ctx *ctx = xsk->ctx;
+       int err;
+
+       err = xsk_create_bpf_maps(xsk);
+       if (err)
+               return err;
+
+       err = xsk_load_xdp_prog(xsk);
+       if (err)
+               goto err_load_xdp_prog;
+
+       if (ctx->has_bpf_link)
+               err = xsk_create_bpf_link(xsk);
+       else
+               err = bpf_xdp_attach(xsk->ctx->ifindex, ctx->prog_fd,
+                                    xsk->config.xdp_flags, NULL);
+
+       if (err)
+               goto err_attach_xdp_prog;
+
+       if (!xsk->rx)
+               return err;
+
+       err = xsk_set_bpf_maps(xsk);
+       if (err)
+               goto err_set_bpf_maps;
+
+       return err;
+
+err_set_bpf_maps:
+       if (ctx->has_bpf_link)
+               close(ctx->link_fd);
+       else
+               bpf_xdp_detach(ctx->ifindex, 0, NULL);
+err_attach_xdp_prog:
+       close(ctx->prog_fd);
+err_load_xdp_prog:
+       xsk_delete_bpf_maps(xsk);
+       return err;
+}
+
+static int xsk_lookup_xdp_res(struct xsk_socket *xsk, int *xsks_map_fd, int prog_id)
+{
+       struct xsk_ctx *ctx = xsk->ctx;
+       int err;
+
+       ctx->prog_fd = bpf_prog_get_fd_by_id(prog_id);
+       if (ctx->prog_fd < 0) {
+               err = -errno;
+               goto err_prog_fd;
+       }
+       err = xsk_lookup_bpf_maps(xsk);
+       if (err)
+               goto err_lookup_maps;
+
+       if (!xsk->rx)
+               return err;
+
+       err = xsk_set_bpf_maps(xsk);
+       if (err)
+               goto err_set_maps;
+
+       return err;
+
+err_set_maps:
+       close(ctx->xsks_map_fd);
+err_lookup_maps:
+       close(ctx->prog_fd);
+err_prog_fd:
+       if (ctx->has_bpf_link)
+               close(ctx->link_fd);
+       return err;
+}
+
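+/* Set up the XDP side of the socket: if no program is attached to the
+ * interface yet, create an XSKMAP and load and attach the default program;
+ * otherwise reuse the already attached program and look up its xsks_map.
+ * In both cases a socket with an Rx ring is then registered in the map.
+ */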
+static int __xsk_setup_xdp_prog(struct xsk_socket *_xdp, int *xsks_map_fd)
+{
+       struct xsk_socket *xsk = _xdp;
+       struct xsk_ctx *ctx = xsk->ctx;
+       __u32 prog_id = 0;
+       int err;
+
+       if (ctx->has_bpf_link)
+               err = xsk_link_lookup(ctx->ifindex, &prog_id, &ctx->link_fd);
+       else
+               err = bpf_xdp_query_id(ctx->ifindex, xsk->config.xdp_flags, &prog_id);
+
+       if (err)
+               return err;
+
+       err = !prog_id ? xsk_init_xdp_res(xsk, xsks_map_fd) :
+                        xsk_lookup_xdp_res(xsk, xsks_map_fd, prog_id);
+
+       if (!err && xsks_map_fd)
+               *xsks_map_fd = ctx->xsks_map_fd;
+
+       return err;
+}
+
+int xsk_setup_xdp_prog_xsk(struct xsk_socket *xsk, int *xsks_map_fd)
+{
+       return __xsk_setup_xdp_prog(xsk, xsks_map_fd);
+}
+
+static struct xsk_ctx *xsk_get_ctx(struct xsk_umem *umem, int ifindex,
+                                  __u32 queue_id)
+{
+       struct xsk_ctx *ctx;
+
+       if (list_empty(&umem->ctx_list))
+               return NULL;
+
+       list_for_each_entry(ctx, &umem->ctx_list, list) {
+               if (ctx->ifindex == ifindex && ctx->queue_id == queue_id) {
+                       ctx->refcount++;
+                       return ctx;
+               }
+       }
+
+       return NULL;
+}
+
+static void xsk_put_ctx(struct xsk_ctx *ctx, bool unmap)
+{
+       struct xsk_umem *umem = ctx->umem;
+       struct xdp_mmap_offsets off;
+       int err;
+
+       if (--ctx->refcount)
+               return;
+
+       if (!unmap)
+               goto out_free;
+
+       err = xsk_get_mmap_offsets(umem->fd, &off);
+       if (err)
+               goto out_free;
+
+       munmap(ctx->fill->ring - off.fr.desc, off.fr.desc + umem->config.fill_size *
+              sizeof(__u64));
+       munmap(ctx->comp->ring - off.cr.desc, off.cr.desc + umem->config.comp_size *
+              sizeof(__u64));
+
+out_free:
+       list_del(&ctx->list);
+       free(ctx);
+}
+
+static struct xsk_ctx *xsk_create_ctx(struct xsk_socket *xsk,
+                                     struct xsk_umem *umem, int ifindex,
+                                     const char *ifname, __u32 queue_id,
+                                     struct xsk_ring_prod *fill,
+                                     struct xsk_ring_cons *comp)
+{
+       struct xsk_ctx *ctx;
+       int err;
+
+       ctx = calloc(1, sizeof(*ctx));
+       if (!ctx)
+               return NULL;
+
+       if (!umem->fill_save) {
+               err = xsk_create_umem_rings(umem, xsk->fd, fill, comp);
+               if (err) {
+                       free(ctx);
+                       return NULL;
+               }
+       } else if (umem->fill_save != fill || umem->comp_save != comp) {
+               /* Copy over rings to new structs. */
+               memcpy(fill, umem->fill_save, sizeof(*fill));
+               memcpy(comp, umem->comp_save, sizeof(*comp));
+       }
+
+       ctx->ifindex = ifindex;
+       ctx->refcount = 1;
+       ctx->umem = umem;
+       ctx->queue_id = queue_id;
+       libbpf_strlcpy(ctx->ifname, ifname, IFNAMSIZ);
+
+       ctx->fill = fill;
+       ctx->comp = comp;
+       list_add(&ctx->list, &umem->ctx_list);
+       ctx->has_bpf_link = xsk_probe_bpf_link();
+       return ctx;
+}
+
+static void xsk_destroy_xsk_struct(struct xsk_socket *xsk)
+{
+       free(xsk->ctx);
+       free(xsk);
+}
+
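+/* Intended for callers that provide their own XDP program and XSKMAP, for
+ * instance in combination with XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD. A
+ * sketch, assuming "xsk" and "my_xsks_map_fd" were set up by the caller:
+ *
+ *     if (xsk_socket__update_xskmap(xsk, my_xsks_map_fd))
+ *             exit(EXIT_FAILURE);
+ */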
+int xsk_socket__update_xskmap(struct xsk_socket *xsk, int fd)
+{
+       xsk->ctx->xsks_map_fd = fd;
+       return xsk_set_bpf_maps(xsk);
+}
+
+int xsk_setup_xdp_prog(int ifindex, int *xsks_map_fd)
+{
+       struct xsk_socket *xsk;
+       int res;
+
+       xsk = calloc(1, sizeof(*xsk));
+       if (!xsk)
+               return -ENOMEM;
+
+       res = xsk_create_xsk_struct(ifindex, xsk);
+       if (res) {
+               free(xsk);
+               return -EINVAL;
+       }
+
+       res = __xsk_setup_xdp_prog(xsk, xsks_map_fd);
+
+       xsk_destroy_xsk_struct(xsk);
+
+       return res;
+}
+
+int xsk_socket__create_shared(struct xsk_socket **xsk_ptr,
+                             const char *ifname,
+                             __u32 queue_id, struct xsk_umem *umem,
+                             struct xsk_ring_cons *rx,
+                             struct xsk_ring_prod *tx,
+                             struct xsk_ring_prod *fill,
+                             struct xsk_ring_cons *comp,
+                             const struct xsk_socket_config *usr_config)
+{
+       bool unmap, rx_setup_done = false, tx_setup_done = false;
+       void *rx_map = NULL, *tx_map = NULL;
+       struct sockaddr_xdp sxdp = {};
+       struct xdp_mmap_offsets off;
+       struct xsk_socket *xsk;
+       struct xsk_ctx *ctx;
+       int err, ifindex;
+
+       if (!umem || !xsk_ptr || !(rx || tx))
+               return -EFAULT;
+
+       unmap = umem->fill_save != fill;
+
+       xsk = calloc(1, sizeof(*xsk));
+       if (!xsk)
+               return -ENOMEM;
+
+       err = xsk_set_xdp_socket_config(&xsk->config, usr_config);
+       if (err)
+               goto out_xsk_alloc;
+
+       xsk->outstanding_tx = 0;
+       ifindex = if_nametoindex(ifname);
+       if (!ifindex) {
+               err = -errno;
+               goto out_xsk_alloc;
+       }
+
+       if (umem->refcount++ > 0) {
+               xsk->fd = socket(AF_XDP, SOCK_RAW | SOCK_CLOEXEC, 0);
+               if (xsk->fd < 0) {
+                       err = -errno;
+                       goto out_xsk_alloc;
+               }
+       } else {
+               xsk->fd = umem->fd;
+               rx_setup_done = umem->rx_ring_setup_done;
+               tx_setup_done = umem->tx_ring_setup_done;
+       }
+
+       ctx = xsk_get_ctx(umem, ifindex, queue_id);
+       if (!ctx) {
+               if (!fill || !comp) {
+                       err = -EFAULT;
+                       goto out_socket;
+               }
+
+               ctx = xsk_create_ctx(xsk, umem, ifindex, ifname, queue_id,
+                                    fill, comp);
+               if (!ctx) {
+                       err = -ENOMEM;
+                       goto out_socket;
+               }
+       }
+       xsk->ctx = ctx;
+
+       if (rx && !rx_setup_done) {
+               err = setsockopt(xsk->fd, SOL_XDP, XDP_RX_RING,
+                                &xsk->config.rx_size,
+                                sizeof(xsk->config.rx_size));
+               if (err) {
+                       err = -errno;
+                       goto out_put_ctx;
+               }
+               if (xsk->fd == umem->fd)
+                       umem->rx_ring_setup_done = true;
+       }
+       if (tx && !tx_setup_done) {
+               err = setsockopt(xsk->fd, SOL_XDP, XDP_TX_RING,
+                                &xsk->config.tx_size,
+                                sizeof(xsk->config.tx_size));
+               if (err) {
+                       err = -errno;
+                       goto out_put_ctx;
+               }
+               if (xsk->fd == umem->fd)
+                       umem->tx_ring_setup_done = true;
+       }
+
+       err = xsk_get_mmap_offsets(xsk->fd, &off);
+       if (err) {
+               err = -errno;
+               goto out_put_ctx;
+       }
+
+       if (rx) {
+               rx_map = mmap(NULL, off.rx.desc +
+                             xsk->config.rx_size * sizeof(struct xdp_desc),
+                             PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE,
+                             xsk->fd, XDP_PGOFF_RX_RING);
+               if (rx_map == MAP_FAILED) {
+                       err = -errno;
+                       goto out_put_ctx;
+               }
+
+               rx->mask = xsk->config.rx_size - 1;
+               rx->size = xsk->config.rx_size;
+               rx->producer = rx_map + off.rx.producer;
+               rx->consumer = rx_map + off.rx.consumer;
+               rx->flags = rx_map + off.rx.flags;
+               rx->ring = rx_map + off.rx.desc;
+               rx->cached_prod = *rx->producer;
+               rx->cached_cons = *rx->consumer;
+       }
+       xsk->rx = rx;
+
+       if (tx) {
+               tx_map = mmap(NULL, off.tx.desc +
+                             xsk->config.tx_size * sizeof(struct xdp_desc),
+                             PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE,
+                             xsk->fd, XDP_PGOFF_TX_RING);
+               if (tx_map == MAP_FAILED) {
+                       err = -errno;
+                       goto out_mmap_rx;
+               }
+
+               tx->mask = xsk->config.tx_size - 1;
+               tx->size = xsk->config.tx_size;
+               tx->producer = tx_map + off.tx.producer;
+               tx->consumer = tx_map + off.tx.consumer;
+               tx->flags = tx_map + off.tx.flags;
+               tx->ring = tx_map + off.tx.desc;
+               tx->cached_prod = *tx->producer;
+               /* cached_cons is r->size bigger than the real consumer pointer
+                * See xsk_prod_nb_free
+                */
+               tx->cached_cons = *tx->consumer + xsk->config.tx_size;
+       }
+       xsk->tx = tx;
+
+       sxdp.sxdp_family = PF_XDP;
+       sxdp.sxdp_ifindex = ctx->ifindex;
+       sxdp.sxdp_queue_id = ctx->queue_id;
+       if (umem->refcount > 1) {
+               sxdp.sxdp_flags |= XDP_SHARED_UMEM;
+               sxdp.sxdp_shared_umem_fd = umem->fd;
+       } else {
+               sxdp.sxdp_flags = xsk->config.bind_flags;
+       }
+
+       err = bind(xsk->fd, (struct sockaddr *)&sxdp, sizeof(sxdp));
+       if (err) {
+               err = -errno;
+               goto out_mmap_tx;
+       }
+
+       if (!(xsk->config.libbpf_flags & XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD)) {
+               err = __xsk_setup_xdp_prog(xsk, NULL);
+               if (err)
+                       goto out_mmap_tx;
+       }
+
+       *xsk_ptr = xsk;
+       umem->fill_save = NULL;
+       umem->comp_save = NULL;
+       return 0;
+
+out_mmap_tx:
+       if (tx)
+               munmap(tx_map, off.tx.desc +
+                      xsk->config.tx_size * sizeof(struct xdp_desc));
+out_mmap_rx:
+       if (rx)
+               munmap(rx_map, off.rx.desc +
+                      xsk->config.rx_size * sizeof(struct xdp_desc));
+out_put_ctx:
+       xsk_put_ctx(ctx, unmap);
+out_socket:
+       if (--umem->refcount)
+               close(xsk->fd);
+out_xsk_alloc:
+       free(xsk);
+       return err;
+}
+
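+/* Typical usage of xsk_socket__create() below, as a minimal sketch with
+ * error handling elided; "eth0" and queue 0 are placeholders, "umem" comes
+ * from xsk_umem__create() above and NULL selects the default configuration:
+ *
+ *     struct xsk_ring_cons rx;
+ *     struct xsk_ring_prod tx;
+ *     struct xsk_socket *xsk;
+ *
+ *     if (xsk_socket__create(&xsk, "eth0", 0, umem, &rx, &tx, NULL))
+ *             exit(EXIT_FAILURE);
+ */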
+int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
+                      __u32 queue_id, struct xsk_umem *umem,
+                      struct xsk_ring_cons *rx, struct xsk_ring_prod *tx,
+                      const struct xsk_socket_config *usr_config)
+{
+       if (!umem)
+               return -EFAULT;
+
+       return xsk_socket__create_shared(xsk_ptr, ifname, queue_id, umem,
+                                        rx, tx, umem->fill_save,
+                                        umem->comp_save, usr_config);
+}
+
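+/* Teardown goes in the reverse order of setup: delete all sockets first,
+ * then the umem. While any socket still references the umem, its refcount
+ * is non-zero and xsk_umem__delete() returns -EBUSY:
+ *
+ *     xsk_socket__delete(xsk);
+ *     xsk_umem__delete(umem);
+ */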
+int xsk_umem__delete(struct xsk_umem *umem)
+{
+       struct xdp_mmap_offsets off;
+       int err;
+
+       if (!umem)
+               return 0;
+
+       if (umem->refcount)
+               return -EBUSY;
+
+       err = xsk_get_mmap_offsets(umem->fd, &off);
+       if (!err && umem->fill_save && umem->comp_save) {
+               munmap(umem->fill_save->ring - off.fr.desc,
+                      off.fr.desc + umem->config.fill_size * sizeof(__u64));
+               munmap(umem->comp_save->ring - off.cr.desc,
+                      off.cr.desc + umem->config.comp_size * sizeof(__u64));
+       }
+
+       close(umem->fd);
+       free(umem);
+
+       return 0;
+}
+
+void xsk_socket__delete(struct xsk_socket *xsk)
+{
+       size_t desc_sz = sizeof(struct xdp_desc);
+       struct xdp_mmap_offsets off;
+       struct xsk_umem *umem;
+       struct xsk_ctx *ctx;
+       int err;
+
+       if (!xsk)
+               return;
+
+       ctx = xsk->ctx;
+       umem = ctx->umem;
+
+       xsk_put_ctx(ctx, true);
+
+       if (!ctx->refcount) {
+               xsk_delete_bpf_maps(xsk);
+               close(ctx->prog_fd);
+               if (ctx->has_bpf_link)
+                       close(ctx->link_fd);
+       }
+
+       err = xsk_get_mmap_offsets(xsk->fd, &off);
+       if (!err) {
+               if (xsk->rx) {
+                       munmap(xsk->rx->ring - off.rx.desc,
+                              off.rx.desc + xsk->config.rx_size * desc_sz);
+               }
+               if (xsk->tx) {
+                       munmap(xsk->tx->ring - off.tx.desc,
+                              off.tx.desc + xsk->config.tx_size * desc_sz);
+               }
+       }
+
+       umem->refcount--;
+       /* Do not close an fd that also has an associated umem connected
+        * to it.
+        */
+       if (xsk->fd != umem->fd)
+               close(xsk->fd);
+       free(xsk);
+}
diff --git a/tools/testing/selftests/bpf/xsk.h b/tools/testing/selftests/bpf/xsk.h
new file mode 100644 (file)
index 0000000..997723b
--- /dev/null
@@ -0,0 +1,316 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+
+/*
+ * AF_XDP user-space access library.
+ *
+ * Copyright (c) 2018 - 2019 Intel Corporation.
+ * Copyright (c) 2019 Facebook
+ *
+ * Author(s): Magnus Karlsson <magnus.karlsson@intel.com>
+ */
+
+#ifndef __XSK_H
+#define __XSK_H
+
+#include <stdio.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <linux/if_xdp.h>
+
+#include <bpf/libbpf.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* This whole API has been deprecated and moved to libxdp, which can be found
+ * at https://github.com/xdp-project/xdp-tools. The APIs are exactly the same,
+ * so switching over should just be a matter of linking with libxdp instead of
+ * libbpf for this set of functionality. If anything does not work, please
+ * submit a bug report on the aforementioned page.
+ */
+
+/* Load-Acquire Store-Release barriers used by the XDP socket
+ * library. The following macros should *NOT* be considered part of
+ * the xsk.h API and are subject to change at any time.
+ *
+ * LIBRARY INTERNAL
+ */
+
+#define __XSK_READ_ONCE(x) (*(volatile typeof(x) *)&x)
+#define __XSK_WRITE_ONCE(x, v) (*(volatile typeof(x) *)&x) = (v)
+
+#if defined(__i386__) || defined(__x86_64__)
+# define libbpf_smp_store_release(p, v)                                        \
+       do {                                                            \
+               asm volatile("" : : : "memory");                        \
+               __XSK_WRITE_ONCE(*p, v);                                \
+       } while (0)
+# define libbpf_smp_load_acquire(p)                                    \
+       ({                                                              \
+               typeof(*p) ___p1 = __XSK_READ_ONCE(*p);                 \
+               asm volatile("" : : : "memory");                        \
+               ___p1;                                                  \
+       })
+#elif defined(__aarch64__)
+# define libbpf_smp_store_release(p, v)                                        \
+               asm volatile ("stlr %w1, %0" : "=Q" (*p) : "r" (v) : "memory")
+# define libbpf_smp_load_acquire(p)                                    \
+       ({                                                              \
+               typeof(*p) ___p1;                                       \
+               asm volatile ("ldar %w0, %1"                            \
+                             : "=r" (___p1) : "Q" (*p) : "memory");    \
+               ___p1;                                                  \
+       })
+#elif defined(__riscv)
+# define libbpf_smp_store_release(p, v)                                        \
+       do {                                                            \
+               asm volatile ("fence rw,w" : : : "memory");             \
+               __XSK_WRITE_ONCE(*p, v);                                \
+       } while (0)
+# define libbpf_smp_load_acquire(p)                                    \
+       ({                                                              \
+               typeof(*p) ___p1 = __XSK_READ_ONCE(*p);                 \
+               asm volatile ("fence r,rw" : : : "memory");             \
+               ___p1;                                                  \
+       })
+#endif
+
+#ifndef libbpf_smp_store_release
+#define libbpf_smp_store_release(p, v)                                 \
+       do {                                                            \
+               __sync_synchronize();                                   \
+               __XSK_WRITE_ONCE(*p, v);                                \
+       } while (0)
+#endif
+
+#ifndef libbpf_smp_load_acquire
+#define libbpf_smp_load_acquire(p)                                     \
+       ({                                                              \
+               typeof(*p) ___p1 = __XSK_READ_ONCE(*p);                 \
+               __sync_synchronize();                                   \
+               ___p1;                                                  \
+       })
+#endif
+
+/* LIBRARY INTERNAL -- END */
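+
+/* Example (illustrative sketch; "prod", "cons", "ring" and "mask" are
+ * placeholder names): the two barriers pair up across the producer and
+ * consumer sides of a ring. The producer publishes an entry with a
+ * store-release on its index; the consumer picks it up with a matching
+ * load-acquire, which guarantees that ring contents written before the
+ * release are visible after the acquire:
+ *
+ *     ring[*prod & mask] = entry;                     // producer
+ *     libbpf_smp_store_release(prod, *prod + 1);
+ *
+ *     cached_prod = libbpf_smp_load_acquire(prod);    // consumer
+ *     entry = ring[cons & mask];     // safe while cons != cached_prod
+ */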
+
+/* Do not access these members directly. Use the functions below. */
+#define DEFINE_XSK_RING(name) \
+struct name { \
+       __u32 cached_prod; \
+       __u32 cached_cons; \
+       __u32 mask; \
+       __u32 size; \
+       __u32 *producer; \
+       __u32 *consumer; \
+       void *ring; \
+       __u32 *flags; \
+}
+
+DEFINE_XSK_RING(xsk_ring_prod);
+DEFINE_XSK_RING(xsk_ring_cons);
+
+/* For a detailed explanation on the memory barriers associated with the
+ * ring, please take a look at net/xdp/xsk_queue.h.
+ */
+
+struct xsk_umem;
+struct xsk_socket;
+
+static inline __u64 *xsk_ring_prod__fill_addr(struct xsk_ring_prod *fill,
+                                             __u32 idx)
+{
+       __u64 *addrs = (__u64 *)fill->ring;
+
+       return &addrs[idx & fill->mask];
+}
+
+static inline const __u64 *
+xsk_ring_cons__comp_addr(const struct xsk_ring_cons *comp, __u32 idx)
+{
+       const __u64 *addrs = (const __u64 *)comp->ring;
+
+       return &addrs[idx & comp->mask];
+}
+
+static inline struct xdp_desc *xsk_ring_prod__tx_desc(struct xsk_ring_prod *tx,
+                                                     __u32 idx)
+{
+       struct xdp_desc *descs = (struct xdp_desc *)tx->ring;
+
+       return &descs[idx & tx->mask];
+}
+
+static inline const struct xdp_desc *
+xsk_ring_cons__rx_desc(const struct xsk_ring_cons *rx, __u32 idx)
+{
+       const struct xdp_desc *descs = (const struct xdp_desc *)rx->ring;
+
+       return &descs[idx & rx->mask];
+}
+
+static inline int xsk_ring_prod__needs_wakeup(const struct xsk_ring_prod *r)
+{
+       return *r->flags & XDP_RING_NEED_WAKEUP;
+}
+
+static inline __u32 xsk_prod_nb_free(struct xsk_ring_prod *r, __u32 nb)
+{
+       __u32 free_entries = r->cached_cons - r->cached_prod;
+
+       if (free_entries >= nb)
+               return free_entries;
+
+       /* Refresh the local tail pointer.
+        * cached_cons is r->size bigger than the real consumer pointer so
+        * that this addition can be avoided in the more frequently
+        * executed code that computes free_entries in the beginning of
+        * this function. Without this optimization it would have been
+        * free_entries = r->cached_cons - r->cached_prod + r->size.
+        */
+       r->cached_cons = libbpf_smp_load_acquire(r->consumer);
+       r->cached_cons += r->size;
+
+       return r->cached_cons - r->cached_prod;
+}
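+
+/* Worked example (illustrative) for the refresh above: with r->size = 8, a
+ * real consumer index of 3 and r->cached_prod = 5, the refresh sets
+ * r->cached_cons to 3 + 8 = 11, so free_entries = 11 - 5 = 6, which equals
+ * 8 - (5 - 3) but is computed with a single subtraction on the fast path.
+ */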
+
+static inline __u32 xsk_cons_nb_avail(struct xsk_ring_cons *r, __u32 nb)
+{
+       __u32 entries = r->cached_prod - r->cached_cons;
+
+       if (entries == 0) {
+               r->cached_prod = libbpf_smp_load_acquire(r->producer);
+               entries = r->cached_prod - r->cached_cons;
+       }
+
+       return (entries > nb) ? nb : entries;
+}
+
+static inline __u32 xsk_ring_prod__reserve(struct xsk_ring_prod *prod, __u32 nb, __u32 *idx)
+{
+       if (xsk_prod_nb_free(prod, nb) < nb)
+               return 0;
+
+       *idx = prod->cached_prod;
+       prod->cached_prod += nb;
+
+       return nb;
+}
+
+static inline void xsk_ring_prod__submit(struct xsk_ring_prod *prod, __u32 nb)
+{
+       /* Make sure everything has been written to the ring before indicating
+        * this to the kernel by writing the producer pointer.
+        */
+       libbpf_smp_store_release(prod->producer, *prod->producer + nb);
+}
+
+static inline __u32 xsk_ring_cons__peek(struct xsk_ring_cons *cons, __u32 nb, __u32 *idx)
+{
+       __u32 entries = xsk_cons_nb_avail(cons, nb);
+
+       if (entries > 0) {
+               *idx = cons->cached_cons;
+               cons->cached_cons += entries;
+       }
+
+       return entries;
+}
+
+static inline void xsk_ring_cons__cancel(struct xsk_ring_cons *cons, __u32 nb)
+{
+       cons->cached_cons -= nb;
+}
+
+static inline void xsk_ring_cons__release(struct xsk_ring_cons *cons, __u32 nb)
+{
+       /* Make sure data has been read before indicating we are done
+        * with the entries by updating the consumer pointer.
+        */
+       libbpf_smp_store_release(cons->consumer, *cons->consumer + nb);
+}
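+
+/* Example (illustrative) of how the reserve/submit and peek/release pairs
+ * compose in a typical RX loop; "rx", "fq" and the batch size BATCH are
+ * placeholder names, and error/wakeup handling is elided:
+ *
+ *     __u32 i, idx_rx, idx_fq;
+ *     __u32 rcvd = xsk_ring_cons__peek(rx, BATCH, &idx_rx);
+ *
+ *     if (rcvd && xsk_ring_prod__reserve(fq, rcvd, &idx_fq) == rcvd) {
+ *             for (i = 0; i < rcvd; i++) {
+ *                     const struct xdp_desc *d = xsk_ring_cons__rx_desc(rx, idx_rx++);
+ *                     // ... process d->addr / d->len ...
+ *                     *xsk_ring_prod__fill_addr(fq, idx_fq++) = d->addr;
+ *             }
+ *             xsk_ring_prod__submit(fq, rcvd);
+ *             xsk_ring_cons__release(rx, rcvd);
+ *     }
+ */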
+
+static inline void *xsk_umem__get_data(void *umem_area, __u64 addr)
+{
+       return &((char *)umem_area)[addr];
+}
+
+static inline __u64 xsk_umem__extract_addr(__u64 addr)
+{
+       return addr & XSK_UNALIGNED_BUF_ADDR_MASK;
+}
+
+static inline __u64 xsk_umem__extract_offset(__u64 addr)
+{
+       return addr >> XSK_UNALIGNED_BUF_OFFSET_SHIFT;
+}
+
+static inline __u64 xsk_umem__add_offset_to_addr(__u64 addr)
+{
+       return xsk_umem__extract_addr(addr) + xsk_umem__extract_offset(addr);
+}
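+
+/* Example (illustrative): in unaligned mode the kernel encodes a buffer
+ * reference as a base address in the lower 48 bits plus an offset in the
+ * upper 16 bits, so for
+ * addr = ((__u64)3 << XSK_UNALIGNED_BUF_OFFSET_SHIFT) | 0x1000:
+ *
+ *     xsk_umem__extract_addr(addr)            == 0x1000
+ *     xsk_umem__extract_offset(addr)          == 3
+ *     xsk_umem__add_offset_to_addr(addr)      == 0x1003
+ */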
+
+int xsk_umem__fd(const struct xsk_umem *umem);
+int xsk_socket__fd(const struct xsk_socket *xsk);
+
+#define XSK_RING_CONS__DEFAULT_NUM_DESCS      2048
+#define XSK_RING_PROD__DEFAULT_NUM_DESCS      2048
+#define XSK_UMEM__DEFAULT_FRAME_SHIFT    12 /* 4096 bytes */
+#define XSK_UMEM__DEFAULT_FRAME_SIZE     (1 << XSK_UMEM__DEFAULT_FRAME_SHIFT)
+#define XSK_UMEM__DEFAULT_FRAME_HEADROOM 0
+#define XSK_UMEM__DEFAULT_FLAGS 0
+
+struct xsk_umem_config {
+       __u32 fill_size;
+       __u32 comp_size;
+       __u32 frame_size;
+       __u32 frame_headroom;
+       __u32 flags;
+};
+
+int xsk_setup_xdp_prog_xsk(struct xsk_socket *xsk, int *xsks_map_fd);
+int xsk_setup_xdp_prog(int ifindex, int *xsks_map_fd);
+int xsk_socket__update_xskmap(struct xsk_socket *xsk, int xsks_map_fd);
+
+/* Flags for the libbpf_flags field. */
+#define XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD (1 << 0)
+
+struct xsk_socket_config {
+       __u32 rx_size;
+       __u32 tx_size;
+       __u32 libbpf_flags;
+       __u32 xdp_flags;
+       __u16 bind_flags;
+};
+
+/* Set config to NULL to get the default configuration. */
+int xsk_umem__create(struct xsk_umem **umem,
+                    void *umem_area, __u64 size,
+                    struct xsk_ring_prod *fill,
+                    struct xsk_ring_cons *comp,
+                    const struct xsk_umem_config *config);
+int xsk_socket__create(struct xsk_socket **xsk,
+                      const char *ifname, __u32 queue_id,
+                      struct xsk_umem *umem,
+                      struct xsk_ring_cons *rx,
+                      struct xsk_ring_prod *tx,
+                      const struct xsk_socket_config *config);
+int xsk_socket__create_shared(struct xsk_socket **xsk_ptr,
+                             const char *ifname,
+                             __u32 queue_id, struct xsk_umem *umem,
+                             struct xsk_ring_cons *rx,
+                             struct xsk_ring_prod *tx,
+                             struct xsk_ring_prod *fill,
+                             struct xsk_ring_cons *comp,
+                             const struct xsk_socket_config *config);
+
+/* Returns 0 for success and -EBUSY if the umem is still in use. */
+int xsk_umem__delete(struct xsk_umem *umem);
+void xsk_socket__delete(struct xsk_socket *xsk);
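+
+/* Example (illustrative; error handling elided, and "umem_area", "size" and
+ * "eth0" are placeholders for a page-aligned buffer, its length and a real
+ * interface name):
+ *
+ *     struct xsk_ring_prod fill;
+ *     struct xsk_ring_cons comp, rx;
+ *     struct xsk_umem *umem;
+ *     struct xsk_socket *xsk;
+ *
+ *     xsk_umem__create(&umem, umem_area, size, &fill, &comp, NULL);
+ *     xsk_socket__create(&xsk, "eth0", 0, umem, &rx, NULL, NULL);
+ *     ...
+ *     xsk_socket__delete(xsk);
+ *     xsk_umem__delete(umem);
+ */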
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif /* __XSK_H */
index 684e813..a0b7172 100755 (executable)
@@ -8,7 +8,7 @@ ksft_xfail=2
 ksft_xpass=3
 ksft_skip=4
 
-XSKOBJ=xdpxceiver
+XSKOBJ=xskxceiver
 
 validate_root_exec()
 {
@@ -77,7 +77,7 @@ validate_ip_utility()
        [ ! $(type -P ip) ] && { echo "'ip' not found. Skipping tests."; test_exit $ksft_skip; }
 }
 
-execxdpxceiver()
+exec_xskxceiver()
 {
         if [[ $busy_poll -eq 1 ]]; then
                ARGS+="-b "
diff --git a/tools/testing/selftests/bpf/xskxceiver.c b/tools/testing/selftests/bpf/xskxceiver.c
new file mode 100644 (file)
index 0000000..74d56d9
--- /dev/null
@@ -0,0 +1,1682 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2020 Intel Corporation. */
+
+/*
+ * Some functions in this program are taken from
+ * Linux kernel samples/bpf/xdpsock* and modified
+ * for use.
+ *
+ * See test_xsk.sh for detailed information on test topology
+ * and prerequisite network setup.
+ *
+ * This test program contains two threads, each running a single socket with
+ * a unique UMEM. It validates in-order packet delivery and packet content
+ * by having the two sockets send packets to each other.
+ *
+ * Tests Information:
+ * ------------------
+ * These selftests test AF_XDP SKB and Native/DRV modes using veth
+ * Virtual Ethernet interfaces.
+ *
+ * For each mode, the following tests are run:
+ *    a. nopoll - soft-irq processing in run-to-completion mode
+ *    b. poll - using poll() syscall
+ *    c. Socket Teardown
+ *       Create a Tx and a Rx socket, Tx from one socket, Rx on another. Destroy
+ *       both sockets, then repeat multiple times. Only nopoll mode is used
+ *    d. Bi-directional sockets
+ *       Configure sockets as bi-directional tx/rx sockets, set up fill and
+ *       completion rings on each socket, and tx/rx in both directions. Only nopoll
+ *       mode is used
+ *    e. Statistics
+ *       Trigger some error conditions and ensure that the appropriate statistics
+ *       are incremented. Within this test, the following statistics are tested:
+ *       i.   rx dropped
+ *            Increase the UMEM frame headroom to a value which results in
+ *            insufficient space in the rx buffer for both the packet and the headroom.
+ *       ii.  tx invalid
+ *            Set the 'len' field of tx descriptors to an invalid value (umem frame
+ *            size + 1).
+ *       iii. rx ring full
+ *            Reduce the size of the RX ring to a fraction of the fill ring size.
+ *       iv.  fill queue empty
+ *            Do not populate the fill queue and then try to receive pkts.
+ *    f. bpf_link resource persistence
+ *       Configure sockets at indexes 0 and 1, run traffic on queue id 0,
+ *       then remove the xsk sockets from queue 0 on both veth interfaces and
+ *       finally run traffic on queue id 1
+ *    g. unaligned mode
+ *    h. tests for invalid and corner-case Tx descriptors, checking that the
+ *       invalid ones are discarded and the corner-case ones are let through.
+ *    i. 2K frame size tests
+ *
+ * Total tests: 12
+ *
+ * Flow:
+ * -----
+ * - Single process spawns two threads: Tx and Rx
+ * - Each of these two threads attaches to a veth interface within its assigned
+ *   namespace
+ * - Each thread creates one AF_XDP socket connected to a unique umem for each
+ *   veth interface
+ * - Tx thread transmits 10k packets from veth<xxxx> to veth<yyyy>
+ * - Rx thread verifies that all 10k packets were received and delivered in
+ *   order, and have the right content
+ *
+ * Enable/disable packet dump mode:
+ * --------------------------------
+ * To enable L2 - L4 headers and payload dump of each packet on STDOUT, add
+ * parameter -D to params array in test_xsk.sh, i.e. params=("-S" "-D")
+ */
+
+#define _GNU_SOURCE
+#include <fcntl.h>
+#include <errno.h>
+#include <getopt.h>
+#include <asm/barrier.h>
+#include <linux/if_link.h>
+#include <linux/if_ether.h>
+#include <linux/ip.h>
+#include <linux/udp.h>
+#include <arpa/inet.h>
+#include <net/if.h>
+#include <locale.h>
+#include <poll.h>
+#include <pthread.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stddef.h>
+#include <sys/mman.h>
+#include <sys/socket.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/queue.h>
+#include <time.h>
+#include <unistd.h>
+#include <stdatomic.h>
+#include "xsk.h"
+#include "xskxceiver.h"
+#include "../kselftest.h"
+
+/* AF_XDP APIs were moved into libxdp and marked as deprecated in libbpf.
+ * Until xskxceiver is either moved to or rewritten against libxdp, suppress
+ * deprecation warnings in this file.
+ */
+#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
+
+static const char *MAC1 = "\x00\x0A\x56\x9E\xEE\x62";
+static const char *MAC2 = "\x00\x0A\x56\x9E\xEE\x61";
+static const char *IP1 = "192.168.100.162";
+static const char *IP2 = "192.168.100.161";
+static const u16 UDP_PORT1 = 2020;
+static const u16 UDP_PORT2 = 2121;
+
+static void __exit_with_error(int error, const char *file, const char *func, int line)
+{
+       ksft_test_result_fail("[%s:%s:%i]: ERROR: %d/\"%s\"\n", file, func, line, error,
+                             strerror(error));
+       ksft_exit_xfail();
+}
+
+#define exit_with_error(error) __exit_with_error(error, __FILE__, __func__, __LINE__)
+
+#define mode_string(test) (test)->ifobj_tx->xdp_flags & XDP_FLAGS_SKB_MODE ? "SKB" : "DRV"
+#define busy_poll_string(test) (test)->ifobj_tx->busy_poll ? "BUSY-POLL " : ""
+
+static void report_failure(struct test_spec *test)
+{
+       if (test->fail)
+               return;
+
+       ksft_test_result_fail("FAIL: %s %s%s\n", mode_string(test), busy_poll_string(test),
+                             test->name);
+       test->fail = true;
+}
+
+static void memset32_htonl(void *dest, u32 val, u32 size)
+{
+       u32 *ptr = (u32 *)dest;
+       int i;
+
+       val = htonl(val);
+
+       for (i = 0; i < (size & (~0x3)); i += 4)
+               ptr[i >> 2] = val;
+}
+
+/*
+ * Fold a partial checksum
+ * This function code has been taken from
+ * Linux kernel include/asm-generic/checksum.h
+ */
+static __u16 csum_fold(__u32 csum)
+{
+       u32 sum = (__force u32)csum;
+
+       sum = (sum & 0xffff) + (sum >> 16);
+       sum = (sum & 0xffff) + (sum >> 16);
+       return (__force __u16)~sum;
+}
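+
+/* Worked example (illustrative): csum_fold(0x0001ffff) first folds to
+ * 0xffff + 0x1 = 0x10000, then to 0x0000 + 0x1 = 0x1, and returns
+ * ~0x1 = 0xfffe.
+ */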
+
+/*
+ * This function code has been taken from
+ * Linux kernel lib/checksum.c
+ */
+static u32 from64to32(u64 x)
+{
+       /* add up 32-bit and 32-bit for 32+c bit */
+       x = (x & 0xffffffff) + (x >> 32);
+       /* add up carry.. */
+       x = (x & 0xffffffff) + (x >> 32);
+       return (u32)x;
+}
+
+/*
+ * This function code has been taken from
+ * Linux kernel lib/checksum.c
+ */
+static __u32 csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len, __u8 proto, __u32 sum)
+{
+       unsigned long long s = (__force u32)sum;
+
+       s += (__force u32)saddr;
+       s += (__force u32)daddr;
+#ifdef __BIG_ENDIAN__
+       s += proto + len;
+#else
+       s += (proto + len) << 8;
+#endif
+       return (__force __u32)from64to32(s);
+}
+
+/*
+ * This function has been taken from
+ * Linux kernel include/asm-generic/checksum.h
+ */
+static __u16 csum_tcpudp_magic(__be32 saddr, __be32 daddr, __u32 len, __u8 proto, __u32 sum)
+{
+       return csum_fold(csum_tcpudp_nofold(saddr, daddr, len, proto, sum));
+}
+
+static u16 udp_csum(u32 saddr, u32 daddr, u32 len, u8 proto, u16 *udp_pkt)
+{
+       u32 csum = 0;
+       u32 cnt = 0;
+
+       /* udp hdr and data */
+       for (; cnt < len; cnt += 2)
+               csum += udp_pkt[cnt >> 1];
+
+       return csum_tcpudp_magic(saddr, daddr, len, proto, csum);
+}
+
+static void gen_eth_hdr(struct ifobject *ifobject, struct ethhdr *eth_hdr)
+{
+       memcpy(eth_hdr->h_dest, ifobject->dst_mac, ETH_ALEN);
+       memcpy(eth_hdr->h_source, ifobject->src_mac, ETH_ALEN);
+       eth_hdr->h_proto = htons(ETH_P_IP);
+}
+
+static void gen_ip_hdr(struct ifobject *ifobject, struct iphdr *ip_hdr)
+{
+       ip_hdr->version = IP_PKT_VER;
+       ip_hdr->ihl = 0x5;
+       ip_hdr->tos = IP_PKT_TOS;
+       ip_hdr->tot_len = htons(IP_PKT_SIZE);
+       ip_hdr->id = 0;
+       ip_hdr->frag_off = 0;
+       ip_hdr->ttl = IPDEFTTL;
+       ip_hdr->protocol = IPPROTO_UDP;
+       ip_hdr->saddr = ifobject->src_ip;
+       ip_hdr->daddr = ifobject->dst_ip;
+       ip_hdr->check = 0;
+}
+
+static void gen_udp_hdr(u32 payload, void *pkt, struct ifobject *ifobject,
+                       struct udphdr *udp_hdr)
+{
+       udp_hdr->source = htons(ifobject->src_port);
+       udp_hdr->dest = htons(ifobject->dst_port);
+       udp_hdr->len = htons(UDP_PKT_SIZE);
+       memset32_htonl(pkt + PKT_HDR_SIZE, payload, UDP_PKT_DATA_SIZE);
+}
+
+static void gen_udp_csum(struct udphdr *udp_hdr, struct iphdr *ip_hdr)
+{
+       udp_hdr->check = 0;
+       udp_hdr->check =
+           udp_csum(ip_hdr->saddr, ip_hdr->daddr, UDP_PKT_SIZE, IPPROTO_UDP, (u16 *)udp_hdr);
+}
+
+static int xsk_configure_umem(struct xsk_umem_info *umem, void *buffer, u64 size)
+{
+       struct xsk_umem_config cfg = {
+               .fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
+               .comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS,
+               .frame_size = umem->frame_size,
+               .frame_headroom = umem->frame_headroom,
+               .flags = XSK_UMEM__DEFAULT_FLAGS
+       };
+       int ret;
+
+       if (umem->unaligned_mode)
+               cfg.flags |= XDP_UMEM_UNALIGNED_CHUNK_FLAG;
+
+       ret = xsk_umem__create(&umem->umem, buffer, size,
+                              &umem->fq, &umem->cq, &cfg);
+       if (ret)
+               return ret;
+
+       umem->buffer = buffer;
+       return 0;
+}
+
+static void enable_busy_poll(struct xsk_socket_info *xsk)
+{
+       int sock_opt;
+
+       sock_opt = 1;
+       if (setsockopt(xsk_socket__fd(xsk->xsk), SOL_SOCKET, SO_PREFER_BUSY_POLL,
+                      (void *)&sock_opt, sizeof(sock_opt)) < 0)
+               exit_with_error(errno);
+
+       sock_opt = 20;
+       if (setsockopt(xsk_socket__fd(xsk->xsk), SOL_SOCKET, SO_BUSY_POLL,
+                      (void *)&sock_opt, sizeof(sock_opt)) < 0)
+               exit_with_error(errno);
+
+       sock_opt = BATCH_SIZE;
+       if (setsockopt(xsk_socket__fd(xsk->xsk), SOL_SOCKET, SO_BUSY_POLL_BUDGET,
+                      (void *)&sock_opt, sizeof(sock_opt)) < 0)
+               exit_with_error(errno);
+}
+
+static int xsk_configure_socket(struct xsk_socket_info *xsk, struct xsk_umem_info *umem,
+                               struct ifobject *ifobject, bool shared)
+{
+       struct xsk_socket_config cfg = {};
+       struct xsk_ring_cons *rxr;
+       struct xsk_ring_prod *txr;
+
+       xsk->umem = umem;
+       cfg.rx_size = xsk->rxqsize;
+       cfg.tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS;
+       cfg.libbpf_flags = XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD;
+       cfg.xdp_flags = ifobject->xdp_flags;
+       cfg.bind_flags = ifobject->bind_flags;
+       if (shared)
+               cfg.bind_flags |= XDP_SHARED_UMEM;
+
+       txr = ifobject->tx_on ? &xsk->tx : NULL;
+       rxr = ifobject->rx_on ? &xsk->rx : NULL;
+       return xsk_socket__create(&xsk->xsk, ifobject->ifname, 0, umem->umem, rxr, txr, &cfg);
+}
+
+static struct option long_options[] = {
+       {"interface", required_argument, 0, 'i'},
+       {"busy-poll", no_argument, 0, 'b'},
+       {"dump-pkts", no_argument, 0, 'D'},
+       {"verbose", no_argument, 0, 'v'},
+       {0, 0, 0, 0}
+};
+
+static void usage(const char *prog)
+{
+       const char *str =
+               "  Usage: %s [OPTIONS]\n"
+               "  Options:\n"
+               "  -i, --interface      Use interface\n"
+               "  -D, --dump-pkts      Dump packets L2 - L5\n"
+               "  -v, --verbose        Verbose output\n"
+               "  -b, --busy-poll      Enable busy poll\n";
+
+       ksft_print_msg(str, prog);
+}
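+
+/* Example invocation (illustrative; the interface and namespace names are
+ * placeholders, passed as -i <ifname>,<netns> pairs as parsed below):
+ *
+ *     ./xskxceiver -i veth0,ns0 -i veth1,ns1 -v
+ */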
+
+static int switch_namespace(const char *nsname)
+{
+       char fqns[26] = "/var/run/netns/";
+       int nsfd;
+
+       if (!nsname || strlen(nsname) == 0)
+               return -1;
+
+       strncat(fqns, nsname, sizeof(fqns) - strlen(fqns) - 1);
+       nsfd = open(fqns, O_RDONLY);
+
+       if (nsfd == -1)
+               exit_with_error(errno);
+
+       if (setns(nsfd, 0) == -1)
+               exit_with_error(errno);
+
+       print_verbose("NS switched: %s\n", nsname);
+
+       return nsfd;
+}
+
+static bool validate_interface(struct ifobject *ifobj)
+{
+       if (!strcmp(ifobj->ifname, ""))
+               return false;
+       return true;
+}
+
+static void parse_command_line(struct ifobject *ifobj_tx, struct ifobject *ifobj_rx, int argc,
+                              char **argv)
+{
+       struct ifobject *ifobj;
+       u32 interface_nb = 0;
+       int option_index, c;
+
+       opterr = 0;
+
+       for (;;) {
+               char *sptr, *token;
+
+               c = getopt_long(argc, argv, "i:Dvb", long_options, &option_index);
+               if (c == -1)
+                       break;
+
+               switch (c) {
+               case 'i':
+                       if (interface_nb == 0)
+                               ifobj = ifobj_tx;
+                       else if (interface_nb == 1)
+                               ifobj = ifobj_rx;
+                       else
+                               break;
+
+                       sptr = strndupa(optarg, strlen(optarg));
+                       memcpy(ifobj->ifname, strsep(&sptr, ","), MAX_INTERFACE_NAME_CHARS);
+                       token = strsep(&sptr, ",");
+                       if (token)
+                               memcpy(ifobj->nsname, token, MAX_INTERFACES_NAMESPACE_CHARS);
+                       interface_nb++;
+                       break;
+               case 'D':
+                       opt_pkt_dump = true;
+                       break;
+               case 'v':
+                       opt_verbose = true;
+                       break;
+               case 'b':
+                       ifobj_tx->busy_poll = true;
+                       ifobj_rx->busy_poll = true;
+                       break;
+               default:
+                       usage(basename(argv[0]));
+                       ksft_exit_xfail();
+               }
+       }
+}
+
+static void __test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx,
+                            struct ifobject *ifobj_rx)
+{
+       u32 i, j;
+
+       for (i = 0; i < MAX_INTERFACES; i++) {
+               struct ifobject *ifobj = i ? ifobj_rx : ifobj_tx;
+
+               ifobj->xsk = &ifobj->xsk_arr[0];
+               ifobj->use_poll = false;
+               ifobj->use_fill_ring = true;
+               ifobj->release_rx = true;
+               ifobj->pkt_stream = test->pkt_stream_default;
+               ifobj->validation_func = NULL;
+
+               if (i == 0) {
+                       ifobj->rx_on = false;
+                       ifobj->tx_on = true;
+               } else {
+                       ifobj->rx_on = true;
+                       ifobj->tx_on = false;
+               }
+
+               memset(ifobj->umem, 0, sizeof(*ifobj->umem));
+               ifobj->umem->num_frames = DEFAULT_UMEM_BUFFERS;
+               ifobj->umem->frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE;
+
+               for (j = 0; j < MAX_SOCKETS; j++) {
+                       memset(&ifobj->xsk_arr[j], 0, sizeof(ifobj->xsk_arr[j]));
+                       ifobj->xsk_arr[j].rxqsize = XSK_RING_CONS__DEFAULT_NUM_DESCS;
+               }
+       }
+
+       test->ifobj_tx = ifobj_tx;
+       test->ifobj_rx = ifobj_rx;
+       test->current_step = 0;
+       test->total_steps = 1;
+       test->nb_sockets = 1;
+       test->fail = false;
+}
+
+static void test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx,
+                          struct ifobject *ifobj_rx, enum test_mode mode)
+{
+       struct pkt_stream *pkt_stream;
+       u32 i;
+
+       pkt_stream = test->pkt_stream_default;
+       memset(test, 0, sizeof(*test));
+       test->pkt_stream_default = pkt_stream;
+
+       for (i = 0; i < MAX_INTERFACES; i++) {
+               struct ifobject *ifobj = i ? ifobj_rx : ifobj_tx;
+
+               ifobj->xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
+               if (mode == TEST_MODE_SKB)
+                       ifobj->xdp_flags |= XDP_FLAGS_SKB_MODE;
+               else
+                       ifobj->xdp_flags |= XDP_FLAGS_DRV_MODE;
+
+               ifobj->bind_flags = XDP_USE_NEED_WAKEUP | XDP_COPY;
+       }
+
+       __test_spec_init(test, ifobj_tx, ifobj_rx);
+}
+
+static void test_spec_reset(struct test_spec *test)
+{
+       __test_spec_init(test, test->ifobj_tx, test->ifobj_rx);
+}
+
+static void test_spec_set_name(struct test_spec *test, const char *name)
+{
+       strncpy(test->name, name, MAX_TEST_NAME_SIZE);
+}
+
+static void pkt_stream_reset(struct pkt_stream *pkt_stream)
+{
+       if (pkt_stream)
+               pkt_stream->rx_pkt_nb = 0;
+}
+
+static struct pkt *pkt_stream_get_pkt(struct pkt_stream *pkt_stream, u32 pkt_nb)
+{
+       if (pkt_nb >= pkt_stream->nb_pkts)
+               return NULL;
+
+       return &pkt_stream->pkts[pkt_nb];
+}
+
+static struct pkt *pkt_stream_get_next_rx_pkt(struct pkt_stream *pkt_stream, u32 *pkts_sent)
+{
+       while (pkt_stream->rx_pkt_nb < pkt_stream->nb_pkts) {
+               (*pkts_sent)++;
+               if (pkt_stream->pkts[pkt_stream->rx_pkt_nb].valid)
+                       return &pkt_stream->pkts[pkt_stream->rx_pkt_nb++];
+               pkt_stream->rx_pkt_nb++;
+       }
+       return NULL;
+}
+
+static void pkt_stream_delete(struct pkt_stream *pkt_stream)
+{
+       free(pkt_stream->pkts);
+       free(pkt_stream);
+}
+
+static void pkt_stream_restore_default(struct test_spec *test)
+{
+       struct pkt_stream *tx_pkt_stream = test->ifobj_tx->pkt_stream;
+
+       if (tx_pkt_stream != test->pkt_stream_default) {
+               pkt_stream_delete(test->ifobj_tx->pkt_stream);
+               test->ifobj_tx->pkt_stream = test->pkt_stream_default;
+       }
+
+       if (test->ifobj_rx->pkt_stream != test->pkt_stream_default &&
+           test->ifobj_rx->pkt_stream != tx_pkt_stream)
+               pkt_stream_delete(test->ifobj_rx->pkt_stream);
+       test->ifobj_rx->pkt_stream = test->pkt_stream_default;
+}
+
+static struct pkt_stream *__pkt_stream_alloc(u32 nb_pkts)
+{
+       struct pkt_stream *pkt_stream;
+
+       pkt_stream = calloc(1, sizeof(*pkt_stream));
+       if (!pkt_stream)
+               return NULL;
+
+       pkt_stream->pkts = calloc(nb_pkts, sizeof(*pkt_stream->pkts));
+       if (!pkt_stream->pkts) {
+               free(pkt_stream);
+               return NULL;
+       }
+
+       pkt_stream->nb_pkts = nb_pkts;
+       return pkt_stream;
+}
+
+static void pkt_set(struct xsk_umem_info *umem, struct pkt *pkt, u64 addr, u32 len)
+{
+       pkt->addr = addr;
+       pkt->len = len;
+       if (len > umem->frame_size - XDP_PACKET_HEADROOM - MIN_PKT_SIZE * 2 - umem->frame_headroom)
+               pkt->valid = false;
+       else
+               pkt->valid = true;
+}
+
+static struct pkt_stream *pkt_stream_generate(struct xsk_umem_info *umem, u32 nb_pkts, u32 pkt_len)
+{
+       struct pkt_stream *pkt_stream;
+       u32 i;
+
+       pkt_stream = __pkt_stream_alloc(nb_pkts);
+       if (!pkt_stream)
+               exit_with_error(ENOMEM);
+
+       pkt_stream->nb_pkts = nb_pkts;
+       for (i = 0; i < nb_pkts; i++) {
+               pkt_set(umem, &pkt_stream->pkts[i], (i % umem->num_frames) * umem->frame_size,
+                       pkt_len);
+               pkt_stream->pkts[i].payload = i;
+       }
+
+       return pkt_stream;
+}
+
+static struct pkt_stream *pkt_stream_clone(struct xsk_umem_info *umem,
+                                          struct pkt_stream *pkt_stream)
+{
+       return pkt_stream_generate(umem, pkt_stream->nb_pkts, pkt_stream->pkts[0].len);
+}
+
+static void pkt_stream_replace(struct test_spec *test, u32 nb_pkts, u32 pkt_len)
+{
+       struct pkt_stream *pkt_stream;
+
+       pkt_stream = pkt_stream_generate(test->ifobj_tx->umem, nb_pkts, pkt_len);
+       test->ifobj_tx->pkt_stream = pkt_stream;
+       test->ifobj_rx->pkt_stream = pkt_stream;
+}
+
+static void pkt_stream_replace_half(struct test_spec *test, u32 pkt_len, int offset)
+{
+       struct xsk_umem_info *umem = test->ifobj_tx->umem;
+       struct pkt_stream *pkt_stream;
+       u32 i;
+
+       pkt_stream = pkt_stream_clone(umem, test->pkt_stream_default);
+       for (i = 1; i < test->pkt_stream_default->nb_pkts; i += 2)
+               pkt_set(umem, &pkt_stream->pkts[i],
+                       (i % umem->num_frames) * umem->frame_size + offset, pkt_len);
+
+       test->ifobj_tx->pkt_stream = pkt_stream;
+       test->ifobj_rx->pkt_stream = pkt_stream;
+}
+
+static void pkt_stream_receive_half(struct test_spec *test)
+{
+       struct xsk_umem_info *umem = test->ifobj_rx->umem;
+       struct pkt_stream *pkt_stream = test->ifobj_tx->pkt_stream;
+       u32 i;
+
+       test->ifobj_rx->pkt_stream = pkt_stream_generate(umem, pkt_stream->nb_pkts,
+                                                        pkt_stream->pkts[0].len);
+       pkt_stream = test->ifobj_rx->pkt_stream;
+       for (i = 1; i < pkt_stream->nb_pkts; i += 2)
+               pkt_stream->pkts[i].valid = false;
+}
+
+static struct pkt *pkt_generate(struct ifobject *ifobject, u32 pkt_nb)
+{
+       struct pkt *pkt = pkt_stream_get_pkt(ifobject->pkt_stream, pkt_nb);
+       struct udphdr *udp_hdr;
+       struct ethhdr *eth_hdr;
+       struct iphdr *ip_hdr;
+       void *data;
+
+       if (!pkt)
+               return NULL;
+       if (!pkt->valid || pkt->len < MIN_PKT_SIZE)
+               return pkt;
+
+       data = xsk_umem__get_data(ifobject->umem->buffer, pkt->addr);
+       udp_hdr = (struct udphdr *)(data + sizeof(struct ethhdr) + sizeof(struct iphdr));
+       ip_hdr = (struct iphdr *)(data + sizeof(struct ethhdr));
+       eth_hdr = (struct ethhdr *)data;
+
+       gen_udp_hdr(pkt_nb, data, ifobject, udp_hdr);
+       gen_ip_hdr(ifobject, ip_hdr);
+       gen_udp_csum(udp_hdr, ip_hdr);
+       gen_eth_hdr(ifobject, eth_hdr);
+
+       return pkt;
+}
+
+static void pkt_stream_generate_custom(struct test_spec *test, struct pkt *pkts, u32 nb_pkts)
+{
+       struct pkt_stream *pkt_stream;
+       u32 i;
+
+       pkt_stream = __pkt_stream_alloc(nb_pkts);
+       if (!pkt_stream)
+               exit_with_error(ENOMEM);
+
+       test->ifobj_tx->pkt_stream = pkt_stream;
+       test->ifobj_rx->pkt_stream = pkt_stream;
+
+       for (i = 0; i < nb_pkts; i++) {
+               pkt_stream->pkts[i].addr = pkts[i].addr;
+               pkt_stream->pkts[i].len = pkts[i].len;
+               pkt_stream->pkts[i].payload = i;
+               pkt_stream->pkts[i].valid = pkts[i].valid;
+       }
+}
+
+static void pkt_dump(void *pkt, u32 len)
+{
+       char s[INET_ADDRSTRLEN];
+       struct ethhdr *ethhdr;
+       struct udphdr *udphdr;
+       struct iphdr *iphdr;
+       int payload, i;
+
+       ethhdr = pkt;
+       iphdr = pkt + sizeof(*ethhdr);
+       udphdr = pkt + sizeof(*ethhdr) + sizeof(*iphdr);
+
+       /* extract L2 frame */
+       fprintf(stdout, "DEBUG>> L2: dst mac: ");
+       for (i = 0; i < ETH_ALEN; i++)
+               fprintf(stdout, "%02X", ethhdr->h_dest[i]);
+
+       fprintf(stdout, "\nDEBUG>> L2: src mac: ");
+       for (i = 0; i < ETH_ALEN; i++)
+               fprintf(stdout, "%02X", ethhdr->h_source[i]);
+
+       /* extract L3 frame */
+       fprintf(stdout, "\nDEBUG>> L3: ip_hdr->ihl: %02X\n", iphdr->ihl);
+       fprintf(stdout, "DEBUG>> L3: ip_hdr->saddr: %s\n",
+               inet_ntop(AF_INET, &iphdr->saddr, s, sizeof(s)));
+       fprintf(stdout, "DEBUG>> L3: ip_hdr->daddr: %s\n",
+               inet_ntop(AF_INET, &iphdr->daddr, s, sizeof(s)));
+       /* extract L4 frame */
+       fprintf(stdout, "DEBUG>> L4: udp_hdr->src: %d\n", ntohs(udphdr->source));
+       fprintf(stdout, "DEBUG>> L4: udp_hdr->dst: %d\n", ntohs(udphdr->dest));
+       /* extract L5 frame */
+       payload = *((uint32_t *)(pkt + PKT_HDR_SIZE));
+
+       fprintf(stdout, "DEBUG>> L5: payload: %d\n", payload);
+       fprintf(stdout, "---------------------------------------\n");
+}
+
+static bool is_offset_correct(struct xsk_umem_info *umem, struct pkt_stream *pkt_stream, u64 addr,
+                             u64 pkt_stream_addr)
+{
+       u32 headroom = umem->unaligned_mode ? 0 : umem->frame_headroom;
+       u32 offset = addr % umem->frame_size, expected_offset = 0;
+
+       if (!pkt_stream->use_addr_for_fill)
+               pkt_stream_addr = 0;
+
+       expected_offset += (pkt_stream_addr + headroom + XDP_PACKET_HEADROOM) % umem->frame_size;
+
+       if (offset == expected_offset)
+               return true;
+
+       ksft_print_msg("[%s] expected [%u], got [%u]\n", __func__, expected_offset, offset);
+       return false;
+}
+
+static bool is_pkt_valid(struct pkt *pkt, void *buffer, u64 addr, u32 len)
+{
+       void *data = xsk_umem__get_data(buffer, addr);
+       struct iphdr *iphdr = (struct iphdr *)(data + sizeof(struct ethhdr));
+
+       if (!pkt) {
+               ksft_print_msg("[%s] too many packets received\n", __func__);
+               return false;
+       }
+
+       if (len < MIN_PKT_SIZE || pkt->len < MIN_PKT_SIZE) {
+               /* Do not try to verify packets that are smaller than minimum size. */
+               return true;
+       }
+
+       if (pkt->len != len) {
+               ksft_print_msg("[%s] expected length [%d], got length [%d]\n",
+                              __func__, pkt->len, len);
+               return false;
+       }
+
+       if (iphdr->version == IP_PKT_VER && iphdr->tos == IP_PKT_TOS) {
+               u32 seqnum = ntohl(*((u32 *)(data + PKT_HDR_SIZE)));
+
+               if (opt_pkt_dump)
+                       pkt_dump(data, PKT_SIZE);
+
+               if (pkt->payload != seqnum) {
+                       ksft_print_msg("[%s] expected seqnum [%d], got seqnum [%d]\n",
+                                      __func__, pkt->payload, seqnum);
+                       return false;
+               }
+       } else {
+               ksft_print_msg("Invalid frame received: ");
+               ksft_print_msg("[IP_PKT_VER: %02X], [IP_PKT_TOS: %02X]\n", iphdr->version,
+                              iphdr->tos);
+               return false;
+       }
+
+       return true;
+}
+
+static void kick_tx(struct xsk_socket_info *xsk)
+{
+       int ret;
+
+       ret = sendto(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, 0);
+       if (ret >= 0)
+               return;
+       if (errno == ENOBUFS || errno == EAGAIN || errno == EBUSY || errno == ENETDOWN) {
+               usleep(100);
+               return;
+       }
+       exit_with_error(errno);
+}
+
+static void kick_rx(struct xsk_socket_info *xsk)
+{
+       int ret;
+
+       ret = recvfrom(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, NULL);
+       if (ret < 0)
+               exit_with_error(errno);
+}
+
+static int complete_pkts(struct xsk_socket_info *xsk, int batch_size)
+{
+       unsigned int rcvd;
+       u32 idx;
+
+       if (xsk_ring_prod__needs_wakeup(&xsk->tx))
+               kick_tx(xsk);
+
+       rcvd = xsk_ring_cons__peek(&xsk->umem->cq, batch_size, &idx);
+       if (rcvd) {
+               if (rcvd > xsk->outstanding_tx) {
+                       u64 addr = *xsk_ring_cons__comp_addr(&xsk->umem->cq, idx + rcvd - 1);
+
+                       ksft_print_msg("[%s] Too many packets completed\n", __func__);
+                       ksft_print_msg("Last completion address: %llx\n", addr);
+                       return TEST_FAILURE;
+               }
+
+               xsk_ring_cons__release(&xsk->umem->cq, rcvd);
+               xsk->outstanding_tx -= rcvd;
+       }
+
+       return TEST_PASS;
+}
+
+static int receive_pkts(struct ifobject *ifobj, struct pollfd *fds)
+{
+       struct timeval tv_end, tv_now, tv_timeout = {RECV_TMOUT, 0};
+       u32 idx_rx = 0, idx_fq = 0, rcvd, i, pkts_sent = 0;
+       struct pkt_stream *pkt_stream = ifobj->pkt_stream;
+       struct xsk_socket_info *xsk = ifobj->xsk;
+       struct xsk_umem_info *umem = xsk->umem;
+       struct pkt *pkt;
+       int ret;
+
+       ret = gettimeofday(&tv_now, NULL);
+       if (ret)
+               exit_with_error(errno);
+       timeradd(&tv_now, &tv_timeout, &tv_end);
+
+       pkt = pkt_stream_get_next_rx_pkt(pkt_stream, &pkts_sent);
+       while (pkt) {
+               ret = gettimeofday(&tv_now, NULL);
+               if (ret)
+                       exit_with_error(errno);
+               if (timercmp(&tv_now, &tv_end, >)) {
+                       ksft_print_msg("ERROR: [%s] Receive loop timed out\n", __func__);
+                       return TEST_FAILURE;
+               }
+
+               kick_rx(xsk);
+
+               rcvd = xsk_ring_cons__peek(&xsk->rx, BATCH_SIZE, &idx_rx);
+               if (!rcvd) {
+                       if (xsk_ring_prod__needs_wakeup(&umem->fq)) {
+                               ret = poll(fds, 1, POLL_TMOUT);
+                               if (ret < 0)
+                                       exit_with_error(-ret);
+                       }
+                       continue;
+               }
+
+               if (ifobj->use_fill_ring) {
+                       ret = xsk_ring_prod__reserve(&umem->fq, rcvd, &idx_fq);
+                       while (ret != rcvd) {
+                               if (ret < 0)
+                                       exit_with_error(-ret);
+                               if (xsk_ring_prod__needs_wakeup(&umem->fq)) {
+                                       ret = poll(fds, 1, POLL_TMOUT);
+                                       if (ret < 0)
+                                               exit_with_error(-ret);
+                               }
+                               ret = xsk_ring_prod__reserve(&umem->fq, rcvd, &idx_fq);
+                       }
+               }
+
+               for (i = 0; i < rcvd; i++) {
+                       const struct xdp_desc *desc = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx++);
+                       u64 addr = desc->addr, orig;
+
+                       orig = xsk_umem__extract_addr(addr);
+                       addr = xsk_umem__add_offset_to_addr(addr);
+
+                       if (!is_pkt_valid(pkt, umem->buffer, addr, desc->len) ||
+                           !is_offset_correct(umem, pkt_stream, addr, pkt->addr))
+                               return TEST_FAILURE;
+
+                       if (ifobj->use_fill_ring)
+                               *xsk_ring_prod__fill_addr(&umem->fq, idx_fq++) = orig;
+                       pkt = pkt_stream_get_next_rx_pkt(pkt_stream, &pkts_sent);
+               }
+
+               if (ifobj->use_fill_ring)
+                       xsk_ring_prod__submit(&umem->fq, rcvd);
+               if (ifobj->release_rx)
+                       xsk_ring_cons__release(&xsk->rx, rcvd);
+
+               pthread_mutex_lock(&pacing_mutex);
+               pkts_in_flight -= pkts_sent;
+               if (pkts_in_flight < umem->num_frames)
+                       pthread_cond_signal(&pacing_cond);
+               pthread_mutex_unlock(&pacing_mutex);
+               pkts_sent = 0;
+       }
+
+       return TEST_PASS;
+}
+
+static int __send_pkts(struct ifobject *ifobject, u32 *pkt_nb)
+{
+       struct xsk_socket_info *xsk = ifobject->xsk;
+       u32 i, idx, valid_pkts = 0;
+
+       while (xsk_ring_prod__reserve(&xsk->tx, BATCH_SIZE, &idx) < BATCH_SIZE)
+               complete_pkts(xsk, BATCH_SIZE);
+
+       for (i = 0; i < BATCH_SIZE; i++) {
+               struct xdp_desc *tx_desc = xsk_ring_prod__tx_desc(&xsk->tx, idx + i);
+               struct pkt *pkt = pkt_generate(ifobject, *pkt_nb);
+
+               if (!pkt)
+                       break;
+
+               tx_desc->addr = pkt->addr;
+               tx_desc->len = pkt->len;
+               (*pkt_nb)++;
+               if (pkt->valid)
+                       valid_pkts++;
+       }
+
+       pthread_mutex_lock(&pacing_mutex);
+       pkts_in_flight += valid_pkts;
+       /* pkts_in_flight might be negative if many invalid packets are sent */
+       if (pkts_in_flight >= (int)(ifobject->umem->num_frames - BATCH_SIZE)) {
+               kick_tx(xsk);
+               pthread_cond_wait(&pacing_cond, &pacing_mutex);
+       }
+       pthread_mutex_unlock(&pacing_mutex);
+
+       xsk_ring_prod__submit(&xsk->tx, i);
+       xsk->outstanding_tx += valid_pkts;
+       if (complete_pkts(xsk, i))
+               return TEST_FAILURE;
+
+       usleep(10);
+       return TEST_PASS;
+}
+
+static void wait_for_tx_completion(struct xsk_socket_info *xsk)
+{
+       while (xsk->outstanding_tx)
+               complete_pkts(xsk, BATCH_SIZE);
+}
+
+static int send_pkts(struct test_spec *test, struct ifobject *ifobject)
+{
+       struct pollfd fds = { };
+       u32 pkt_cnt = 0;
+
+       fds.fd = xsk_socket__fd(ifobject->xsk->xsk);
+       fds.events = POLLOUT;
+
+       while (pkt_cnt < ifobject->pkt_stream->nb_pkts) {
+               int err;
+
+               if (ifobject->use_poll) {
+                       int ret;
+
+                       ret = poll(&fds, 1, POLL_TMOUT);
+                       if (ret <= 0)
+                               continue;
+
+                       if (!(fds.revents & POLLOUT))
+                               continue;
+               }
+
+               err = __send_pkts(ifobject, &pkt_cnt);
+               if (err || test->fail)
+                       return TEST_FAILURE;
+       }
+
+       wait_for_tx_completion(ifobject->xsk);
+       return TEST_PASS;
+}
+
+static int get_xsk_stats(struct xsk_socket *xsk, struct xdp_statistics *stats)
+{
+       int fd = xsk_socket__fd(xsk), err;
+       socklen_t optlen, expected_len;
+
+       optlen = sizeof(*stats);
+       err = getsockopt(fd, SOL_XDP, XDP_STATISTICS, stats, &optlen);
+       if (err) {
+               ksft_print_msg("[%s] getsockopt(XDP_STATISTICS) error %u %s\n",
+                              __func__, -err, strerror(-err));
+               return TEST_FAILURE;
+       }
+
+       expected_len = sizeof(struct xdp_statistics);
+       if (optlen != expected_len) {
+               ksft_print_msg("[%s] getsockopt optlen error. Expected: %u got: %u\n",
+                              __func__, expected_len, optlen);
+               return TEST_FAILURE;
+       }
+
+       return TEST_PASS;
+}
+
+static int validate_rx_dropped(struct ifobject *ifobject)
+{
+       struct xsk_socket *xsk = ifobject->xsk->xsk;
+       struct xdp_statistics stats;
+       int err;
+
+       kick_rx(ifobject->xsk);
+
+       err = get_xsk_stats(xsk, &stats);
+       if (err)
+               return TEST_FAILURE;
+
+       if (stats.rx_dropped == ifobject->pkt_stream->nb_pkts / 2)
+               return TEST_PASS;
+
+       return TEST_FAILURE;
+}
+
+static int validate_rx_full(struct ifobject *ifobject)
+{
+       struct xsk_socket *xsk = ifobject->xsk->xsk;
+       struct xdp_statistics stats;
+       int err;
+
+       usleep(1000);
+       kick_rx(ifobject->xsk);
+
+       err = get_xsk_stats(xsk, &stats);
+       if (err)
+               return TEST_FAILURE;
+
+       if (stats.rx_ring_full)
+               return TEST_PASS;
+
+       return TEST_FAILURE;
+}
+
+static int validate_fill_empty(struct ifobject *ifobject)
+{
+       struct xsk_socket *xsk = ifobject->xsk->xsk;
+       struct xdp_statistics stats;
+       int err;
+
+       usleep(1000);
+       kick_rx(ifobject->xsk);
+
+       err = get_xsk_stats(xsk, &stats);
+       if (err)
+               return TEST_FAILURE;
+
+       if (stats.rx_fill_ring_empty_descs)
+               return TEST_PASS;
+
+       return TEST_FAILURE;
+}
+
+static int validate_tx_invalid_descs(struct ifobject *ifobject)
+{
+       struct xsk_socket *xsk = ifobject->xsk->xsk;
+       int fd = xsk_socket__fd(xsk);
+       struct xdp_statistics stats;
+       socklen_t optlen;
+       int err;
+
+       optlen = sizeof(stats);
+       err = getsockopt(fd, SOL_XDP, XDP_STATISTICS, &stats, &optlen);
+       if (err) {
+               ksft_print_msg("[%s] getsockopt(XDP_STATISTICS) error %u %s\n",
+                              __func__, -err, strerror(-err));
+               return TEST_FAILURE;
+       }
+
+       if (stats.tx_invalid_descs != ifobject->pkt_stream->nb_pkts / 2) {
+               ksft_print_msg("[%s] tx_invalid_descs incorrect. Got [%u] expected [%u]\n",
+                              __func__, stats.tx_invalid_descs, ifobject->pkt_stream->nb_pkts / 2);
+               return TEST_FAILURE;
+       }
+
+       return TEST_PASS;
+}
+
+static void thread_common_ops(struct test_spec *test, struct ifobject *ifobject)
+{
+       u64 umem_sz = ifobject->umem->num_frames * ifobject->umem->frame_size;
+       int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE;
+       LIBBPF_OPTS(bpf_xdp_query_opts, opts);
+       int ret, ifindex;
+       void *bufs;
+       u32 i;
+
+       ifobject->ns_fd = switch_namespace(ifobject->nsname);
+
+       if (ifobject->umem->unaligned_mode)
+               mmap_flags |= MAP_HUGETLB;
+
+       bufs = mmap(NULL, umem_sz, PROT_READ | PROT_WRITE, mmap_flags, -1, 0);
+       if (bufs == MAP_FAILED)
+               exit_with_error(errno);
+
+       ret = xsk_configure_umem(ifobject->umem, bufs, umem_sz);
+       if (ret)
+               exit_with_error(-ret);
+
+       for (i = 0; i < test->nb_sockets; i++) {
+               u32 ctr = 0;
+
+               while (ctr++ < SOCK_RECONF_CTR) {
+                       ret = xsk_configure_socket(&ifobject->xsk_arr[i], ifobject->umem,
+                                                  ifobject, !!i);
+                       if (!ret)
+                               break;
+
+                       /* Retry if it fails as xsk_socket__create() is asynchronous */
+                       if (ctr >= SOCK_RECONF_CTR)
+                               exit_with_error(-ret);
+                       usleep(USLEEP_MAX);
+               }
+
+               if (ifobject->busy_poll)
+                       enable_busy_poll(&ifobject->xsk_arr[i]);
+       }
+
+       ifobject->xsk = &ifobject->xsk_arr[0];
+
+       if (!ifobject->rx_on)
+               return;
+
+       ifindex = if_nametoindex(ifobject->ifname);
+       if (!ifindex)
+               exit_with_error(errno);
+
+       ret = xsk_setup_xdp_prog_xsk(ifobject->xsk->xsk, &ifobject->xsk_map_fd);
+       if (ret)
+               exit_with_error(-ret);
+
+       ret = bpf_xdp_query(ifindex, ifobject->xdp_flags, &opts);
+       if (ret)
+               exit_with_error(-ret);
+
+       if (ifobject->xdp_flags & XDP_FLAGS_SKB_MODE) {
+               if (opts.attach_mode != XDP_ATTACHED_SKB) {
+                       ksft_print_msg("ERROR: [%s] XDP prog not in SKB mode\n");
+                       exit_with_error(-EINVAL);
+               }
+       } else if (ifobject->xdp_flags & XDP_FLAGS_DRV_MODE) {
+               if (opts.attach_mode != XDP_ATTACHED_DRV) {
+                       ksft_print_msg("ERROR: [%s] XDP prog not in DRV mode\n");
+                       exit_with_error(-EINVAL);
+               }
+       }
+
+       ret = xsk_socket__update_xskmap(ifobject->xsk->xsk, ifobject->xsk_map_fd);
+       if (ret)
+               exit_with_error(-ret);
+}
+
+static void testapp_cleanup_xsk_res(struct ifobject *ifobj)
+{
+       print_verbose("Destroying socket\n");
+       xsk_socket__delete(ifobj->xsk->xsk);
+       munmap(ifobj->umem->buffer, ifobj->umem->num_frames * ifobj->umem->frame_size);
+       xsk_umem__delete(ifobj->umem->umem);
+}
+
+static void *worker_testapp_validate_tx(void *arg)
+{
+       struct test_spec *test = (struct test_spec *)arg;
+       struct ifobject *ifobject = test->ifobj_tx;
+       int err;
+
+       if (test->current_step == 1)
+               thread_common_ops(test, ifobject);
+
+       print_verbose("Sending %d packets on interface %s\n", ifobject->pkt_stream->nb_pkts,
+                     ifobject->ifname);
+       err = send_pkts(test, ifobject);
+
+       if (!err && ifobject->validation_func)
+               err = ifobject->validation_func(ifobject);
+       if (err)
+               report_failure(test);
+
+       if (test->total_steps == test->current_step || err)
+               testapp_cleanup_xsk_res(ifobject);
+       pthread_exit(NULL);
+}
+
+static void xsk_populate_fill_ring(struct xsk_umem_info *umem, struct pkt_stream *pkt_stream)
+{
+       u32 idx = 0, i, buffers_to_fill;
+       int ret;
+
+       if (umem->num_frames < XSK_RING_PROD__DEFAULT_NUM_DESCS)
+               buffers_to_fill = umem->num_frames;
+       else
+               buffers_to_fill = XSK_RING_PROD__DEFAULT_NUM_DESCS;
+
+       ret = xsk_ring_prod__reserve(&umem->fq, buffers_to_fill, &idx);
+       if (ret != buffers_to_fill)
+               exit_with_error(ENOSPC);
+       for (i = 0; i < buffers_to_fill; i++) {
+               u64 addr;
+
+               if (pkt_stream->use_addr_for_fill) {
+                       struct pkt *pkt = pkt_stream_get_pkt(pkt_stream, i);
+
+                       if (!pkt)
+                               break;
+                       addr = pkt->addr;
+               } else {
+                       addr = i * umem->frame_size;
+               }
+
+               *xsk_ring_prod__fill_addr(&umem->fq, idx++) = addr;
+       }
+       xsk_ring_prod__submit(&umem->fq, buffers_to_fill);
+}
+
+static void *worker_testapp_validate_rx(void *arg)
+{
+       struct test_spec *test = (struct test_spec *)arg;
+       struct ifobject *ifobject = test->ifobj_rx;
+       struct pollfd fds = { };
+       int err;
+
+       if (test->current_step == 1)
+               thread_common_ops(test, ifobject);
+
+       xsk_populate_fill_ring(ifobject->umem, ifobject->pkt_stream);
+
+       fds.fd = xsk_socket__fd(ifobject->xsk->xsk);
+       fds.events = POLLIN;
+
+       pthread_barrier_wait(&barr);
+
+       err = receive_pkts(ifobject, &fds);
+
+       if (!err && ifobject->validation_func)
+               err = ifobject->validation_func(ifobject);
+       if (err) {
+               report_failure(test);
+               pthread_mutex_lock(&pacing_mutex);
+               pthread_cond_signal(&pacing_cond);
+               pthread_mutex_unlock(&pacing_mutex);
+       }
+
+       if (test->total_steps == test->current_step || err)
+               testapp_cleanup_xsk_res(ifobject);
+       pthread_exit(NULL);
+}
+
+static int testapp_validate_traffic(struct test_spec *test)
+{
+       struct ifobject *ifobj_tx = test->ifobj_tx;
+       struct ifobject *ifobj_rx = test->ifobj_rx;
+       pthread_t t0, t1;
+
+       if (pthread_barrier_init(&barr, NULL, 2))
+               exit_with_error(errno);
+
+       test->current_step++;
+       pkt_stream_reset(ifobj_rx->pkt_stream);
+       pkts_in_flight = 0;
+
+       /* Spawn RX thread */
+       pthread_create(&t0, NULL, ifobj_rx->func_ptr, test);
+
+       pthread_barrier_wait(&barr);
+       if (pthread_barrier_destroy(&barr))
+               exit_with_error(errno);
+
+       /* Spawn TX thread */
+       pthread_create(&t1, NULL, ifobj_tx->func_ptr, test);
+
+       pthread_join(t1, NULL);
+       pthread_join(t0, NULL);
+
+       return !!test->fail;
+}
+
+static void testapp_teardown(struct test_spec *test)
+{
+       int i;
+
+       test_spec_set_name(test, "TEARDOWN");
+       for (i = 0; i < MAX_TEARDOWN_ITER; i++) {
+               if (testapp_validate_traffic(test))
+                       return;
+               test_spec_reset(test);
+       }
+}
+
+static void swap_directions(struct ifobject **ifobj1, struct ifobject **ifobj2)
+{
+       thread_func_t tmp_func_ptr = (*ifobj1)->func_ptr;
+       struct ifobject *tmp_ifobj = (*ifobj1);
+
+       (*ifobj1)->func_ptr = (*ifobj2)->func_ptr;
+       (*ifobj2)->func_ptr = tmp_func_ptr;
+
+       *ifobj1 = *ifobj2;
+       *ifobj2 = tmp_ifobj;
+}
+
+static void testapp_bidi(struct test_spec *test)
+{
+       test_spec_set_name(test, "BIDIRECTIONAL");
+       test->ifobj_tx->rx_on = true;
+       test->ifobj_rx->tx_on = true;
+       test->total_steps = 2;
+       if (testapp_validate_traffic(test))
+               return;
+
+       print_verbose("Switching Tx/Rx vectors\n");
+       swap_directions(&test->ifobj_rx, &test->ifobj_tx);
+       testapp_validate_traffic(test);
+
+       swap_directions(&test->ifobj_rx, &test->ifobj_tx);
+}
+
+static void swap_xsk_resources(struct ifobject *ifobj_tx, struct ifobject *ifobj_rx)
+{
+       int ret;
+
+       xsk_socket__delete(ifobj_tx->xsk->xsk);
+       xsk_socket__delete(ifobj_rx->xsk->xsk);
+       ifobj_tx->xsk = &ifobj_tx->xsk_arr[1];
+       ifobj_rx->xsk = &ifobj_rx->xsk_arr[1];
+
+       ret = xsk_socket__update_xskmap(ifobj_rx->xsk->xsk, ifobj_rx->xsk_map_fd);
+       if (ret)
+               exit_with_error(-ret);
+}
+
+static void testapp_bpf_res(struct test_spec *test)
+{
+       test_spec_set_name(test, "BPF_RES");
+       test->total_steps = 2;
+       test->nb_sockets = 2;
+       if (testapp_validate_traffic(test))
+               return;
+
+       swap_xsk_resources(test->ifobj_tx, test->ifobj_rx);
+       testapp_validate_traffic(test);
+}
+
+static void testapp_headroom(struct test_spec *test)
+{
+       test_spec_set_name(test, "UMEM_HEADROOM");
+       test->ifobj_rx->umem->frame_headroom = UMEM_HEADROOM_TEST_SIZE;
+       testapp_validate_traffic(test);
+}
+
+static void testapp_stats_rx_dropped(struct test_spec *test)
+{
+       test_spec_set_name(test, "STAT_RX_DROPPED");
+       test->ifobj_rx->umem->frame_headroom = test->ifobj_rx->umem->frame_size -
+               XDP_PACKET_HEADROOM - MIN_PKT_SIZE * 3;
+       pkt_stream_replace_half(test, MIN_PKT_SIZE * 4, 0);
+       pkt_stream_receive_half(test);
+       test->ifobj_rx->validation_func = validate_rx_dropped;
+       testapp_validate_traffic(test);
+}
+
+static void testapp_stats_tx_invalid_descs(struct test_spec *test)
+{
+       test_spec_set_name(test, "STAT_TX_INVALID");
+       pkt_stream_replace_half(test, XSK_UMEM__INVALID_FRAME_SIZE, 0);
+       test->ifobj_tx->validation_func = validate_tx_invalid_descs;
+       testapp_validate_traffic(test);
+
+       pkt_stream_restore_default(test);
+}
+
+static void testapp_stats_rx_full(struct test_spec *test)
+{
+       test_spec_set_name(test, "STAT_RX_FULL");
+       pkt_stream_replace(test, DEFAULT_UMEM_BUFFERS + DEFAULT_UMEM_BUFFERS / 2, PKT_SIZE);
+       test->ifobj_rx->pkt_stream = pkt_stream_generate(test->ifobj_rx->umem,
+                                                        DEFAULT_UMEM_BUFFERS, PKT_SIZE);
+       if (!test->ifobj_rx->pkt_stream)
+               exit_with_error(ENOMEM);
+
+       test->ifobj_rx->xsk->rxqsize = DEFAULT_UMEM_BUFFERS;
+       test->ifobj_rx->release_rx = false;
+       test->ifobj_rx->validation_func = validate_rx_full;
+       testapp_validate_traffic(test);
+
+       pkt_stream_restore_default(test);
+}
+
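+/* Run with the fill ring left unused so that Rx has no buffers to
+ * receive packets into.
+ */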
+static void testapp_stats_fill_empty(struct test_spec *test)
+{
+       test_spec_set_name(test, "STAT_RX_FILL_EMPTY");
+       pkt_stream_replace(test, DEFAULT_UMEM_BUFFERS + DEFAULT_UMEM_BUFFERS / 2, PKT_SIZE);
+       test->ifobj_rx->pkt_stream = pkt_stream_generate(test->ifobj_rx->umem,
+                                                        DEFAULT_UMEM_BUFFERS, PKT_SIZE);
+       if (!test->ifobj_rx->pkt_stream)
+               exit_with_error(ENOMEM);
+
+       test->ifobj_rx->use_fill_ring = false;
+       test->ifobj_rx->validation_func = validate_fill_empty;
+       testapp_validate_traffic(test);
+
+       pkt_stream_restore_default(test);
+}
+
+/* Probe whether 2M huge pages are available; the unaligned tests need them */
+static bool hugepages_present(struct ifobject *ifobject)
+{
+       const size_t mmap_sz = 2 * ifobject->umem->num_frames * ifobject->umem->frame_size;
+       void *bufs;
+
+       bufs = mmap(NULL, mmap_sz, PROT_READ | PROT_WRITE,
+                   MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);
+       if (bufs == MAP_FAILED)
+               return false;
+
+       munmap(bufs, mmap_sz);
+       return true;
+}
+
+static bool testapp_unaligned(struct test_spec *test)
+{
+       if (!hugepages_present(test->ifobj_tx)) {
+               ksft_test_result_skip("No 2M huge pages present.\n");
+               return false;
+       }
+
+       test_spec_set_name(test, "UNALIGNED_MODE");
+       test->ifobj_tx->umem->unaligned_mode = true;
+       test->ifobj_rx->umem->unaligned_mode = true;
+       /* Let half of the packets straddle a buffer boundary */
+       pkt_stream_replace_half(test, PKT_SIZE, -PKT_SIZE / 2);
+       test->ifobj_rx->pkt_stream->use_addr_for_fill = true;
+       testapp_validate_traffic(test);
+
+       pkt_stream_restore_default(test);
+       return true;
+}
+
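+/* Run to completion with a single custom packet instead of the default
+ * packet stream.
+ */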
+static void testapp_single_pkt(struct test_spec *test)
+{
+       struct pkt pkts[] = {{0x1000, PKT_SIZE, 0, true}};
+
+       pkt_stream_generate_custom(test, pkts, ARRAY_SIZE(pkts));
+       testapp_validate_traffic(test);
+       pkt_stream_restore_default(test);
+}
+
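+/* Feed a mix of valid and invalid descriptors and check that only the
+ * valid ones are received. The validity of two of them depends on the
+ * frame size and on unaligned mode, adjusted below.
+ */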
+static void testapp_invalid_desc(struct test_spec *test)
+{
+       struct pkt pkts[] = {
+               /* Zero packet address allowed */
+               {0, PKT_SIZE, 0, true},
+               /* Allowed packet */
+               {0x1000, PKT_SIZE, 0, true},
+               /* Straddling the start of umem */
+               {-2, PKT_SIZE, 0, false},
+               /* Packet too large */
+               {0x2000, XSK_UMEM__INVALID_FRAME_SIZE, 0, false},
+               /* After umem ends */
+               {UMEM_SIZE, PKT_SIZE, 0, false},
+               /* Straddle the end of umem */
+               {UMEM_SIZE - PKT_SIZE / 2, PKT_SIZE, 0, false},
+               /* Straddle a page boundary */
+               {0x3000 - PKT_SIZE / 2, PKT_SIZE, 0, false},
+               /* Straddle a 2K boundary */
+               {0x3800 - PKT_SIZE / 2, PKT_SIZE, 0, true},
+               /* Valid packet for sync so that something is received */
+               {0x4000, PKT_SIZE, 0, true}};
+
+       if (test->ifobj_tx->umem->unaligned_mode) {
+               /* Crossing a page boundary allowed */
+               pkts[6].valid = true;
+       }
+       if (test->ifobj_tx->umem->frame_size == XSK_UMEM__DEFAULT_FRAME_SIZE / 2) {
+               /* Crossing a 2K frame size boundary not allowed */
+               pkts[7].valid = false;
+       }
+
+       pkt_stream_generate_custom(test, pkts, ARRAY_SIZE(pkts));
+       testapp_validate_traffic(test);
+       pkt_stream_restore_default(test);
+}
+
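+/* Initialize the addressing fields (MAC, IP, UDP port) and the worker
+ * function of an interface object.
+ */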
+static void init_iface(struct ifobject *ifobj, const char *dst_mac, const char *src_mac,
+                      const char *dst_ip, const char *src_ip, const u16 dst_port,
+                      const u16 src_port, thread_func_t func_ptr)
+{
+       struct in_addr ip;
+
+       memcpy(ifobj->dst_mac, dst_mac, ETH_ALEN);
+       memcpy(ifobj->src_mac, src_mac, ETH_ALEN);
+
+       inet_aton(dst_ip, &ip);
+       ifobj->dst_ip = ip.s_addr;
+
+       inet_aton(src_ip, &ip);
+       ifobj->src_ip = ip.s_addr;
+
+       ifobj->dst_port = dst_port;
+       ifobj->src_port = src_port;
+
+       ifobj->func_ptr = func_ptr;
+}
+
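+/* Dispatch one test type and report PASS unless the test flagged a
+ * failure; tests that skip return before reaching the report.
+ */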
+static void run_pkt_test(struct test_spec *test, enum test_mode mode, enum test_type type)
+{
+       switch (type) {
+       case TEST_TYPE_STATS_RX_DROPPED:
+               testapp_stats_rx_dropped(test);
+               break;
+       case TEST_TYPE_STATS_TX_INVALID_DESCS:
+               testapp_stats_tx_invalid_descs(test);
+               break;
+       case TEST_TYPE_STATS_RX_FULL:
+               testapp_stats_rx_full(test);
+               break;
+       case TEST_TYPE_STATS_FILL_EMPTY:
+               testapp_stats_fill_empty(test);
+               break;
+       case TEST_TYPE_TEARDOWN:
+               testapp_teardown(test);
+               break;
+       case TEST_TYPE_BIDI:
+               testapp_bidi(test);
+               break;
+       case TEST_TYPE_BPF_RES:
+               testapp_bpf_res(test);
+               break;
+       case TEST_TYPE_RUN_TO_COMPLETION:
+               test_spec_set_name(test, "RUN_TO_COMPLETION");
+               testapp_validate_traffic(test);
+               break;
+       case TEST_TYPE_RUN_TO_COMPLETION_SINGLE_PKT:
+               test_spec_set_name(test, "RUN_TO_COMPLETION_SINGLE_PKT");
+               testapp_single_pkt(test);
+               break;
+       case TEST_TYPE_RUN_TO_COMPLETION_2K_FRAME:
+               test_spec_set_name(test, "RUN_TO_COMPLETION_2K_FRAME_SIZE");
+               test->ifobj_tx->umem->frame_size = 2048;
+               test->ifobj_rx->umem->frame_size = 2048;
+               pkt_stream_replace(test, DEFAULT_PKT_CNT, PKT_SIZE);
+               testapp_validate_traffic(test);
+
+               pkt_stream_restore_default(test);
+               break;
+       case TEST_TYPE_POLL:
+               test->ifobj_tx->use_poll = true;
+               test->ifobj_rx->use_poll = true;
+               test_spec_set_name(test, "POLL");
+               testapp_validate_traffic(test);
+               break;
+       case TEST_TYPE_ALIGNED_INV_DESC:
+               test_spec_set_name(test, "ALIGNED_INV_DESC");
+               testapp_invalid_desc(test);
+               break;
+       case TEST_TYPE_ALIGNED_INV_DESC_2K_FRAME:
+               test_spec_set_name(test, "ALIGNED_INV_DESC_2K_FRAME_SIZE");
+               test->ifobj_tx->umem->frame_size = 2048;
+               test->ifobj_rx->umem->frame_size = 2048;
+               testapp_invalid_desc(test);
+               break;
+       case TEST_TYPE_UNALIGNED_INV_DESC:
+               if (!hugepages_present(test->ifobj_tx)) {
+                       ksft_test_result_skip("No 2M huge pages present.\n");
+                       return;
+               }
+               test_spec_set_name(test, "UNALIGNED_INV_DESC");
+               test->ifobj_tx->umem->unaligned_mode = true;
+               test->ifobj_rx->umem->unaligned_mode = true;
+               testapp_invalid_desc(test);
+               break;
+       case TEST_TYPE_UNALIGNED:
+               if (!testapp_unaligned(test))
+                       return;
+               break;
+       case TEST_TYPE_HEADROOM:
+               testapp_headroom(test);
+               break;
+       default:
+               break;
+       }
+
+       if (!test->fail)
+               ksft_test_result_pass("PASS: %s %s%s\n", mode_string(test), busy_poll_string(test),
+                                     test->name);
+}
+
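+/* Allocate an ifobject together with its socket array and UMEM info,
+ * unwinding the allocations on failure.
+ */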
+static struct ifobject *ifobject_create(void)
+{
+       struct ifobject *ifobj;
+
+       ifobj = calloc(1, sizeof(struct ifobject));
+       if (!ifobj)
+               return NULL;
+
+       ifobj->xsk_arr = calloc(MAX_SOCKETS, sizeof(*ifobj->xsk_arr));
+       if (!ifobj->xsk_arr)
+               goto out_xsk_arr;
+
+       ifobj->umem = calloc(1, sizeof(*ifobj->umem));
+       if (!ifobj->umem)
+               goto out_umem;
+
+       return ifobj;
+
+out_umem:
+       free(ifobj->xsk_arr);
+out_xsk_arr:
+       free(ifobj);
+       return NULL;
+}
+
+static void ifobject_delete(struct ifobject *ifobj)
+{
+       free(ifobj->umem);
+       free(ifobj->xsk_arr);
+       free(ifobj);
+}
+
+int main(int argc, char **argv)
+{
+       struct pkt_stream *pkt_stream_default;
+       struct ifobject *ifobj_tx, *ifobj_rx;
+       u32 i, j, failed_tests = 0;
+       struct test_spec test;
+
+       /* Use libbpf 1.0 API mode */
+       libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+
+       ifobj_tx = ifobject_create();
+       if (!ifobj_tx)
+               exit_with_error(ENOMEM);
+       ifobj_rx = ifobject_create();
+       if (!ifobj_rx)
+               exit_with_error(ENOMEM);
+
+       setlocale(LC_ALL, "");
+
+       parse_command_line(ifobj_tx, ifobj_rx, argc, argv);
+
+       if (!validate_interface(ifobj_tx) || !validate_interface(ifobj_rx)) {
+               usage(basename(argv[0]));
+               ksft_exit_xfail();
+       }
+
+       init_iface(ifobj_tx, MAC1, MAC2, IP1, IP2, UDP_PORT1, UDP_PORT2,
+                  worker_testapp_validate_tx);
+       init_iface(ifobj_rx, MAC2, MAC1, IP2, IP1, UDP_PORT2, UDP_PORT1,
+                  worker_testapp_validate_rx);
+
+       test_spec_init(&test, ifobj_tx, ifobj_rx, 0);
+       pkt_stream_default = pkt_stream_generate(ifobj_tx->umem, DEFAULT_PKT_CNT, PKT_SIZE);
+       if (!pkt_stream_default)
+               exit_with_error(ENOMEM);
+       test.pkt_stream_default = pkt_stream_default;
+
+       ksft_set_plan(TEST_MODE_MAX * TEST_TYPE_MAX);
+
+       for (i = 0; i < TEST_MODE_MAX; i++)
+               for (j = 0; j < TEST_TYPE_MAX; j++) {
+                       test_spec_init(&test, ifobj_tx, ifobj_rx, i);
+                       run_pkt_test(&test, i, j);
+                       usleep(USLEEP_MAX);
+
+                       if (test.fail)
+                               failed_tests++;
+               }
+
+       pkt_stream_delete(pkt_stream_default);
+       ifobject_delete(ifobj_tx);
+       ifobject_delete(ifobj_rx);
+
+       if (failed_tests)
+               ksft_exit_fail();
+       else
+               ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/bpf/xskxceiver.h b/tools/testing/selftests/bpf/xskxceiver.h
new file mode 100644 (file)
index 0000000..3d17053
--- /dev/null
@@ -0,0 +1,172 @@
+/* SPDX-License-Identifier: GPL-2.0
+ * Copyright(c) 2020 Intel Corporation.
+ */
+
+#ifndef XSKXCEIVER_H_
+#define XSKXCEIVER_H_
+
+#ifndef SOL_XDP
+#define SOL_XDP 283
+#endif
+
+#ifndef AF_XDP
+#define AF_XDP 44
+#endif
+
+#ifndef PF_XDP
+#define PF_XDP AF_XDP
+#endif
+
+#ifndef SO_BUSY_POLL_BUDGET
+#define SO_BUSY_POLL_BUDGET 70
+#endif
+
+#ifndef SO_PREFER_BUSY_POLL
+#define SO_PREFER_BUSY_POLL 69
+#endif
+
+#define TEST_PASS 0
+#define TEST_FAILURE -1
+#define MAX_INTERFACES 2
+#define MAX_INTERFACE_NAME_CHARS 7
+#define MAX_INTERFACES_NAMESPACE_CHARS 10
+#define MAX_SOCKETS 2
+#define MAX_TEST_NAME_SIZE 32
+#define MAX_TEARDOWN_ITER 10
+#define PKT_HDR_SIZE (sizeof(struct ethhdr) + sizeof(struct iphdr) + \
+                       sizeof(struct udphdr))
+#define MIN_ETH_PKT_SIZE 64
+#define ETH_FCS_SIZE 4
+#define MIN_PKT_SIZE (MIN_ETH_PKT_SIZE - ETH_FCS_SIZE)
+#define PKT_SIZE (MIN_PKT_SIZE)
+#define IP_PKT_SIZE (PKT_SIZE - sizeof(struct ethhdr))
+#define IP_PKT_VER 0x4
+#define IP_PKT_TOS 0x9
+#define UDP_PKT_SIZE (IP_PKT_SIZE - sizeof(struct iphdr))
+#define UDP_PKT_DATA_SIZE (UDP_PKT_SIZE - sizeof(struct udphdr))
+#define USLEEP_MAX 10000
+#define SOCK_RECONF_CTR 10
+#define BATCH_SIZE 64
+#define POLL_TMOUT 1000
+#define RECV_TMOUT 3
+#define DEFAULT_PKT_CNT (4 * 1024)
+#define DEFAULT_UMEM_BUFFERS (DEFAULT_PKT_CNT / 4)
+#define UMEM_SIZE (DEFAULT_UMEM_BUFFERS * XSK_UMEM__DEFAULT_FRAME_SIZE)
+#define RX_FULL_RXQSIZE 32
+#define UMEM_HEADROOM_TEST_SIZE 128
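+/* One byte larger than the default frame size, used to build invalid descriptors */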
+#define XSK_UMEM__INVALID_FRAME_SIZE (XSK_UMEM__DEFAULT_FRAME_SIZE + 1)
+
+#define print_verbose(x...) do { if (opt_verbose) ksft_print_msg(x); } while (0)
+
+enum test_mode {
+       TEST_MODE_SKB,
+       TEST_MODE_DRV,
+       TEST_MODE_MAX
+};
+
+enum test_type {
+       TEST_TYPE_RUN_TO_COMPLETION,
+       TEST_TYPE_RUN_TO_COMPLETION_2K_FRAME,
+       TEST_TYPE_RUN_TO_COMPLETION_SINGLE_PKT,
+       TEST_TYPE_POLL,
+       TEST_TYPE_UNALIGNED,
+       TEST_TYPE_ALIGNED_INV_DESC,
+       TEST_TYPE_ALIGNED_INV_DESC_2K_FRAME,
+       TEST_TYPE_UNALIGNED_INV_DESC,
+       TEST_TYPE_HEADROOM,
+       TEST_TYPE_TEARDOWN,
+       TEST_TYPE_BIDI,
+       TEST_TYPE_STATS_RX_DROPPED,
+       TEST_TYPE_STATS_TX_INVALID_DESCS,
+       TEST_TYPE_STATS_RX_FULL,
+       TEST_TYPE_STATS_FILL_EMPTY,
+       TEST_TYPE_BPF_RES,
+       TEST_TYPE_MAX
+};
+
+static bool opt_pkt_dump;
+static bool opt_verbose;
+
+struct xsk_umem_info {
+       struct xsk_ring_prod fq;
+       struct xsk_ring_cons cq;
+       struct xsk_umem *umem;
+       u32 num_frames;
+       u32 frame_headroom;
+       void *buffer;
+       u32 frame_size;
+       bool unaligned_mode;
+};
+
+struct xsk_socket_info {
+       struct xsk_ring_cons rx;
+       struct xsk_ring_prod tx;
+       struct xsk_umem_info *umem;
+       struct xsk_socket *xsk;
+       u32 outstanding_tx;
+       u32 rxqsize;
+};
+
+struct pkt {
+       u64 addr;
+       u32 len;
+       u32 payload;
+       bool valid;
+};
+
+struct pkt_stream {
+       u32 nb_pkts;
+       u32 rx_pkt_nb;
+       struct pkt *pkts;
+       bool use_addr_for_fill;
+};
+
+struct ifobject;
+typedef int (*validation_func_t)(struct ifobject *ifobj);
+typedef void *(*thread_func_t)(void *arg);
+
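+/* Per-interface state, one instance for the Tx and one for the Rx side */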
+struct ifobject {
+       char ifname[MAX_INTERFACE_NAME_CHARS];
+       char nsname[MAX_INTERFACES_NAMESPACE_CHARS];
+       struct xsk_socket_info *xsk;
+       struct xsk_socket_info *xsk_arr;
+       struct xsk_umem_info *umem;
+       thread_func_t func_ptr;
+       validation_func_t validation_func;
+       struct pkt_stream *pkt_stream;
+       int ns_fd;
+       int xsk_map_fd;
+       u32 dst_ip;
+       u32 src_ip;
+       u32 xdp_flags;
+       u32 bind_flags;
+       u16 src_port;
+       u16 dst_port;
+       bool tx_on;
+       bool rx_on;
+       bool use_poll;
+       bool busy_poll;
+       bool use_fill_ring;
+       bool release_rx;
+       u8 dst_mac[ETH_ALEN];
+       u8 src_mac[ETH_ALEN];
+};
+
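+/* Describes one test run: the interface pair, the default packet stream,
+ * step counters and the test name used in reporting.
+ */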
+struct test_spec {
+       struct ifobject *ifobj_tx;
+       struct ifobject *ifobj_rx;
+       struct pkt_stream *pkt_stream_default;
+       u16 total_steps;
+       u16 current_step;
+       u16 nb_sockets;
+       bool fail;
+       char name[MAX_TEST_NAME_SIZE];
+};
+
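+/* Shared by the worker threads to start in lockstep and pace transmission */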
+pthread_barrier_t barr;
+pthread_mutex_t pacing_mutex = PTHREAD_MUTEX_INITIALIZER;
+pthread_cond_t pacing_cond = PTHREAD_COND_INITIALIZER;
+
+int pkts_in_flight;
+
+#endif                         /* XSKXCEIVER_H_ */