tools/lib/bpf/libbpf.c

   1 // SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
   2
   3 /*
   4  * Common eBPF ELF object loading operations.
   5  *
   6  * Copyright (C) 2013-2015 Alexei Starovoitov <ast@kernel.org>
   7  * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com>
   8  * Copyright (C) 2015 Huawei Inc.
   9  * Copyright (C) 2017 Nicira, Inc.
  10  * Copyright (C) 2019 Isovalent, Inc.
  11  */
  12
  13 #ifndef _GNU_SOURCE
  14 #define _GNU_SOURCE
  15 #endif
  16 #include <stdlib.h>
  17 #include <stdio.h>
  18 #include <stdarg.h>
  19 #include <libgen.h>
  20 #include <inttypes.h>
  21 #include <limits.h>
  22 #include <string.h>
  23 #include <unistd.h>
  24 #include <endian.h>
  25 #include <fcntl.h>
  26 #include <errno.h>
  27 #include <ctype.h>
  28 #include <asm/unistd.h>
  29 #include <linux/err.h>
  30 #include <linux/kernel.h>
  31 #include <linux/bpf.h>
  32 #include <linux/btf.h>
  33 #include <linux/filter.h>
  34 #include <linux/limits.h>
  35 #include <linux/perf_event.h>
  36 #include <linux/ring_buffer.h>
  37 #include <linux/version.h>
  38 #include <sys/epoll.h>
  39 #include <sys/ioctl.h>
  40 #include <sys/mman.h>
  41 #include <sys/stat.h>
  42 #include <sys/types.h>
  43 #include <sys/vfs.h>
  44 #include <sys/utsname.h>
  45 #include <sys/resource.h>
  46 #include <libelf.h>
  47 #include <gelf.h>
  48 #include <zlib.h>
  49
  50 #include "libbpf.h"
  51 #include "bpf.h"
  52 #include "btf.h"
  53 #include "str_error.h"
  54 #include "libbpf_internal.h"
  55 #include "hashmap.h"
  56 #include "bpf_gen_internal.h"
  57
  58 #ifndef BPF_FS_MAGIC
  59 #define BPF_FS_MAGIC            0xcafe4a11
  60 #endif
  61
  62 #define BPF_INSN_SZ (sizeof(struct bpf_insn))
  63
/* vfprintf() in __base_pr() uses a non-literal format string. That may break
 * compilation if the user enables the corresponding warning, so disable it
 * explicitly.
 */
  67 #pragma GCC diagnostic ignored "-Wformat-nonliteral"
  68
  69 #define __printf(a, b)  __attribute__((format(printf, a, b)))
  70
  71 static struct bpf_map *bpf_object__add_map(struct bpf_object *obj);
  72 static bool prog_is_subprog(const struct bpf_object *obj, const struct bpf_program *prog);
  73
/* Human-readable names for BPF attach types, indexed by the BPF_* attach type
 * constant. Any index inside the array bounds that has no initializer below
 * is implicitly NULL, so lookups must handle NULL entries.
 */
static const char * const attach_type_name[] = {
        [BPF_CGROUP_INET_INGRESS]       = "cgroup_inet_ingress",
        [BPF_CGROUP_INET_EGRESS]        = "cgroup_inet_egress",
        [BPF_CGROUP_INET_SOCK_CREATE]   = "cgroup_inet_sock_create",
        [BPF_CGROUP_INET_SOCK_RELEASE]  = "cgroup_inet_sock_release",
        [BPF_CGROUP_SOCK_OPS]           = "cgroup_sock_ops",
        [BPF_CGROUP_DEVICE]             = "cgroup_device",
        [BPF_CGROUP_INET4_BIND]         = "cgroup_inet4_bind",
        [BPF_CGROUP_INET6_BIND]         = "cgroup_inet6_bind",
        [BPF_CGROUP_INET4_CONNECT]      = "cgroup_inet4_connect",
        [BPF_CGROUP_INET6_CONNECT]      = "cgroup_inet6_connect",
        [BPF_CGROUP_INET4_POST_BIND]    = "cgroup_inet4_post_bind",
        [BPF_CGROUP_INET6_POST_BIND]    = "cgroup_inet6_post_bind",
        [BPF_CGROUP_INET4_GETPEERNAME]  = "cgroup_inet4_getpeername",
        [BPF_CGROUP_INET6_GETPEERNAME]  = "cgroup_inet6_getpeername",
        [BPF_CGROUP_INET4_GETSOCKNAME]  = "cgroup_inet4_getsockname",
        [BPF_CGROUP_INET6_GETSOCKNAME]  = "cgroup_inet6_getsockname",
        [BPF_CGROUP_UDP4_SENDMSG]       = "cgroup_udp4_sendmsg",
        [BPF_CGROUP_UDP6_SENDMSG]       = "cgroup_udp6_sendmsg",
        [BPF_CGROUP_SYSCTL]             = "cgroup_sysctl",
        [BPF_CGROUP_UDP4_RECVMSG]       = "cgroup_udp4_recvmsg",
        [BPF_CGROUP_UDP6_RECVMSG]       = "cgroup_udp6_recvmsg",
        [BPF_CGROUP_GETSOCKOPT]         = "cgroup_getsockopt",
        [BPF_CGROUP_SETSOCKOPT]         = "cgroup_setsockopt",
        [BPF_SK_SKB_STREAM_PARSER]      = "sk_skb_stream_parser",
        [BPF_SK_SKB_STREAM_VERDICT]     = "sk_skb_stream_verdict",
        [BPF_SK_SKB_VERDICT]            = "sk_skb_verdict",
        [BPF_SK_MSG_VERDICT]            = "sk_msg_verdict",
        [BPF_LIRC_MODE2]                = "lirc_mode2",
        [BPF_FLOW_DISSECTOR]            = "flow_dissector",
        [BPF_TRACE_RAW_TP]              = "trace_raw_tp",
        [BPF_TRACE_FENTRY]              = "trace_fentry",
        [BPF_TRACE_FEXIT]               = "trace_fexit",
        [BPF_MODIFY_RETURN]             = "modify_return",
        [BPF_LSM_MAC]                   = "lsm_mac",
        [BPF_LSM_CGROUP]                = "lsm_cgroup",
        [BPF_SK_LOOKUP]                 = "sk_lookup",
        [BPF_TRACE_ITER]                = "trace_iter",
        [BPF_XDP_DEVMAP]                = "xdp_devmap",
        [BPF_XDP_CPUMAP]                = "xdp_cpumap",
        [BPF_XDP]                       = "xdp",
        [BPF_SK_REUSEPORT_SELECT]       = "sk_reuseport_select",
        [BPF_SK_REUSEPORT_SELECT_OR_MIGRATE]    = "sk_reuseport_select_or_migrate",
        [BPF_PERF_EVENT]                = "perf_event",
        [BPF_TRACE_KPROBE_MULTI]        = "trace_kprobe_multi",
};
 120
/* Human-readable names for BPF link types, indexed by the BPF_LINK_TYPE_*
 * constant. Indices without an initializer are implicitly NULL.
 */
static const char * const link_type_name[] = {
        [BPF_LINK_TYPE_UNSPEC]                  = "unspec",
        [BPF_LINK_TYPE_RAW_TRACEPOINT]          = "raw_tracepoint",
        [BPF_LINK_TYPE_TRACING]                 = "tracing",
        [BPF_LINK_TYPE_CGROUP]                  = "cgroup",
        [BPF_LINK_TYPE_ITER]                    = "iter",
        [BPF_LINK_TYPE_NETNS]                   = "netns",
        [BPF_LINK_TYPE_XDP]                     = "xdp",
        [BPF_LINK_TYPE_PERF_EVENT]              = "perf_event",
        [BPF_LINK_TYPE_KPROBE_MULTI]            = "kprobe_multi",
        [BPF_LINK_TYPE_STRUCT_OPS]              = "struct_ops",
};
 133
/* Human-readable names for BPF map types, indexed by the BPF_MAP_TYPE_*
 * constant. Indices without an initializer are implicitly NULL.
 */
static const char * const map_type_name[] = {
        [BPF_MAP_TYPE_UNSPEC]                   = "unspec",
        [BPF_MAP_TYPE_HASH]                     = "hash",
        [BPF_MAP_TYPE_ARRAY]                    = "array",
        [BPF_MAP_TYPE_PROG_ARRAY]               = "prog_array",
        [BPF_MAP_TYPE_PERF_EVENT_ARRAY]         = "perf_event_array",
        [BPF_MAP_TYPE_PERCPU_HASH]              = "percpu_hash",
        [BPF_MAP_TYPE_PERCPU_ARRAY]             = "percpu_array",
        [BPF_MAP_TYPE_STACK_TRACE]              = "stack_trace",
        [BPF_MAP_TYPE_CGROUP_ARRAY]             = "cgroup_array",
        [BPF_MAP_TYPE_LRU_HASH]                 = "lru_hash",
        [BPF_MAP_TYPE_LRU_PERCPU_HASH]          = "lru_percpu_hash",
        [BPF_MAP_TYPE_LPM_TRIE]                 = "lpm_trie",
        [BPF_MAP_TYPE_ARRAY_OF_MAPS]            = "array_of_maps",
        [BPF_MAP_TYPE_HASH_OF_MAPS]             = "hash_of_maps",
        [BPF_MAP_TYPE_DEVMAP]                   = "devmap",
        [BPF_MAP_TYPE_DEVMAP_HASH]              = "devmap_hash",
        [BPF_MAP_TYPE_SOCKMAP]                  = "sockmap",
        [BPF_MAP_TYPE_CPUMAP]                   = "cpumap",
        [BPF_MAP_TYPE_XSKMAP]                   = "xskmap",
        [BPF_MAP_TYPE_SOCKHASH]                 = "sockhash",
        [BPF_MAP_TYPE_CGROUP_STORAGE]           = "cgroup_storage",
        [BPF_MAP_TYPE_REUSEPORT_SOCKARRAY]      = "reuseport_sockarray",
        [BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE]    = "percpu_cgroup_storage",
        [BPF_MAP_TYPE_QUEUE]                    = "queue",
        [BPF_MAP_TYPE_STACK]                    = "stack",
        [BPF_MAP_TYPE_SK_STORAGE]               = "sk_storage",
        [BPF_MAP_TYPE_STRUCT_OPS]               = "struct_ops",
        [BPF_MAP_TYPE_RINGBUF]                  = "ringbuf",
        [BPF_MAP_TYPE_INODE_STORAGE]            = "inode_storage",
        [BPF_MAP_TYPE_TASK_STORAGE]             = "task_storage",
        [BPF_MAP_TYPE_BLOOM_FILTER]             = "bloom_filter",
};
 167
/* Human-readable names for BPF program types, indexed by the BPF_PROG_TYPE_*
 * constant. Indices without an initializer are implicitly NULL.
 */
static const char * const prog_type_name[] = {
        [BPF_PROG_TYPE_UNSPEC]                  = "unspec",
        [BPF_PROG_TYPE_SOCKET_FILTER]           = "socket_filter",
        [BPF_PROG_TYPE_KPROBE]                  = "kprobe",
        [BPF_PROG_TYPE_SCHED_CLS]               = "sched_cls",
        [BPF_PROG_TYPE_SCHED_ACT]               = "sched_act",
        [BPF_PROG_TYPE_TRACEPOINT]              = "tracepoint",
        [BPF_PROG_TYPE_XDP]                     = "xdp",
        [BPF_PROG_TYPE_PERF_EVENT]              = "perf_event",
        [BPF_PROG_TYPE_CGROUP_SKB]              = "cgroup_skb",
        [BPF_PROG_TYPE_CGROUP_SOCK]             = "cgroup_sock",
        [BPF_PROG_TYPE_LWT_IN]                  = "lwt_in",
        [BPF_PROG_TYPE_LWT_OUT]                 = "lwt_out",
        [BPF_PROG_TYPE_LWT_XMIT]                = "lwt_xmit",
        [BPF_PROG_TYPE_SOCK_OPS]                = "sock_ops",
        [BPF_PROG_TYPE_SK_SKB]                  = "sk_skb",
        [BPF_PROG_TYPE_CGROUP_DEVICE]           = "cgroup_device",
        [BPF_PROG_TYPE_SK_MSG]                  = "sk_msg",
        [BPF_PROG_TYPE_RAW_TRACEPOINT]          = "raw_tracepoint",
        [BPF_PROG_TYPE_CGROUP_SOCK_ADDR]        = "cgroup_sock_addr",
        [BPF_PROG_TYPE_LWT_SEG6LOCAL]           = "lwt_seg6local",
        [BPF_PROG_TYPE_LIRC_MODE2]              = "lirc_mode2",
        [BPF_PROG_TYPE_SK_REUSEPORT]            = "sk_reuseport",
        [BPF_PROG_TYPE_FLOW_DISSECTOR]          = "flow_dissector",
        [BPF_PROG_TYPE_CGROUP_SYSCTL]           = "cgroup_sysctl",
        [BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE] = "raw_tracepoint_writable",
        [BPF_PROG_TYPE_CGROUP_SOCKOPT]          = "cgroup_sockopt",
        [BPF_PROG_TYPE_TRACING]                 = "tracing",
        [BPF_PROG_TYPE_STRUCT_OPS]              = "struct_ops",
        [BPF_PROG_TYPE_EXT]                     = "ext",
        [BPF_PROG_TYPE_LSM]                     = "lsm",
        [BPF_PROG_TYPE_SK_LOOKUP]               = "sk_lookup",
        [BPF_PROG_TYPE_SYSCALL]                 = "syscall",
};
 202
 203 static int __base_pr(enum libbpf_print_level level, const char *format,
 204                      va_list args)
 205 {
 206         if (level == LIBBPF_DEBUG)
 207                 return 0;
 208
 209         return vfprintf(stderr, format, args);
 210 }
 211
/* Currently installed print callback; starts out as __base_pr and is swapped
 * by libbpf_set_print(). May be NULL, in which case libbpf_print() is a no-op.
 */
static libbpf_print_fn_t __libbpf_pr = __base_pr;
 213
 214 libbpf_print_fn_t libbpf_set_print(libbpf_print_fn_t fn)
 215 {
 216         libbpf_print_fn_t old_print_fn = __libbpf_pr;
 217
 218         __libbpf_pr = fn;
 219         return old_print_fn;
 220 }
 221
 222 __printf(2, 3)
 223 void libbpf_print(enum libbpf_print_level level, const char *format, ...)
 224 {
 225         va_list args;
 226
 227         if (!__libbpf_pr)
 228                 return;
 229
 230         va_start(args, format);
 231         __libbpf_pr(level, format, args);
 232         va_end(args);
 233 }
 234
 235 static void pr_perm_msg(int err)
 236 {
 237         struct rlimit limit;
 238         char buf[100];
 239
 240         if (err != -EPERM || geteuid() != 0)
 241                 return;
 242
 243         err = getrlimit(RLIMIT_MEMLOCK, &limit);
 244         if (err)
 245                 return;
 246
 247         if (limit.rlim_cur == RLIM_INFINITY)
 248                 return;
 249
 250         if (limit.rlim_cur < 1024)
 251                 snprintf(buf, sizeof(buf), "%zu bytes", (size_t)limit.rlim_cur);
 252         else if (limit.rlim_cur < 1024*1024)
 253                 snprintf(buf, sizeof(buf), "%.1f KiB", (double)limit.rlim_cur / 1024);
 254         else
 255                 snprintf(buf, sizeof(buf), "%.1f MiB", (double)limit.rlim_cur / (1024*1024));
 256
 257         pr_warn("permission error while running as root; try raising 'ulimit -l'? current value: %s\n",
 258                 buf);
 259 }
 260
/* Buffer size in bytes; its users are outside this part of the file
 * (presumably buffers for strerror-style messages -- confirm at call sites).
 */
#define STRERR_BUFSIZE  128
 262
 263 /* Copied from tools/perf/util/util.h */
 264 #ifndef zfree
 265 # define zfree(ptr) ({ free(*ptr); *ptr = NULL; })
 266 #endif
 267
 268 #ifndef zclose
 269 # define zclose(fd) ({                  \
 270         int ___err = 0;                 \
 271         if ((fd) >= 0)                  \
 272                 ___err = close((fd));   \
 273         fd = -1;                        \
 274         ___err; })
 275 #endif
 276
 277 static inline __u64 ptr_to_u64(const void *ptr)
 278 {
 279         return (__u64) (unsigned long) ptr;
 280 }
 281
 282 int libbpf_set_strict_mode(enum libbpf_strict_mode mode)
 283 {
 284         /* as of v1.0 libbpf_set_strict_mode() is a no-op */
 285         return 0;
 286 }
 287
 288 __u32 libbpf_major_version(void)
 289 {
 290         return LIBBPF_MAJOR_VERSION;
 291 }
 292
 293 __u32 libbpf_minor_version(void)
 294 {
 295         return LIBBPF_MINOR_VERSION;
 296 }
 297
 298 const char *libbpf_version_string(void)
 299 {
 300 #define __S(X) #X
 301 #define _S(X) __S(X)
 302         return  "v" _S(LIBBPF_MAJOR_VERSION) "." _S(LIBBPF_MINOR_VERSION);
 303 #undef _S
 304 #undef __S
 305 }
 306
 307 enum reloc_type {
 308         RELO_LD64,
 309         RELO_CALL,
 310         RELO_DATA,
 311         RELO_EXTERN_VAR,
 312         RELO_EXTERN_FUNC,
 313         RELO_SUBPROG_ADDR,
 314         RELO_CORE,
 315 };
 316
/* One relocation record attached to a BPF program (see
 * bpf_program->reloc_desc / nr_reloc). 'type' selects which union member is
 * meaningful.
 */
struct reloc_desc {
        enum reloc_type type;
        /* NOTE(review): presumably the index of the instruction to patch -- confirm */
        int insn_idx;
        union {
                const struct bpf_core_relo *core_relo; /* used when type == RELO_CORE */
                /* used for the remaining relocation types */
                struct {
                        int map_idx;
                        int sym_off;
                };
        };
};
 328
 329 /* stored as sec_def->cookie for all libbpf-supported SEC()s */
 330 enum sec_def_flags {
 331         SEC_NONE = 0,
 332         /* expected_attach_type is optional, if kernel doesn't support that */
 333         SEC_EXP_ATTACH_OPT = 1,
 334         /* legacy, only used by libbpf_get_type_names() and
 335          * libbpf_attach_type_by_name(), not used by libbpf itself at all.
 336          * This used to be associated with cgroup (and few other) BPF programs
 337          * that were attachable through BPF_PROG_ATTACH command. Pretty
 338          * meaningless nowadays, though.
 339          */
 340         SEC_ATTACHABLE = 2,
 341         SEC_ATTACHABLE_OPT = SEC_ATTACHABLE | SEC_EXP_ATTACH_OPT,
 342         /* attachment target is specified through BTF ID in either kernel or
 343          * other BPF program's BTF object */
 344         SEC_ATTACH_BTF = 4,
 345         /* BPF program type allows sleeping/blocking in kernel */
 346         SEC_SLEEPABLE = 8,
 347         /* BPF program support non-linear XDP buffer */
 348         SEC_XDP_FRAGS = 16,
 349 };
 350
/* Section definition: ties an ELF section name ('sec') to a program type,
 * an expected attach type and a set of per-section callbacks. Referenced
 * from bpf_program->sec_def.
 */
struct bpf_sec_def {
        char *sec;
        enum bpf_prog_type prog_type;
        enum bpf_attach_type expected_attach_type;
        /* for libbpf-supported SEC()s this holds enum sec_def_flags bits */
        long cookie;
        int handler_id;

        libbpf_prog_setup_fn_t prog_setup_fn;
        libbpf_prog_prepare_load_fn_t prog_prepare_load_fn;
        libbpf_prog_attach_fn_t prog_attach_fn;
};
 362
/*
 * bpf_prog should be a better name but it has been used in
 * linux/filter.h.
 */
struct bpf_program {
        /* heap-allocated copies made in bpf_object__init_prog(); sec_name is
         * stored without the optional leading '?'
         */
        char *name;
        char *sec_name;
        /* index of the containing ELF section; set to -1 by bpf_program__exit() */
        size_t sec_idx;
        const struct bpf_sec_def *sec_def;
        /* this program's instruction offset (in number of instructions)
         * within its containing ELF section
         */
        size_t sec_insn_off;
        /* number of original instructions in ELF section belonging to this
         * program, not taking into account subprogram instructions possibly
         * appended later during relocation
         */
        size_t sec_insn_cnt;
        /* Offset (in number of instructions) of the start of instructions
         * belonging to this BPF program within its containing main BPF
         * program. For the entry-point (main) BPF program, this is always
         * zero. For a sub-program, this gets reset before each of main BPF
         * programs are processed and relocated and is used to determine
         * whether sub-program was already appended to the main program, and
         * if yes, at which instruction offset.
         */
        size_t sub_insn_off;

        /* instructions that belong to BPF program; insns[0] is located at
         * sec_insn_off instruction within its ELF section in ELF file, so
         * when mapping ELF file instruction index to the local instruction,
         * one needs to subtract sec_insn_off; and vice versa.
         */
        struct bpf_insn *insns;
        /* actual number of instructions in this BPF program's image; for
         * entry-point BPF programs this includes the size of main program
         * itself plus all the used sub-programs, appended at the end
         */
        size_t insns_cnt;

        struct reloc_desc *reloc_desc;
        int nr_reloc;

        /* BPF verifier log settings */
        char *log_buf;
        size_t log_size;
        /* initialized from the owning object's log_level */
        __u32 log_level;

        struct bpf_object *obj;

        /* -1 while no descriptor is held (initial state and after unload) */
        int fd;
        /* starts out false for SEC("?...") programs, true otherwise */
        bool autoload;
        /* set for non-local symbols with STV_HIDDEN/STV_INTERNAL visibility */
        bool mark_btf_static;
        enum bpf_prog_type type;
        enum bpf_attach_type expected_attach_type;

        int prog_ifindex;
        __u32 attach_btf_obj_fd;
        __u32 attach_btf_id;
        __u32 attach_prog_fd;

        /* freed and reset to NULL by bpf_program__unload() */
        void *func_info;
        __u32 func_info_rec_size;
        __u32 func_info_cnt;

        /* freed and reset to NULL by bpf_program__unload() */
        void *line_info;
        __u32 line_info_rec_size;
        __u32 line_info_cnt;
        __u32 prog_flags;
};
 433
/* State attached to a map through bpf_map->st_ops. Holds the struct's value
 * both in the BPF object's BTF layout ('data') and in the kernel's layout
 * ('kern_vdata').
 */
struct bpf_struct_ops {
        const char *tname;
        const struct btf_type *type;
        struct bpf_program **progs;
        __u32 *kern_func_off;
        /* e.g. struct tcp_congestion_ops in bpf_prog's btf format */
        void *data;
        /* e.g. struct bpf_struct_ops_tcp_congestion_ops in
         *      btf_vmlinux's format.
         * struct bpf_struct_ops_tcp_congestion_ops {
         *      [... some other kernel fields ...]
         *      struct tcp_congestion_ops data;
         * }
         * kern_vdata size == sizeof(struct bpf_struct_ops_tcp_congestion_ops)
         * bpf_map__init_kern_struct_ops() will populate the "kern_vdata"
         * from "data".
         */
        void *kern_vdata;
        __u32 type_id;
};
 454
/* Names of special ELF sections. NOTE(review): the code that matches section
 * names against these constants is outside this part of the file.
 */
#define DATA_SEC ".data"
#define BSS_SEC ".bss"
#define RODATA_SEC ".rodata"
#define KCONFIG_SEC ".kconfig"
#define KSYMS_SEC ".ksyms"
#define STRUCT_OPS_SEC ".struct_ops"
 461
 462 enum libbpf_map_type {
 463         LIBBPF_MAP_UNSPEC,
 464         LIBBPF_MAP_DATA,
 465         LIBBPF_MAP_BSS,
 466         LIBBPF_MAP_RODATA,
 467         LIBBPF_MAP_KCONFIG,
 468 };
 469
/* Basic map parameters; embedded in struct bpf_map as 'def'. */
struct bpf_map_def {
        unsigned int type;
        unsigned int key_size;
        unsigned int value_size;
        unsigned int max_entries;
        unsigned int map_flags;
};
 477
/* libbpf's representation of a single BPF map belonging to a bpf_object. */
struct bpf_map {
        /* owning object */
        struct bpf_object *obj;
        char *name;
        /* real_name is defined for special internal maps (.rodata*,
         * .data*, .bss, .kconfig) and preserves their original ELF section
         * name. This is important to be able to find corresponding BTF
         * DATASEC information.
         */
        char *real_name;
        int fd;
        int sec_idx;
        size_t sec_offset;
        int map_ifindex;
        int inner_map_fd;
        struct bpf_map_def def;
        __u32 numa_node;
        __u32 btf_var_idx;
        __u32 btf_key_type_id;
        __u32 btf_value_type_id;
        __u32 btf_vmlinux_value_type_id;
        enum libbpf_map_type libbpf_type;
        void *mmaped;
        struct bpf_struct_ops *st_ops;
        struct bpf_map *inner_map;
        void **init_slots;
        int init_slots_sz;
        char *pin_path;
        bool pinned;
        bool reused;
        bool autocreate;
        __u64 map_extra;
};
 510
 511 enum extern_type {
 512         EXT_UNKNOWN,
 513         EXT_KCFG,
 514         EXT_KSYM,
 515 };
 516
 517 enum kcfg_type {
 518         KCFG_UNKNOWN,
 519         KCFG_CHAR,
 520         KCFG_BOOL,
 521         KCFG_INT,
 522         KCFG_TRISTATE,
 523         KCFG_CHAR_ARR,
 524 };
 525
/* Describes one extern of a BPF object (see bpf_object->externs). 'type'
 * selects which union member applies: 'kcfg' or 'ksym'.
 */
struct extern_desc {
        enum extern_type type;
        int sym_idx;
        int btf_id;
        int sec_btf_id;
        const char *name;
        bool is_set;
        bool is_weak;
        union {
                /* NOTE(review): presumably used when type == EXT_KCFG -- confirm */
                struct {
                        enum kcfg_type type;
                        int sz;
                        int align;
                        int data_off;
                        bool is_signed;
                } kcfg;
                /* NOTE(review): presumably used when type == EXT_KSYM -- confirm */
                struct {
                        unsigned long long addr;

                        /* target btf_id of the corresponding kernel var. */
                        int kernel_btf_obj_fd;
                        int kernel_btf_id;

                        /* local btf_id of the ksym extern's type. */
                        __u32 type_id;
                        /* BTF fd index to be patched in for insn->off, this is
                         * 0 for vmlinux BTF, index in obj->fd_array for module
                         * BTF
                         */
                        __s16 btf_fd_idx;
                } ksym;
        };
};
 559
/* One kernel module BTF entry; bpf_object keeps an array of these in
 * 'btf_modules', which it describes as lazily initialized.
 */
struct module_btf {
        struct btf *btf;
        char *name;
        __u32 id;
        int fd;
        int fd_array_idx;
};
 567
 568 enum sec_type {
 569         SEC_UNUSED = 0,
 570         SEC_RELO,
 571         SEC_BSS,
 572         SEC_DATA,
 573         SEC_RODATA,
 574 };
 575
/* Per-section bookkeeping kept in elf_state->secs: the section's
 * classification plus its ELF header and data.
 */
struct elf_sec_desc {
        enum sec_type sec_type;
        Elf64_Shdr *shdr;
        Elf_Data *data;
};
 581
/* ELF parsing state of a bpf_object (embedded as bpf_object->efile); per the
 * comment there, only valid while 'elf' is not NULL.
 */
struct elf_state {
        int fd;
        const void *obj_buf;
        size_t obj_buf_sz;
        Elf *elf;
        Elf64_Ehdr *ehdr;
        /* symbol table data; bpf_object__add_programs() sizes it in Elf64_Sym units */
        Elf_Data *symbols;
        Elf_Data *st_ops_data;
        size_t shstrndx; /* section index for section name strings */
        size_t strtabidx;
        /* array of sec_cnt per-section descriptors */
        struct elf_sec_desc *secs;
        int sec_cnt;
        int maps_shndx;
        int btf_maps_shndx;
        __u32 btf_maps_sec_btf_id;
        /* section index compared against when deciding whether static (local) functions are allowed */
        int text_shndx;
        int symbols_shndx;
        int st_ops_shndx;
};
 601
 602 struct usdt_manager;
 603
/* Top-level handle for a BPF ELF object: owns its programs, maps, externs,
 * BTF data and the transient ELF parsing state.
 */
struct bpf_object {
        char name[BPF_OBJ_NAME_LEN];
        char license[64];
        __u32 kern_version;

        /* array of nr_programs entries, grown by bpf_object__add_programs() */
        struct bpf_program *programs;
        size_t nr_programs;
        struct bpf_map *maps;
        size_t nr_maps;
        size_t maps_cap;

        char *kconfig;
        struct extern_desc *externs;
        int nr_extern;
        int kconfig_map_idx;

        bool loaded;
        bool has_subcalls;
        bool has_rodata;

        struct bpf_gen *gen_loader;

        /* Information when doing ELF related work. Only valid if efile.elf is not NULL */
        struct elf_state efile;

        struct btf *btf;
        struct btf_ext *btf_ext;

        /* Parse and load BTF vmlinux if any of the programs in the object need
         * it at load time.
         */
        struct btf *btf_vmlinux;
        /* Path to the custom BTF to be used for BPF CO-RE relocations as an
         * override for vmlinux BTF.
         */
        char *btf_custom_path;
        /* vmlinux BTF override for CO-RE relocations */
        struct btf *btf_vmlinux_override;
        /* Lazily initialized kernel module BTFs */
        struct module_btf *btf_modules;
        bool btf_modules_loaded;
        size_t btf_module_cnt;
        size_t btf_module_cap;

        /* optional log settings passed to BPF_BTF_LOAD and BPF_PROG_LOAD commands */
        char *log_buf;
        size_t log_size;
        /* copied into each program's log_level by bpf_object__init_prog() */
        __u32 log_level;

        int *fd_array;
        size_t fd_array_cap;
        size_t fd_array_cnt;

        struct usdt_manager *usdt_man;

        /* flexible array member; must remain the last field */
        char path[];
};
 661
 662 static const char *elf_sym_str(const struct bpf_object *obj, size_t off);
 663 static const char *elf_sec_str(const struct bpf_object *obj, size_t off);
 664 static Elf_Scn *elf_sec_by_idx(const struct bpf_object *obj, size_t idx);
 665 static Elf_Scn *elf_sec_by_name(const struct bpf_object *obj, const char *name);
 666 static Elf64_Shdr *elf_sec_hdr(const struct bpf_object *obj, Elf_Scn *scn);
 667 static const char *elf_sec_name(const struct bpf_object *obj, Elf_Scn *scn);
 668 static Elf_Data *elf_sec_data(const struct bpf_object *obj, Elf_Scn *scn);
 669 static Elf64_Sym *elf_sym_by_idx(const struct bpf_object *obj, size_t idx);
 670 static Elf64_Rel *elf_rel_by_idx(Elf_Data *data, size_t idx);
 671
 672 void bpf_program__unload(struct bpf_program *prog)
 673 {
 674         if (!prog)
 675                 return;
 676
 677         zclose(prog->fd);
 678
 679         zfree(&prog->func_info);
 680         zfree(&prog->line_info);
 681 }
 682
 683 static void bpf_program__exit(struct bpf_program *prog)
 684 {
 685         if (!prog)
 686                 return;
 687
 688         bpf_program__unload(prog);
 689         zfree(&prog->name);
 690         zfree(&prog->sec_name);
 691         zfree(&prog->insns);
 692         zfree(&prog->reloc_desc);
 693
 694         prog->nr_reloc = 0;
 695         prog->insns_cnt = 0;
 696         prog->sec_idx = -1;
 697 }
 698
 699 static bool insn_is_subprog_call(const struct bpf_insn *insn)
 700 {
 701         return BPF_CLASS(insn->code) == BPF_JMP &&
 702                BPF_OP(insn->code) == BPF_CALL &&
 703                BPF_SRC(insn->code) == BPF_K &&
 704                insn->src_reg == BPF_PSEUDO_CALL &&
 705                insn->dst_reg == 0 &&
 706                insn->off == 0;
 707 }
 708
 709 static bool is_call_insn(const struct bpf_insn *insn)
 710 {
 711         return insn->code == (BPF_JMP | BPF_CALL);
 712 }
 713
 714 static bool insn_is_pseudo_func(struct bpf_insn *insn)
 715 {
 716         return is_ldimm64_insn(insn) && insn->src_reg == BPF_PSEUDO_FUNC;
 717 }
 718
 719 static int
 720 bpf_object__init_prog(struct bpf_object *obj, struct bpf_program *prog,
 721                       const char *name, size_t sec_idx, const char *sec_name,
 722                       size_t sec_off, void *insn_data, size_t insn_data_sz)
 723 {
 724         if (insn_data_sz == 0 || insn_data_sz % BPF_INSN_SZ || sec_off % BPF_INSN_SZ) {
 725                 pr_warn("sec '%s': corrupted program '%s', offset %zu, size %zu\n",
 726                         sec_name, name, sec_off, insn_data_sz);
 727                 return -EINVAL;
 728         }
 729
 730         memset(prog, 0, sizeof(*prog));
 731         prog->obj = obj;
 732
 733         prog->sec_idx = sec_idx;
 734         prog->sec_insn_off = sec_off / BPF_INSN_SZ;
 735         prog->sec_insn_cnt = insn_data_sz / BPF_INSN_SZ;
 736         /* insns_cnt can later be increased by appending used subprograms */
 737         prog->insns_cnt = prog->sec_insn_cnt;
 738
 739         prog->type = BPF_PROG_TYPE_UNSPEC;
 740         prog->fd = -1;
 741
 742         /* libbpf's convention for SEC("?abc...") is that it's just like
 743          * SEC("abc...") but the corresponding bpf_program starts out with
 744          * autoload set to false.
 745          */
 746         if (sec_name[0] == '?') {
 747                 prog->autoload = false;
 748                 /* from now on forget there was ? in section name */
 749                 sec_name++;
 750         } else {
 751                 prog->autoload = true;
 752         }
 753
 754         /* inherit object's log_level */
 755         prog->log_level = obj->log_level;
 756
 757         prog->sec_name = strdup(sec_name);
 758         if (!prog->sec_name)
 759                 goto errout;
 760
 761         prog->name = strdup(name);
 762         if (!prog->name)
 763                 goto errout;
 764
 765         prog->insns = malloc(insn_data_sz);
 766         if (!prog->insns)
 767                 goto errout;
 768         memcpy(prog->insns, insn_data, insn_data_sz);
 769
 770         return 0;
 771 errout:
 772         pr_warn("sec '%s': failed to allocate memory for prog '%s'\n", sec_name, name);
 773         bpf_program__exit(prog);
 774         return -ENOMEM;
 775 }
 776
 777 static int
 778 bpf_object__add_programs(struct bpf_object *obj, Elf_Data *sec_data,
 779                          const char *sec_name, int sec_idx)
 780 {
 781         Elf_Data *symbols = obj->efile.symbols;
 782         struct bpf_program *prog, *progs;
 783         void *data = sec_data->d_buf;
 784         size_t sec_sz = sec_data->d_size, sec_off, prog_sz, nr_syms;
 785         int nr_progs, err, i;
 786         const char *name;
 787         Elf64_Sym *sym;
 788
 789         progs = obj->programs;
 790         nr_progs = obj->nr_programs;
 791         nr_syms = symbols->d_size / sizeof(Elf64_Sym);
 792         sec_off = 0;
 793
 794         for (i = 0; i < nr_syms; i++) {
 795                 sym = elf_sym_by_idx(obj, i);
 796
 797                 if (sym->st_shndx != sec_idx)
 798                         continue;
 799                 if (ELF64_ST_TYPE(sym->st_info) != STT_FUNC)
 800                         continue;
 801
 802                 prog_sz = sym->st_size;
 803                 sec_off = sym->st_value;
 804
 805                 name = elf_sym_str(obj, sym->st_name);
 806                 if (!name) {
 807                         pr_warn("sec '%s': failed to get symbol name for offset %zu\n",
 808                                 sec_name, sec_off);
 809                         return -LIBBPF_ERRNO__FORMAT;
 810                 }
 811
 812                 if (sec_off + prog_sz > sec_sz) {
 813                         pr_warn("sec '%s': program at offset %zu crosses section boundary\n",
 814                                 sec_name, sec_off);
 815                         return -LIBBPF_ERRNO__FORMAT;
 816                 }
 817
 818                 if (sec_idx != obj->efile.text_shndx && ELF64_ST_BIND(sym->st_info) == STB_LOCAL) {
 819                         pr_warn("sec '%s': program '%s' is static and not supported\n", sec_name, name);
 820                         return -ENOTSUP;
 821                 }
 822
 823                 pr_debug("sec '%s': found program '%s' at insn offset %zu (%zu bytes), code size %zu insns (%zu bytes)\n",
 824                          sec_name, name, sec_off / BPF_INSN_SZ, sec_off, prog_sz / BPF_INSN_SZ, prog_sz);
 825
 826                 progs = libbpf_reallocarray(progs, nr_progs + 1, sizeof(*progs));
 827                 if (!progs) {
 828                         /*
 829                          * In this case the original obj->programs
 830                          * is still valid, so don't need special treat for
 831                          * bpf_close_object().
 832                          */
 833                         pr_warn("sec '%s': failed to alloc memory for new program '%s'\n",
 834                                 sec_name, name);
 835                         return -ENOMEM;
 836                 }
 837                 obj->programs = progs;
 838
 839                 prog = &progs[nr_progs];
 840
 841                 err = bpf_object__init_prog(obj, prog, name, sec_idx, sec_name,
 842                                             sec_off, data + sec_off, prog_sz);
 843                 if (err)
 844                         return err;
 845
 846                 /* if function is a global/weak symbol, but has restricted
 847                  * (STV_HIDDEN or STV_INTERNAL) visibility, mark its BTF FUNC
 848                  * as static to enable more permissive BPF verification mode
 849                  * with more outside context available to BPF verifier
 850                  */
 851                 if (ELF64_ST_BIND(sym->st_info) != STB_LOCAL
 852                     && (ELF64_ST_VISIBILITY(sym->st_other) == STV_HIDDEN
 853                         || ELF64_ST_VISIBILITY(sym->st_other) == STV_INTERNAL))
 854                         prog->mark_btf_static = true;
 855
 856                 nr_progs++;
 857                 obj->nr_programs = nr_progs;
 858         }
 859
 860         return 0;
 861 }
 862
 863 __u32 get_kernel_version(void)
 864 {
 865         /* On Ubuntu LINUX_VERSION_CODE doesn't correspond to info.release,
 866          * but Ubuntu provides /proc/version_signature file, as described at
 867          * https://ubuntu.com/kernel, with an example contents below, which we
 868          * can use to get a proper LINUX_VERSION_CODE.
 869          *
 870          *   Ubuntu 5.4.0-12.15-generic 5.4.8
 871          *
 872          * In the above, 5.4.8 is what kernel is actually expecting, while
 873          * uname() call will return 5.4.0 in info.release.
 874          */
 875         const char *ubuntu_kver_file = "/proc/version_signature";
 876         __u32 major, minor, patch;
 877         struct utsname info;
 878
 879         if (access(ubuntu_kver_file, R_OK) == 0) {
 880                 FILE *f;
 881
 882                 f = fopen(ubuntu_kver_file, "r");
 883                 if (f) {
 884                         if (fscanf(f, "%*s %*s %d.%d.%d\n", &major, &minor, &patch) == 3) {
 885                                 fclose(f);
 886                                 return KERNEL_VERSION(major, minor, patch);
 887                         }
 888                         fclose(f);
 889                 }
 890                 /* something went wrong, fall back to uname() approach */
 891         }
 892
 893         uname(&info);
 894         if (sscanf(info.release, "%u.%u.%u", &major, &minor, &patch) != 3)
 895                 return 0;
 896         return KERNEL_VERSION(major, minor, patch);
 897 }
 898
 899 static const struct btf_member *
 900 find_member_by_offset(const struct btf_type *t, __u32 bit_offset)
 901 {
 902         struct btf_member *m;
 903         int i;
 904
 905         for (i = 0, m = btf_members(t); i < btf_vlen(t); i++, m++) {
 906                 if (btf_member_bit_offset(t, i) == bit_offset)
 907                         return m;
 908         }
 909
 910         return NULL;
 911 }
 912
 913 static const struct btf_member *
 914 find_member_by_name(const struct btf *btf, const struct btf_type *t,
 915                     const char *name)
 916 {
 917         struct btf_member *m;
 918         int i;
 919
 920         for (i = 0, m = btf_members(t); i < btf_vlen(t); i++, m++) {
 921                 if (!strcmp(btf__name_by_offset(btf, m->name_off), name))
 922                         return m;
 923         }
 924
 925         return NULL;
 926 }
 927
/* Prefix put in front of a struct_ops type name when looking up the
 * corresponding kernel "map value" struct, e.g.
 * "bpf_struct_ops_" + "tcp_congestion_ops".
 */
#define STRUCT_OPS_VALUE_PREFIX "bpf_struct_ops_"
/* Forward declaration: called by find_struct_ops_kern_types() below. */
static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix,
                                   const char *name, __u32 kind);
 931
 932 static int
 933 find_struct_ops_kern_types(const struct btf *btf, const char *tname,
 934                            const struct btf_type **type, __u32 *type_id,
 935                            const struct btf_type **vtype, __u32 *vtype_id,
 936                            const struct btf_member **data_member)
 937 {
 938         const struct btf_type *kern_type, *kern_vtype;
 939         const struct btf_member *kern_data_member;
 940         __s32 kern_vtype_id, kern_type_id;
 941         __u32 i;
 942
 943         kern_type_id = btf__find_by_name_kind(btf, tname, BTF_KIND_STRUCT);
 944         if (kern_type_id < 0) {
 945                 pr_warn("struct_ops init_kern: struct %s is not found in kernel BTF\n",
 946                         tname);
 947                 return kern_type_id;
 948         }
 949         kern_type = btf__type_by_id(btf, kern_type_id);
 950
 951         /* Find the corresponding "map_value" type that will be used
 952          * in map_update(BPF_MAP_TYPE_STRUCT_OPS).  For example,
 953          * find "struct bpf_struct_ops_tcp_congestion_ops" from the
 954          * btf_vmlinux.
 955          */
 956         kern_vtype_id = find_btf_by_prefix_kind(btf, STRUCT_OPS_VALUE_PREFIX,
 957                                                 tname, BTF_KIND_STRUCT);
 958         if (kern_vtype_id < 0) {
 959                 pr_warn("struct_ops init_kern: struct %s%s is not found in kernel BTF\n",
 960                         STRUCT_OPS_VALUE_PREFIX, tname);
 961                 return kern_vtype_id;
 962         }
 963         kern_vtype = btf__type_by_id(btf, kern_vtype_id);
 964
 965         /* Find "struct tcp_congestion_ops" from
 966          * struct bpf_struct_ops_tcp_congestion_ops {
 967          *      [ ... ]
 968          *      struct tcp_congestion_ops data;
 969          * }
 970          */
 971         kern_data_member = btf_members(kern_vtype);
 972         for (i = 0; i < btf_vlen(kern_vtype); i++, kern_data_member++) {
 973                 if (kern_data_member->type == kern_type_id)
 974                         break;
 975         }
 976         if (i == btf_vlen(kern_vtype)) {
 977                 pr_warn("struct_ops init_kern: struct %s data is not found in struct %s%s\n",
 978                         tname, STRUCT_OPS_VALUE_PREFIX, tname);
 979                 return -EINVAL;
 980         }
 981
 982         *type = kern_type;
 983         *type_id = kern_type_id;
 984         *vtype = kern_vtype;
 985         *vtype_id = kern_vtype_id;
 986         *data_member = kern_data_member;
 987
 988         return 0;
 989 }
 990
 991 static bool bpf_map__is_struct_ops(const struct bpf_map *map)
 992 {
 993         return map->def.type == BPF_MAP_TYPE_STRUCT_OPS;
 994 }
 995
/* Init the map's fields that depend on kern_btf.
 *
 * @map is a struct_ops map whose st_ops describes the struct as seen by the
 * BPF object (@btf); @kern_btf is the BTF in which the kernel's version of
 * the same struct is looked up.
 *
 * The function resolves the kernel types, sets the map's value size and
 * btf_vmlinux_value_type_id from the kernel "map value" struct, allocates
 * st_ops->kern_vdata, and then walks every member of the object's struct:
 *   - the member must exist by name in the kernel struct, must not be a
 *     bitfield on either side, and must have the same BTF kind;
 *   - pointer members with a program attached get the program's
 *     attach_btf_id/expected_attach_type set and their kernel offset
 *     recorded in st_ops->kern_func_off[];
 *   - all other members are copied from st_ops->data into kern_vdata,
 *     provided both sides resolve to the same size.
 *
 * Returns 0 on success, -ENOMEM if kern_vdata cannot be allocated, -ENOTSUP
 * on any member mismatch, or the error from find_struct_ops_kern_types().
 */
static int bpf_map__init_kern_struct_ops(struct bpf_map *map,
                                         const struct btf *btf,
                                         const struct btf *kern_btf)
{
        const struct btf_member *member, *kern_member, *kern_data_member;
        const struct btf_type *type, *kern_type, *kern_vtype;
        __u32 i, kern_type_id, kern_vtype_id, kern_data_off;
        struct bpf_struct_ops *st_ops;
        void *data, *kern_data;
        const char *tname;
        int err;

        st_ops = map->st_ops;
        type = st_ops->type;
        tname = st_ops->tname;
        err = find_struct_ops_kern_types(kern_btf, tname,
                                         &kern_type, &kern_type_id,
                                         &kern_vtype, &kern_vtype_id,
                                         &kern_data_member);
        if (err)
                return err;

        pr_debug("struct_ops init_kern %s: type_id:%u kern_type_id:%u kern_vtype_id:%u\n",
                 map->name, st_ops->type_id, kern_type_id, kern_vtype_id);

        /* the map value is the kernel wrapper struct, not the bare ops struct */
        map->def.value_size = kern_vtype->size;
        map->btf_vmlinux_value_type_id = kern_vtype_id;

        st_ops->kern_vdata = calloc(1, kern_vtype->size);
        if (!st_ops->kern_vdata)
                return -ENOMEM;

        data = st_ops->data;
        /* member offsets are in bits; convert to a byte offset */
        kern_data_off = kern_data_member->offset / 8;
        /* kern_data points at the ops struct embedded in the wrapper */
        kern_data = st_ops->kern_vdata + kern_data_off;

        member = btf_members(type);
        for (i = 0; i < btf_vlen(type); i++, member++) {
                const struct btf_type *mtype, *kern_mtype;
                __u32 mtype_id, kern_mtype_id;
                void *mdata, *kern_mdata;
                __s64 msize, kern_msize;
                __u32 moff, kern_moff;
                __u32 kern_member_idx;
                const char *mname;

                /* members are matched by name, not by position */
                mname = btf__name_by_offset(btf, member->name_off);
                kern_member = find_member_by_name(kern_btf, kern_type, mname);
                if (!kern_member) {
                        pr_warn("struct_ops init_kern %s: Cannot find member %s in kernel BTF\n",
                                map->name, mname);
                        return -ENOTSUP;
                }

                /* index of the matched member within the kernel struct */
                kern_member_idx = kern_member - btf_members(kern_type);
                if (btf_member_bitfield_size(type, i) ||
                    btf_member_bitfield_size(kern_type, kern_member_idx)) {
                        pr_warn("struct_ops init_kern %s: bitfield %s is not supported\n",
                                map->name, mname);
                        return -ENOTSUP;
                }

                moff = member->offset / 8;
                kern_moff = kern_member->offset / 8;

                mdata = data + moff;
                kern_mdata = kern_data + kern_moff;

                mtype = skip_mods_and_typedefs(btf, member->type, &mtype_id);
                kern_mtype = skip_mods_and_typedefs(kern_btf, kern_member->type,
                                                    &kern_mtype_id);
                if (BTF_INFO_KIND(mtype->info) !=
                    BTF_INFO_KIND(kern_mtype->info)) {
                        pr_warn("struct_ops init_kern %s: Unmatched member type %s %u != %u(kernel)\n",
                                map->name, mname, BTF_INFO_KIND(mtype->info),
                                BTF_INFO_KIND(kern_mtype->info));
                        return -ENOTSUP;
                }

                if (btf_is_ptr(mtype)) {
                        struct bpf_program *prog;

                        /* pointer member without a program: nothing to set up */
                        prog = st_ops->progs[i];
                        if (!prog)
                                continue;

                        /* look through the kernel pointer at its target type */
                        kern_mtype = skip_mods_and_typedefs(kern_btf,
                                                            kern_mtype->type,
                                                            &kern_mtype_id);

                        /* mtype->type must be a func_proto which was
                         * guaranteed in bpf_object__collect_st_ops_relos(),
                         * so only check kern_mtype for func_proto here.
                         */
                        if (!btf_is_func_proto(kern_mtype)) {
                                pr_warn("struct_ops init_kern %s: kernel member %s is not a func ptr\n",
                                        map->name, mname);
                                return -ENOTSUP;
                        }

                        prog->attach_btf_id = kern_type_id;
                        prog->expected_attach_type = kern_member_idx;

                        /* byte offset of this func ptr within kern_vdata */
                        st_ops->kern_func_off[i] = kern_data_off + kern_moff;

                        pr_debug("struct_ops init_kern %s: func ptr %s is set to prog %s from data(+%u) to kern_data(+%u)\n",
                                 map->name, mname, prog->name, moff,
                                 kern_moff);

                        continue;
                }

                /* non-pointer member: sizes must agree before copying */
                msize = btf__resolve_size(btf, mtype_id);
                kern_msize = btf__resolve_size(kern_btf, kern_mtype_id);
                if (msize < 0 || kern_msize < 0 || msize != kern_msize) {
                        pr_warn("struct_ops init_kern %s: Error in size of member %s: %zd != %zd(kernel)\n",
                                map->name, mname, (ssize_t)msize,
                                (ssize_t)kern_msize);
                        return -ENOTSUP;
                }

                pr_debug("struct_ops init_kern %s: copy %s %u bytes from data(+%u) to kern_data(+%u)\n",
                         map->name, mname, (unsigned int)msize,
                         moff, kern_moff);
                memcpy(kern_mdata, mdata, msize);
        }

        return 0;
}
1126
1127 static int bpf_object__init_kern_struct_ops_maps(struct bpf_object *obj)
1128 {
1129         struct bpf_map *map;
1130         size_t i;
1131         int err;
1132
1133         for (i = 0; i < obj->nr_maps; i++) {
1134                 map = &obj->maps[i];
1135
1136                 if (!bpf_map__is_struct_ops(map))
1137                         continue;
1138
1139                 err = bpf_map__init_kern_struct_ops(map, obj->btf,
1140                                                     obj->btf_vmlinux);
1141                 if (err)
1142                         return err;
1143         }
1144
1145         return 0;
1146 }
1147
1148 static int bpf_object__init_struct_ops_maps(struct bpf_object *obj)
1149 {
1150         const struct btf_type *type, *datasec;
1151         const struct btf_var_secinfo *vsi;
1152         struct bpf_struct_ops *st_ops;
1153         const char *tname, *var_name;
1154         __s32 type_id, datasec_id;
1155         const struct btf *btf;
1156         struct bpf_map *map;
1157         __u32 i;
1158
1159         if (obj->efile.st_ops_shndx == -1)
1160                 return 0;
1161
1162         btf = obj->btf;
1163         datasec_id = btf__find_by_name_kind(btf, STRUCT_OPS_SEC,
1164                                             BTF_KIND_DATASEC);
1165         if (datasec_id < 0) {
1166                 pr_warn("struct_ops init: DATASEC %s not found\n",
1167                         STRUCT_OPS_SEC);
1168                 return -EINVAL;
1169         }
1170
1171         datasec = btf__type_by_id(btf, datasec_id);
1172         vsi = btf_var_secinfos(datasec);
1173         for (i = 0; i < btf_vlen(datasec); i++, vsi++) {
1174                 type = btf__type_by_id(obj->btf, vsi->type);
1175                 var_name = btf__name_by_offset(obj->btf, type->name_off);
1176
1177                 type_id = btf__resolve_type(obj->btf, vsi->type);
1178                 if (type_id < 0) {
1179                         pr_warn("struct_ops init: Cannot resolve var type_id %u in DATASEC %s\n",
1180                                 vsi->type, STRUCT_OPS_SEC);
1181                         return -EINVAL;
1182                 }
1183
1184                 type = btf__type_by_id(obj->btf, type_id);
1185                 tname = btf__name_by_offset(obj->btf, type->name_off);
1186                 if (!tname[0]) {
1187                         pr_warn("struct_ops init: anonymous type is not supported\n");
1188                         return -ENOTSUP;
1189                 }
1190                 if (!btf_is_struct(type)) {
1191                         pr_warn("struct_ops init: %s is not a struct\n", tname);
1192                         return -EINVAL;
1193                 }
1194
1195                 map = bpf_object__add_map(obj);
1196                 if (IS_ERR(map))
1197                         return PTR_ERR(map);
1198
1199                 map->sec_idx = obj->efile.st_ops_shndx;
1200                 map->sec_offset = vsi->offset;
1201                 map->name = strdup(var_name);
1202                 if (!map->name)
1203                         return -ENOMEM;
1204
1205                 map->def.type = BPF_MAP_TYPE_STRUCT_OPS;
1206                 map->def.key_size = sizeof(int);
1207                 map->def.value_size = type->size;
1208                 map->def.max_entries = 1;
1209
1210                 map->st_ops = calloc(1, sizeof(*map->st_ops));
1211                 if (!map->st_ops)
1212                         return -ENOMEM;
1213                 st_ops = map->st_ops;
1214                 st_ops->data = malloc(type->size);
1215                 st_ops->progs = calloc(btf_vlen(type), sizeof(*st_ops->progs));
1216                 st_ops->kern_func_off = malloc(btf_vlen(type) *
1217                                                sizeof(*st_ops->kern_func_off));
1218                 if (!st_ops->data || !st_ops->progs || !st_ops->kern_func_off)
1219                         return -ENOMEM;
1220
1221                 if (vsi->offset + type->size > obj->efile.st_ops_data->d_size) {
1222                         pr_warn("struct_ops init: var %s is beyond the end of DATASEC %s\n",
1223                                 var_name, STRUCT_OPS_SEC);
1224                         return -EINVAL;
1225                 }
1226
1227                 memcpy(st_ops->data,
1228                        obj->efile.st_ops_data->d_buf + vsi->offset,
1229                        type->size);
1230                 st_ops->tname = tname;
1231                 st_ops->type = type;
1232                 st_ops->type_id = type_id;
1233
1234                 pr_debug("struct_ops init: struct %s(type_id=%u) %s found at offset %u\n",
1235                          tname, type_id, var_name, vsi->offset);
1236         }
1237
1238         return 0;
1239 }
1240
1241 static struct bpf_object *bpf_object__new(const char *path,
1242                                           const void *obj_buf,
1243                                           size_t obj_buf_sz,
1244                                           const char *obj_name)
1245 {
1246         struct bpf_object *obj;
1247         char *end;
1248
1249         obj = calloc(1, sizeof(struct bpf_object) + strlen(path) + 1);
1250         if (!obj) {
1251                 pr_warn("alloc memory failed for %s\n", path);
1252                 return ERR_PTR(-ENOMEM);
1253         }
1254
1255         strcpy(obj->path, path);
1256         if (obj_name) {
1257                 libbpf_strlcpy(obj->name, obj_name, sizeof(obj->name));
1258         } else {
1259                 /* Using basename() GNU version which doesn't modify arg. */
1260                 libbpf_strlcpy(obj->name, basename((void *)path), sizeof(obj->name));
1261                 end = strchr(obj->name, '.');
1262                 if (end)
1263                         *end = 0;
1264         }
1265
1266         obj->efile.fd = -1;
1267         /*
1268          * Caller of this function should also call
1269          * bpf_object__elf_finish() after data collection to return
1270          * obj_buf to user. If not, we should duplicate the buffer to
1271          * avoid user freeing them before elf finish.
1272          */
1273         obj->efile.obj_buf = obj_buf;
1274         obj->efile.obj_buf_sz = obj_buf_sz;
1275         obj->efile.maps_shndx = -1;
1276         obj->efile.btf_maps_shndx = -1;
1277         obj->efile.st_ops_shndx = -1;
1278         obj->kconfig_map_idx = -1;
1279
1280         obj->kern_version = get_kernel_version();
1281         obj->loaded = false;
1282
1283         return obj;
1284 }
1285
1286 static void bpf_object__elf_finish(struct bpf_object *obj)
1287 {
1288         if (!obj->efile.elf)
1289                 return;
1290
1291         elf_end(obj->efile.elf);
1292         obj->efile.elf = NULL;
1293         obj->efile.symbols = NULL;
1294         obj->efile.st_ops_data = NULL;
1295
1296         zfree(&obj->efile.secs);
1297         obj->efile.sec_cnt = 0;
1298         zclose(obj->efile.fd);
1299         obj->efile.obj_buf = NULL;
1300         obj->efile.obj_buf_sz = 0;
1301 }
1302
1303 static int bpf_object__elf_init(struct bpf_object *obj)
1304 {
1305         Elf64_Ehdr *ehdr;
1306         int err = 0;
1307         Elf *elf;
1308
1309         if (obj->efile.elf) {
1310                 pr_warn("elf: init internal error\n");
1311                 return -LIBBPF_ERRNO__LIBELF;
1312         }
1313
1314         if (obj->efile.obj_buf_sz > 0) {
1315                 /* obj_buf should have been validated by bpf_object__open_mem(). */
1316                 elf = elf_memory((char *)obj->efile.obj_buf, obj->efile.obj_buf_sz);
1317         } else {
1318                 obj->efile.fd = open(obj->path, O_RDONLY | O_CLOEXEC);
1319                 if (obj->efile.fd < 0) {
1320                         char errmsg[STRERR_BUFSIZE], *cp;
1321
1322                         err = -errno;
1323                         cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
1324                         pr_warn("elf: failed to open %s: %s\n", obj->path, cp);
1325                         return err;
1326                 }
1327
1328                 elf = elf_begin(obj->efile.fd, ELF_C_READ_MMAP, NULL);
1329         }
1330
1331         if (!elf) {
1332                 pr_warn("elf: failed to open %s as ELF file: %s\n", obj->path, elf_errmsg(-1));
1333                 err = -LIBBPF_ERRNO__LIBELF;
1334                 goto errout;
1335         }
1336
1337         obj->efile.elf = elf;
1338
1339         if (elf_kind(elf) != ELF_K_ELF) {
1340                 err = -LIBBPF_ERRNO__FORMAT;
1341                 pr_warn("elf: '%s' is not a proper ELF object\n", obj->path);
1342                 goto errout;
1343         }
1344
1345         if (gelf_getclass(elf) != ELFCLASS64) {
1346                 err = -LIBBPF_ERRNO__FORMAT;
1347                 pr_warn("elf: '%s' is not a 64-bit ELF object\n", obj->path);
1348                 goto errout;
1349         }
1350
1351         obj->efile.ehdr = ehdr = elf64_getehdr(elf);
1352         if (!obj->efile.ehdr) {
1353                 pr_warn("elf: failed to get ELF header from %s: %s\n", obj->path, elf_errmsg(-1));
1354                 err = -LIBBPF_ERRNO__FORMAT;
1355                 goto errout;
1356         }
1357
1358         if (elf_getshdrstrndx(elf, &obj->efile.shstrndx)) {
1359                 pr_warn("elf: failed to get section names section index for %s: %s\n",
1360                         obj->path, elf_errmsg(-1));
1361                 err = -LIBBPF_ERRNO__FORMAT;
1362                 goto errout;
1363         }
1364
1365         /* Elf is corrupted/truncated, avoid calling elf_strptr. */
1366         if (!elf_rawdata(elf_getscn(elf, obj->efile.shstrndx), NULL)) {
1367                 pr_warn("elf: failed to get section names strings from %s: %s\n",
1368                         obj->path, elf_errmsg(-1));
1369                 err = -LIBBPF_ERRNO__FORMAT;
1370                 goto errout;
1371         }
1372
1373         /* Old LLVM set e_machine to EM_NONE */
1374         if (ehdr->e_type != ET_REL || (ehdr->e_machine && ehdr->e_machine != EM_BPF)) {
1375                 pr_warn("elf: %s is not a valid eBPF object file\n", obj->path);
1376                 err = -LIBBPF_ERRNO__FORMAT;
1377                 goto errout;
1378         }
1379
1380         return 0;
1381 errout:
1382         bpf_object__elf_finish(obj);
1383         return err;
1384 }
1385
1386 static int bpf_object__check_endianness(struct bpf_object *obj)
1387 {
1388 #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
1389         if (obj->efile.ehdr->e_ident[EI_DATA] == ELFDATA2LSB)
1390                 return 0;
1391 #elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
1392         if (obj->efile.ehdr->e_ident[EI_DATA] == ELFDATA2MSB)
1393                 return 0;
1394 #else
1395 # error "Unrecognized __BYTE_ORDER__"
1396 #endif
1397         pr_warn("elf: endianness mismatch in %s.\n", obj->path);
1398         return -LIBBPF_ERRNO__ENDIAN;
1399 }
1400
1401 static int
1402 bpf_object__init_license(struct bpf_object *obj, void *data, size_t size)
1403 {
1404         /* libbpf_strlcpy() only copies first N - 1 bytes, so size + 1 won't
1405          * go over allowed ELF data section buffer
1406          */
1407         libbpf_strlcpy(obj->license, data, min(size + 1, sizeof(obj->license)));
1408         pr_debug("license of %s is %s\n", obj->path, obj->license);
1409         return 0;
1410 }
1411
1412 static int
1413 bpf_object__init_kversion(struct bpf_object *obj, void *data, size_t size)
1414 {
1415         __u32 kver;
1416
1417         if (size != sizeof(kver)) {
1418                 pr_warn("invalid kver section in %s\n", obj->path);
1419                 return -LIBBPF_ERRNO__FORMAT;
1420         }
1421         memcpy(&kver, data, sizeof(kver));
1422         obj->kern_version = kver;
1423         pr_debug("kernel version of %s is %x\n", obj->path, obj->kern_version);
1424         return 0;
1425 }
1426
/* True for the map types whose values are themselves maps. */
static bool bpf_map_type__is_map_in_map(enum bpf_map_type type)
{
        switch (type) {
        case BPF_MAP_TYPE_ARRAY_OF_MAPS:
        case BPF_MAP_TYPE_HASH_OF_MAPS:
                return true;
        default:
                return false;
        }
}
1434
1435 static int find_elf_sec_sz(const struct bpf_object *obj, const char *name, __u32 *size)
1436 {
1437         Elf_Data *data;
1438         Elf_Scn *scn;
1439
1440         if (!name)
1441                 return -EINVAL;
1442
1443         scn = elf_sec_by_name(obj, name);
1444         data = elf_sec_data(obj, scn);
1445         if (data) {
1446                 *size = data->d_size;
1447                 return 0; /* found it */
1448         }
1449
1450         return -ENOENT;
1451 }
1452
1453 static int find_elf_var_offset(const struct bpf_object *obj, const char *name, __u32 *off)
1454 {
1455         Elf_Data *symbols = obj->efile.symbols;
1456         const char *sname;
1457         size_t si;
1458
1459         if (!name || !off)
1460                 return -EINVAL;
1461
1462         for (si = 0; si < symbols->d_size / sizeof(Elf64_Sym); si++) {
1463                 Elf64_Sym *sym = elf_sym_by_idx(obj, si);
1464
1465                 if (ELF64_ST_TYPE(sym->st_info) != STT_OBJECT)
1466                         continue;
1467
1468                 if (ELF64_ST_BIND(sym->st_info) != STB_GLOBAL &&
1469                     ELF64_ST_BIND(sym->st_info) != STB_WEAK)
1470                         continue;
1471
1472                 sname = elf_sym_str(obj, sym->st_name);
1473                 if (!sname) {
1474                         pr_warn("failed to get sym name string for var %s\n", name);
1475                         return -EIO;
1476                 }
1477                 if (strcmp(name, sname) == 0) {
1478                         *off = sym->st_value;
1479                         return 0;
1480                 }
1481         }
1482
1483         return -ENOENT;
1484 }
1485
1486 static struct bpf_map *bpf_object__add_map(struct bpf_object *obj)
1487 {
1488         struct bpf_map *map;
1489         int err;
1490
1491         err = libbpf_ensure_mem((void **)&obj->maps, &obj->maps_cap,
1492                                 sizeof(*obj->maps), obj->nr_maps + 1);
1493         if (err)
1494                 return ERR_PTR(err);
1495
1496         map = &obj->maps[obj->nr_maps++];
1497         map->obj = obj;
1498         map->fd = -1;
1499         map->inner_map_fd = -1;
1500         map->autocreate = true;
1501
1502         return map;
1503 }
1504
1505 static size_t bpf_map_mmap_sz(const struct bpf_map *map)
1506 {
1507         long page_sz = sysconf(_SC_PAGE_SIZE);
1508         size_t map_sz;
1509
1510         map_sz = (size_t)roundup(map->def.value_size, 8) * map->def.max_entries;
1511         map_sz = roundup(map_sz, page_sz);
1512         return map_sz;
1513 }
1514
1515 static char *internal_map_name(struct bpf_object *obj, const char *real_name)
1516 {
1517         char map_name[BPF_OBJ_NAME_LEN], *p;
1518         int pfx_len, sfx_len = max((size_t)7, strlen(real_name));
1519
1520         /* This is one of the more confusing parts of libbpf for various
1521          * reasons, some of which are historical. The original idea for naming
1522          * internal names was to include as much of BPF object name prefix as
1523          * possible, so that it can be distinguished from similar internal
1524          * maps of a different BPF object.
1525          * As an example, let's say we have bpf_object named 'my_object_name'
1526          * and internal map corresponding to '.rodata' ELF section. The final
1527          * map name advertised to user and to the kernel will be
1528          * 'my_objec.rodata', taking first 8 characters of object name and
1529          * entire 7 characters of '.rodata'.
1530          * Somewhat confusingly, if internal map ELF section name is shorter
1531          * than 7 characters, e.g., '.bss', we still reserve 7 characters
1532          * for the suffix, even though we only have 4 actual characters, and
1533          * resulting map will be called 'my_objec.bss', not even using all 15
1534          * characters allowed by the kernel. Oh well, at least the truncated
1535          * object name is somewhat consistent in this case. But if the map
1536          * name is '.kconfig', we'll still have entirety of '.kconfig' added
1537          * (8 chars) and thus will be left with only first 7 characters of the
1538          * object name ('my_obje'). Happy guessing, user, that the final map
1539          * name will be "my_obje.kconfig".
1540          * Now, with libbpf starting to support arbitrarily named .rodata.*
1541          * and .data.* data sections, it's possible that ELF section name is
1542          * longer than allowed 15 chars, so we now need to be careful to take
1543          * only up to 15 first characters of ELF name, taking no BPF object
1544          * name characters at all. So '.rodata.abracadabra' will result in
1545          * '.rodata.abracad' kernel and user-visible name.
1546          * We need to keep this convoluted logic intact for .data, .bss and
1547          * .rodata maps, but for new custom .data.custom and .rodata.custom
1548          * maps we use their ELF names as is, not prepending bpf_object name
1549          * in front. We still need to truncate them to 15 characters for the
1550          * kernel. Full name can be recovered for such maps by using DATASEC
1551          * BTF type associated with such map's value type, though.
1552          */
1553         if (sfx_len >= BPF_OBJ_NAME_LEN)
1554                 sfx_len = BPF_OBJ_NAME_LEN - 1;
1555
1556         /* if there are two or more dots in map name, it's a custom dot map */
1557         if (strchr(real_name + 1, '.') != NULL)
1558                 pfx_len = 0;
1559         else
1560                 pfx_len = min((size_t)BPF_OBJ_NAME_LEN - sfx_len - 1, strlen(obj->name));
1561
1562         snprintf(map_name, sizeof(map_name), "%.*s%.*s", pfx_len, obj->name,
1563                  sfx_len, real_name);
1564
1565         /* sanitise map name to characters allowed by kernel */
1566         for (p = map_name; *p && p < map_name + sizeof(map_name); p++)
1567                 if (!isalnum(*p) && *p != '_' && *p != '.')
1568                         *p = '_';
1569
1570         return strdup(map_name);
1571 }
1572
/* Forward declaration: called by bpf_object__init_internal_map() below. */
static int
bpf_map_find_btf_info(struct bpf_object *obj, struct bpf_map *map);
1575
1576 static int
1577 bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type,
1578                               const char *real_name, int sec_idx, void *data, size_t data_sz)
1579 {
1580         struct bpf_map_def *def;
1581         struct bpf_map *map;
1582         int err;
1583
1584         map = bpf_object__add_map(obj);
1585         if (IS_ERR(map))
1586                 return PTR_ERR(map);
1587
1588         map->libbpf_type = type;
1589         map->sec_idx = sec_idx;
1590         map->sec_offset = 0;
1591         map->real_name = strdup(real_name);
1592         map->name = internal_map_name(obj, real_name);
1593         if (!map->real_name || !map->name) {
1594                 zfree(&map->real_name);
1595                 zfree(&map->name);
1596                 return -ENOMEM;
1597         }
1598
1599         def = &map->def;
1600         def->type = BPF_MAP_TYPE_ARRAY;
1601         def->key_size = sizeof(int);
1602         def->value_size = data_sz;
1603         def->max_entries = 1;
1604         def->map_flags = type == LIBBPF_MAP_RODATA || type == LIBBPF_MAP_KCONFIG
1605                          ? BPF_F_RDONLY_PROG : 0;
1606         def->map_flags |= BPF_F_MMAPABLE;
1607
1608         pr_debug("map '%s' (global data): at sec_idx %d, offset %zu, flags %x.\n",
1609                  map->name, map->sec_idx, map->sec_offset, def->map_flags);
1610
1611         map->mmaped = mmap(NULL, bpf_map_mmap_sz(map), PROT_READ | PROT_WRITE,
1612                            MAP_SHARED | MAP_ANONYMOUS, -1, 0);
1613         if (map->mmaped == MAP_FAILED) {
1614                 err = -errno;
1615                 map->mmaped = NULL;
1616                 pr_warn("failed to alloc map '%s' content buffer: %d\n",
1617                         map->name, err);
1618                 zfree(&map->real_name);
1619                 zfree(&map->name);
1620                 return err;
1621         }
1622
1623         /* failures are fine because of maps like .rodata.str1.1 */
1624         (void) bpf_map_find_btf_info(obj, map);
1625
1626         if (data)
1627                 memcpy(map->mmaped, data, data_sz);
1628
1629         pr_debug("map %td is \"%s\"\n", map - obj->maps, map->name);
1630         return 0;
1631 }
1632
1633 static int bpf_object__init_global_data_maps(struct bpf_object *obj)
1634 {
1635         struct elf_sec_desc *sec_desc;
1636         const char *sec_name;
1637         int err = 0, sec_idx;
1638
1639         /*
1640          * Populate obj->maps with libbpf internal maps.
1641          */
1642         for (sec_idx = 1; sec_idx < obj->efile.sec_cnt; sec_idx++) {
1643                 sec_desc = &obj->efile.secs[sec_idx];
1644
1645                 switch (sec_desc->sec_type) {
1646                 case SEC_DATA:
1647                         sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx));
1648                         err = bpf_object__init_internal_map(obj, LIBBPF_MAP_DATA,
1649                                                             sec_name, sec_idx,
1650                                                             sec_desc->data->d_buf,
1651                                                             sec_desc->data->d_size);
1652                         break;
1653                 case SEC_RODATA:
1654                         obj->has_rodata = true;
1655                         sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx));
1656                         err = bpf_object__init_internal_map(obj, LIBBPF_MAP_RODATA,
1657                                                             sec_name, sec_idx,
1658                                                             sec_desc->data->d_buf,
1659                                                             sec_desc->data->d_size);
1660                         break;
1661                 case SEC_BSS:
1662                         sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx));
1663                         err = bpf_object__init_internal_map(obj, LIBBPF_MAP_BSS,
1664                                                             sec_name, sec_idx,
1665                                                             NULL,
1666                                                             sec_desc->data->d_size);
1667                         break;
1668                 default:
1669                         /* skip */
1670                         break;
1671                 }
1672                 if (err)
1673                         return err;
1674         }
1675         return 0;
1676 }
1677
1678
1679 static struct extern_desc *find_extern_by_name(const struct bpf_object *obj,
1680                                                const void *name)
1681 {
1682         int i;
1683
1684         for (i = 0; i < obj->nr_extern; i++) {
1685                 if (strcmp(obj->externs[i].name, name) == 0)
1686                         return &obj->externs[i];
1687         }
1688         return NULL;
1689 }
1690
1691 static int set_kcfg_value_tri(struct extern_desc *ext, void *ext_val,
1692                               char value)
1693 {
1694         switch (ext->kcfg.type) {
1695         case KCFG_BOOL:
1696                 if (value == 'm') {
1697                         pr_warn("extern (kcfg) %s=%c should be tristate or char\n",
1698                                 ext->name, value);
1699                         return -EINVAL;
1700                 }
1701                 *(bool *)ext_val = value == 'y' ? true : false;
1702                 break;
1703         case KCFG_TRISTATE:
1704                 if (value == 'y')
1705                         *(enum libbpf_tristate *)ext_val = TRI_YES;
1706                 else if (value == 'm')
1707                         *(enum libbpf_tristate *)ext_val = TRI_MODULE;
1708                 else /* value == 'n' */
1709                         *(enum libbpf_tristate *)ext_val = TRI_NO;
1710                 break;
1711         case KCFG_CHAR:
1712                 *(char *)ext_val = value;
1713                 break;
1714         case KCFG_UNKNOWN:
1715         case KCFG_INT:
1716         case KCFG_CHAR_ARR:
1717         default:
1718                 pr_warn("extern (kcfg) %s=%c should be bool, tristate, or char\n",
1719                         ext->name, value);
1720                 return -EINVAL;
1721         }
1722         ext->is_set = true;
1723         return 0;
1724 }
1725
1726 static int set_kcfg_value_str(struct extern_desc *ext, char *ext_val,
1727                               const char *value)
1728 {
1729         size_t len;
1730
1731         if (ext->kcfg.type != KCFG_CHAR_ARR) {
1732                 pr_warn("extern (kcfg) %s=%s should be char array\n", ext->name, value);
1733                 return -EINVAL;
1734         }
1735
1736         len = strlen(value);
1737         if (value[len - 1] != '"') {
1738                 pr_warn("extern (kcfg) '%s': invalid string config '%s'\n",
1739                         ext->name, value);
1740                 return -EINVAL;
1741         }
1742
1743         /* strip quotes */
1744         len -= 2;
1745         if (len >= ext->kcfg.sz) {
1746                 pr_warn("extern (kcfg) '%s': long string config %s of (%zu bytes) truncated to %d bytes\n",
1747                         ext->name, value, len, ext->kcfg.sz - 1);
1748                 len = ext->kcfg.sz - 1;
1749         }
1750         memcpy(ext_val, value + 1, len);
1751         ext_val[len] = '\0';
1752         ext->is_set = true;
1753         return 0;
1754 }
1755
1756 static int parse_u64(const char *value, __u64 *res)
1757 {
1758         char *value_end;
1759         int err;
1760
1761         errno = 0;
1762         *res = strtoull(value, &value_end, 0);
1763         if (errno) {
1764                 err = -errno;
1765                 pr_warn("failed to parse '%s' as integer: %d\n", value, err);
1766                 return err;
1767         }
1768         if (*value_end) {
1769                 pr_warn("failed to parse '%s' as integer completely\n", value);
1770                 return -EINVAL;
1771         }
1772         return 0;
1773 }
1774
1775 static bool is_kcfg_value_in_range(const struct extern_desc *ext, __u64 v)
1776 {
1777         int bit_sz = ext->kcfg.sz * 8;
1778
1779         if (ext->kcfg.sz == 8)
1780                 return true;
1781
1782         /* Validate that value stored in u64 fits in integer of `ext->sz`
1783          * bytes size without any loss of information. If the target integer
1784          * is signed, we rely on the following limits of integer type of
1785          * Y bits and subsequent transformation:
1786          *
1787          *     -2^(Y-1) <= X           <= 2^(Y-1) - 1
1788          *            0 <= X + 2^(Y-1) <= 2^Y - 1
1789          *            0 <= X + 2^(Y-1) <  2^Y
1790          *
1791          *  For unsigned target integer, check that all the (64 - Y) bits are
1792          *  zero.
1793          */
1794         if (ext->kcfg.is_signed)
1795                 return v + (1ULL << (bit_sz - 1)) < (1ULL << bit_sz);
1796         else
1797                 return (v >> bit_sz) == 0;
1798 }
1799
1800 static int set_kcfg_value_num(struct extern_desc *ext, void *ext_val,
1801                               __u64 value)
1802 {
1803         if (ext->kcfg.type != KCFG_INT && ext->kcfg.type != KCFG_CHAR) {
1804                 pr_warn("extern (kcfg) %s=%llu should be integer\n",
1805                         ext->name, (unsigned long long)value);
1806                 return -EINVAL;
1807         }
1808         if (!is_kcfg_value_in_range(ext, value)) {
1809                 pr_warn("extern (kcfg) %s=%llu value doesn't fit in %d bytes\n",
1810                         ext->name, (unsigned long long)value, ext->kcfg.sz);
1811                 return -ERANGE;
1812         }
1813         switch (ext->kcfg.sz) {
1814                 case 1: *(__u8 *)ext_val = value; break;
1815                 case 2: *(__u16 *)ext_val = value; break;
1816                 case 4: *(__u32 *)ext_val = value; break;
1817                 case 8: *(__u64 *)ext_val = value; break;
1818                 default:
1819                         return -EINVAL;
1820         }
1821         ext->is_set = true;
1822         return 0;
1823 }
1824
1825 static int bpf_object__process_kconfig_line(struct bpf_object *obj,
1826                                             char *buf, void *data)
1827 {
1828         struct extern_desc *ext;
1829         char *sep, *value;
1830         int len, err = 0;
1831         void *ext_val;
1832         __u64 num;
1833
1834         if (!str_has_pfx(buf, "CONFIG_"))
1835                 return 0;
1836
1837         sep = strchr(buf, '=');
1838         if (!sep) {
1839                 pr_warn("failed to parse '%s': no separator\n", buf);
1840                 return -EINVAL;
1841         }
1842
1843         /* Trim ending '\n' */
1844         len = strlen(buf);
1845         if (buf[len - 1] == '\n')
1846                 buf[len - 1] = '\0';
1847         /* Split on '=' and ensure that a value is present. */
1848         *sep = '\0';
1849         if (!sep[1]) {
1850                 *sep = '=';
1851                 pr_warn("failed to parse '%s': no value\n", buf);
1852                 return -EINVAL;
1853         }
1854
1855         ext = find_extern_by_name(obj, buf);
1856         if (!ext || ext->is_set)
1857                 return 0;
1858
1859         ext_val = data + ext->kcfg.data_off;
1860         value = sep + 1;
1861
1862         switch (*value) {
1863         case 'y': case 'n': case 'm':
1864                 err = set_kcfg_value_tri(ext, ext_val, *value);
1865                 break;
1866         case '"':
1867                 err = set_kcfg_value_str(ext, ext_val, value);
1868                 break;
1869         default:
1870                 /* assume integer */
1871                 err = parse_u64(value, &num);
1872                 if (err) {
1873                         pr_warn("extern (kcfg) %s=%s should be integer\n",
1874                                 ext->name, value);
1875                         return err;
1876                 }
1877                 err = set_kcfg_value_num(ext, ext_val, num);
1878                 break;
1879         }
1880         if (err)
1881                 return err;
1882         pr_debug("extern (kcfg) %s=%s\n", ext->name, value);
1883         return 0;
1884 }
1885
1886 static int bpf_object__read_kconfig_file(struct bpf_object *obj, void *data)
1887 {
1888         char buf[PATH_MAX];
1889         struct utsname uts;
1890         int len, err = 0;
1891         gzFile file;
1892
1893         uname(&uts);
1894         len = snprintf(buf, PATH_MAX, "/boot/config-%s", uts.release);
1895         if (len < 0)
1896                 return -EINVAL;
1897         else if (len >= PATH_MAX)
1898                 return -ENAMETOOLONG;
1899
1900         /* gzopen also accepts uncompressed files. */
1901         file = gzopen(buf, "r");
1902         if (!file)
1903                 file = gzopen("/proc/config.gz", "r");
1904
1905         if (!file) {
1906                 pr_warn("failed to open system Kconfig\n");
1907                 return -ENOENT;
1908         }
1909
1910         while (gzgets(file, buf, sizeof(buf))) {
1911                 err = bpf_object__process_kconfig_line(obj, buf, data);
1912                 if (err) {
1913                         pr_warn("error parsing system Kconfig line '%s': %d\n",
1914                                 buf, err);
1915                         goto out;
1916                 }
1917         }
1918
1919 out:
1920         gzclose(file);
1921         return err;
1922 }
1923
1924 static int bpf_object__read_kconfig_mem(struct bpf_object *obj,
1925                                         const char *config, void *data)
1926 {
1927         char buf[PATH_MAX];
1928         int err = 0;
1929         FILE *file;
1930
1931         file = fmemopen((void *)config, strlen(config), "r");
1932         if (!file) {
1933                 err = -errno;
1934                 pr_warn("failed to open in-memory Kconfig: %d\n", err);
1935                 return err;
1936         }
1937
1938         while (fgets(buf, sizeof(buf), file)) {
1939                 err = bpf_object__process_kconfig_line(obj, buf, data);
1940                 if (err) {
1941                         pr_warn("error parsing in-memory Kconfig line '%s': %d\n",
1942                                 buf, err);
1943                         break;
1944                 }
1945         }
1946
1947         fclose(file);
1948         return err;
1949 }
1950
1951 static int bpf_object__init_kconfig_map(struct bpf_object *obj)
1952 {
1953         struct extern_desc *last_ext = NULL, *ext;
1954         size_t map_sz;
1955         int i, err;
1956
1957         for (i = 0; i < obj->nr_extern; i++) {
1958                 ext = &obj->externs[i];
1959                 if (ext->type == EXT_KCFG)
1960                         last_ext = ext;
1961         }
1962
1963         if (!last_ext)
1964                 return 0;
1965
1966         map_sz = last_ext->kcfg.data_off + last_ext->kcfg.sz;
1967         err = bpf_object__init_internal_map(obj, LIBBPF_MAP_KCONFIG,
1968                                             ".kconfig", obj->efile.symbols_shndx,
1969                                             NULL, map_sz);
1970         if (err)
1971                 return err;
1972
1973         obj->kconfig_map_idx = obj->nr_maps - 1;
1974
1975         return 0;
1976 }
1977
1978 const struct btf_type *
1979 skip_mods_and_typedefs(const struct btf *btf, __u32 id, __u32 *res_id)
1980 {
1981         const struct btf_type *t = btf__type_by_id(btf, id);
1982
1983         if (res_id)
1984                 *res_id = id;
1985
1986         while (btf_is_mod(t) || btf_is_typedef(t)) {
1987                 if (res_id)
1988                         *res_id = t->type;
1989                 t = btf__type_by_id(btf, t->type);
1990         }
1991
1992         return t;
1993 }
1994
1995 static const struct btf_type *
1996 resolve_func_ptr(const struct btf *btf, __u32 id, __u32 *res_id)
1997 {
1998         const struct btf_type *t;
1999
2000         t = skip_mods_and_typedefs(btf, id, NULL);
2001         if (!btf_is_ptr(t))
2002                 return NULL;
2003
2004         t = skip_mods_and_typedefs(btf, t->type, res_id);
2005
2006         return btf_is_func_proto(t) ? t : NULL;
2007 }
2008
2009 static const char *__btf_kind_str(__u16 kind)
2010 {
2011         switch (kind) {
2012         case BTF_KIND_UNKN: return "void";
2013         case BTF_KIND_INT: return "int";
2014         case BTF_KIND_PTR: return "ptr";
2015         case BTF_KIND_ARRAY: return "array";
2016         case BTF_KIND_STRUCT: return "struct";
2017         case BTF_KIND_UNION: return "union";
2018         case BTF_KIND_ENUM: return "enum";
2019         case BTF_KIND_FWD: return "fwd";
2020         case BTF_KIND_TYPEDEF: return "typedef";
2021         case BTF_KIND_VOLATILE: return "volatile";
2022         case BTF_KIND_CONST: return "const";
2023         case BTF_KIND_RESTRICT: return "restrict";
2024         case BTF_KIND_FUNC: return "func";
2025         case BTF_KIND_FUNC_PROTO: return "func_proto";
2026         case BTF_KIND_VAR: return "var";
2027         case BTF_KIND_DATASEC: return "datasec";
2028         case BTF_KIND_FLOAT: return "float";
2029         case BTF_KIND_DECL_TAG: return "decl_tag";
2030         case BTF_KIND_TYPE_TAG: return "type_tag";
2031         case BTF_KIND_ENUM64: return "enum64";
2032         default: return "unknown";
2033         }
2034 }
2035
2036 const char *btf_kind_str(const struct btf_type *t)
2037 {
2038         return __btf_kind_str(btf_kind(t));
2039 }
2040
2041 /*
2042  * Fetch integer attribute of BTF map definition. Such attributes are
2043  * represented using a pointer to an array, in which dimensionality of array
2044  * encodes specified integer value. E.g., int (*type)[BPF_MAP_TYPE_ARRAY];
2045  * encodes `type => BPF_MAP_TYPE_ARRAY` key/value pair completely using BTF
2046  * type definition, while using only sizeof(void *) space in ELF data section.
2047  */
2048 static bool get_map_field_int(const char *map_name, const struct btf *btf,
2049                               const struct btf_member *m, __u32 *res)
2050 {
2051         const struct btf_type *t = skip_mods_and_typedefs(btf, m->type, NULL);
2052         const char *name = btf__name_by_offset(btf, m->name_off);
2053         const struct btf_array *arr_info;
2054         const struct btf_type *arr_t;
2055
2056         if (!btf_is_ptr(t)) {
2057                 pr_warn("map '%s': attr '%s': expected PTR, got %s.\n",
2058                         map_name, name, btf_kind_str(t));
2059                 return false;
2060         }
2061
2062         arr_t = btf__type_by_id(btf, t->type);
2063         if (!arr_t) {
2064                 pr_warn("map '%s': attr '%s': type [%u] not found.\n",
2065                         map_name, name, t->type);
2066                 return false;
2067         }
2068         if (!btf_is_array(arr_t)) {
2069                 pr_warn("map '%s': attr '%s': expected ARRAY, got %s.\n",
2070                         map_name, name, btf_kind_str(arr_t));
2071                 return false;
2072         }
2073         arr_info = btf_array(arr_t);
2074         *res = arr_info->nelems;
2075         return true;
2076 }
2077
2078 static int build_map_pin_path(struct bpf_map *map, const char *path)
2079 {
2080         char buf[PATH_MAX];
2081         int len;
2082
2083         if (!path)
2084                 path = "/sys/fs/bpf";
2085
2086         len = snprintf(buf, PATH_MAX, "%s/%s", path, bpf_map__name(map));
2087         if (len < 0)
2088                 return -EINVAL;
2089         else if (len >= PATH_MAX)
2090                 return -ENAMETOOLONG;
2091
2092         return bpf_map__set_pin_path(map, buf);
2093 }
2094
/*
 * Values accepted for the "pinning" attribute of a BTF-defined map.
 * Should match definition in bpf_helpers.h; the numeric values are spelled
 * out to make that correspondence explicit.
 */
enum libbpf_pin_type {
	LIBBPF_PIN_NONE = 0,
	/* PIN_BY_NAME: pin maps by name (in /sys/fs/bpf by default) */
	LIBBPF_PIN_BY_NAME = 1,
};
2101
2102 int parse_btf_map_def(const char *map_name, struct btf *btf,
2103                       const struct btf_type *def_t, bool strict,
2104                       struct btf_map_def *map_def, struct btf_map_def *inner_def)
2105 {
2106         const struct btf_type *t;
2107         const struct btf_member *m;
2108         bool is_inner = inner_def == NULL;
2109         int vlen, i;
2110
2111         vlen = btf_vlen(def_t);
2112         m = btf_members(def_t);
2113         for (i = 0; i < vlen; i++, m++) {
2114                 const char *name = btf__name_by_offset(btf, m->name_off);
2115
2116                 if (!name) {
2117                         pr_warn("map '%s': invalid field #%d.\n", map_name, i);
2118                         return -EINVAL;
2119                 }
2120                 if (strcmp(name, "type") == 0) {
2121                         if (!get_map_field_int(map_name, btf, m, &map_def->map_type))
2122                                 return -EINVAL;
2123                         map_def->parts |= MAP_DEF_MAP_TYPE;
2124                 } else if (strcmp(name, "max_entries") == 0) {
2125                         if (!get_map_field_int(map_name, btf, m, &map_def->max_entries))
2126                                 return -EINVAL;
2127                         map_def->parts |= MAP_DEF_MAX_ENTRIES;
2128                 } else if (strcmp(name, "map_flags") == 0) {
2129                         if (!get_map_field_int(map_name, btf, m, &map_def->map_flags))
2130                                 return -EINVAL;
2131                         map_def->parts |= MAP_DEF_MAP_FLAGS;
2132                 } else if (strcmp(name, "numa_node") == 0) {
2133                         if (!get_map_field_int(map_name, btf, m, &map_def->numa_node))
2134                                 return -EINVAL;
2135                         map_def->parts |= MAP_DEF_NUMA_NODE;
2136                 } else if (strcmp(name, "key_size") == 0) {
2137                         __u32 sz;
2138
2139                         if (!get_map_field_int(map_name, btf, m, &sz))
2140                                 return -EINVAL;
2141                         if (map_def->key_size && map_def->key_size != sz) {
2142                                 pr_warn("map '%s': conflicting key size %u != %u.\n",
2143                                         map_name, map_def->key_size, sz);
2144                                 return -EINVAL;
2145                         }
2146                         map_def->key_size = sz;
2147                         map_def->parts |= MAP_DEF_KEY_SIZE;
2148                 } else if (strcmp(name, "key") == 0) {
2149                         __s64 sz;
2150
2151                         t = btf__type_by_id(btf, m->type);
2152                         if (!t) {
2153                                 pr_warn("map '%s': key type [%d] not found.\n",
2154                                         map_name, m->type);
2155                                 return -EINVAL;
2156                         }
2157                         if (!btf_is_ptr(t)) {
2158                                 pr_warn("map '%s': key spec is not PTR: %s.\n",
2159                                         map_name, btf_kind_str(t));
2160                                 return -EINVAL;
2161                         }
2162                         sz = btf__resolve_size(btf, t->type);
2163                         if (sz < 0) {
2164                                 pr_warn("map '%s': can't determine key size for type [%u]: %zd.\n",
2165                                         map_name, t->type, (ssize_t)sz);
2166                                 return sz;
2167                         }
2168                         if (map_def->key_size && map_def->key_size != sz) {
2169                                 pr_warn("map '%s': conflicting key size %u != %zd.\n",
2170                                         map_name, map_def->key_size, (ssize_t)sz);
2171                                 return -EINVAL;
2172                         }
2173                         map_def->key_size = sz;
2174                         map_def->key_type_id = t->type;
2175                         map_def->parts |= MAP_DEF_KEY_SIZE | MAP_DEF_KEY_TYPE;
2176                 } else if (strcmp(name, "value_size") == 0) {
2177                         __u32 sz;
2178
2179                         if (!get_map_field_int(map_name, btf, m, &sz))
2180                                 return -EINVAL;
2181                         if (map_def->value_size && map_def->value_size != sz) {
2182                                 pr_warn("map '%s': conflicting value size %u != %u.\n",
2183                                         map_name, map_def->value_size, sz);
2184                                 return -EINVAL;
2185                         }
2186                         map_def->value_size = sz;
2187                         map_def->parts |= MAP_DEF_VALUE_SIZE;
2188                 } else if (strcmp(name, "value") == 0) {
2189                         __s64 sz;
2190
2191                         t = btf__type_by_id(btf, m->type);
2192                         if (!t) {
2193                                 pr_warn("map '%s': value type [%d] not found.\n",
2194                                         map_name, m->type);
2195                                 return -EINVAL;
2196                         }
2197                         if (!btf_is_ptr(t)) {
2198                                 pr_warn("map '%s': value spec is not PTR: %s.\n",
2199                                         map_name, btf_kind_str(t));
2200                                 return -EINVAL;
2201                         }
2202                         sz = btf__resolve_size(btf, t->type);
2203                         if (sz < 0) {
2204                                 pr_warn("map '%s': can't determine value size for type [%u]: %zd.\n",
2205                                         map_name, t->type, (ssize_t)sz);
2206                                 return sz;
2207                         }
2208                         if (map_def->value_size && map_def->value_size != sz) {
2209                                 pr_warn("map '%s': conflicting value size %u != %zd.\n",
2210                                         map_name, map_def->value_size, (ssize_t)sz);
2211                                 return -EINVAL;
2212                         }
2213                         map_def->value_size = sz;
2214                         map_def->value_type_id = t->type;
2215                         map_def->parts |= MAP_DEF_VALUE_SIZE | MAP_DEF_VALUE_TYPE;
2216                 }
2217                 else if (strcmp(name, "values") == 0) {
2218                         bool is_map_in_map = bpf_map_type__is_map_in_map(map_def->map_type);
2219                         bool is_prog_array = map_def->map_type == BPF_MAP_TYPE_PROG_ARRAY;
2220                         const char *desc = is_map_in_map ? "map-in-map inner" : "prog-array value";
2221                         char inner_map_name[128];
2222                         int err;
2223
2224                         if (is_inner) {
2225                                 pr_warn("map '%s': multi-level inner maps not supported.\n",
2226                                         map_name);
2227                                 return -ENOTSUP;
2228                         }
2229                         if (i != vlen - 1) {
2230                                 pr_warn("map '%s': '%s' member should be last.\n",
2231                                         map_name, name);
2232                                 return -EINVAL;
2233                         }
2234                         if (!is_map_in_map && !is_prog_array) {
2235                                 pr_warn("map '%s': should be map-in-map or prog-array.\n",
2236                                         map_name);
2237                                 return -ENOTSUP;
2238                         }
2239                         if (map_def->value_size && map_def->value_size != 4) {
2240                                 pr_warn("map '%s': conflicting value size %u != 4.\n",
2241                                         map_name, map_def->value_size);
2242                                 return -EINVAL;
2243                         }
2244                         map_def->value_size = 4;
2245                         t = btf__type_by_id(btf, m->type);
2246                         if (!t) {
2247                                 pr_warn("map '%s': %s type [%d] not found.\n",
2248                                         map_name, desc, m->type);
2249                                 return -EINVAL;
2250                         }
2251                         if (!btf_is_array(t) || btf_array(t)->nelems) {
2252                                 pr_warn("map '%s': %s spec is not a zero-sized array.\n",
2253                                         map_name, desc);
2254                                 return -EINVAL;
2255                         }
2256                         t = skip_mods_and_typedefs(btf, btf_array(t)->type, NULL);
2257                         if (!btf_is_ptr(t)) {
2258                                 pr_warn("map '%s': %s def is of unexpected kind %s.\n",
2259                                         map_name, desc, btf_kind_str(t));
2260                                 return -EINVAL;
2261                         }
2262                         t = skip_mods_and_typedefs(btf, t->type, NULL);
2263                         if (is_prog_array) {
2264                                 if (!btf_is_func_proto(t)) {
2265                                         pr_warn("map '%s': prog-array value def is of unexpected kind %s.\n",
2266                                                 map_name, btf_kind_str(t));
2267                                         return -EINVAL;
2268                                 }
2269                                 continue;
2270                         }
2271                         if (!btf_is_struct(t)) {
2272                                 pr_warn("map '%s': map-in-map inner def is of unexpected kind %s.\n",
2273                                         map_name, btf_kind_str(t));
2274                                 return -EINVAL;
2275                         }
2276
2277                         snprintf(inner_map_name, sizeof(inner_map_name), "%s.inner", map_name);
2278                         err = parse_btf_map_def(inner_map_name, btf, t, strict, inner_def, NULL);
2279                         if (err)
2280                                 return err;
2281
2282                         map_def->parts |= MAP_DEF_INNER_MAP;
2283                 } else if (strcmp(name, "pinning") == 0) {
2284                         __u32 val;
2285
2286                         if (is_inner) {
2287                                 pr_warn("map '%s': inner def can't be pinned.\n", map_name);
2288                                 return -EINVAL;
2289                         }
2290                         if (!get_map_field_int(map_name, btf, m, &val))
2291                                 return -EINVAL;
2292                         if (val != LIBBPF_PIN_NONE && val != LIBBPF_PIN_BY_NAME) {
2293                                 pr_warn("map '%s': invalid pinning value %u.\n",
2294                                         map_name, val);
2295                                 return -EINVAL;
2296                         }
2297                         map_def->pinning = val;
2298                         map_def->parts |= MAP_DEF_PINNING;
2299                 } else if (strcmp(name, "map_extra") == 0) {
2300                         __u32 map_extra;
2301
2302                         if (!get_map_field_int(map_name, btf, m, &map_extra))
2303                                 return -EINVAL;
2304                         map_def->map_extra = map_extra;
2305                         map_def->parts |= MAP_DEF_MAP_EXTRA;
2306                 } else {
2307                         if (strict) {
2308                                 pr_warn("map '%s': unknown field '%s'.\n", map_name, name);
2309                                 return -ENOTSUP;
2310                         }
2311                         pr_debug("map '%s': ignoring unknown field '%s'.\n", map_name, name);
2312                 }
2313         }
2314
2315         if (map_def->map_type == BPF_MAP_TYPE_UNSPEC) {
2316                 pr_warn("map '%s': map type isn't specified.\n", map_name);
2317                 return -EINVAL;
2318         }
2319
2320         return 0;
2321 }
2322
2323 static void fill_map_from_def(struct bpf_map *map, const struct btf_map_def *def)
2324 {
2325         map->def.type = def->map_type;
2326         map->def.key_size = def->key_size;
2327         map->def.value_size = def->value_size;
2328         map->def.max_entries = def->max_entries;
2329         map->def.map_flags = def->map_flags;
2330         map->map_extra = def->map_extra;
2331
2332         map->numa_node = def->numa_node;
2333         map->btf_key_type_id = def->key_type_id;
2334         map->btf_value_type_id = def->value_type_id;
2335
2336         if (def->parts & MAP_DEF_MAP_TYPE)
2337                 pr_debug("map '%s': found type = %u.\n", map->name, def->map_type);
2338
2339         if (def->parts & MAP_DEF_KEY_TYPE)
2340                 pr_debug("map '%s': found key [%u], sz = %u.\n",
2341                          map->name, def->key_type_id, def->key_size);
2342         else if (def->parts & MAP_DEF_KEY_SIZE)
2343                 pr_debug("map '%s': found key_size = %u.\n", map->name, def->key_size);
2344
2345         if (def->parts & MAP_DEF_VALUE_TYPE)
2346                 pr_debug("map '%s': found value [%u], sz = %u.\n",
2347                          map->name, def->value_type_id, def->value_size);
2348         else if (def->parts & MAP_DEF_VALUE_SIZE)
2349                 pr_debug("map '%s': found value_size = %u.\n", map->name, def->value_size);
2350
2351         if (def->parts & MAP_DEF_MAX_ENTRIES)
2352                 pr_debug("map '%s': found max_entries = %u.\n", map->name, def->max_entries);
2353         if (def->parts & MAP_DEF_MAP_FLAGS)
2354                 pr_debug("map '%s': found map_flags = 0x%x.\n", map->name, def->map_flags);
2355         if (def->parts & MAP_DEF_MAP_EXTRA)
2356                 pr_debug("map '%s': found map_extra = 0x%llx.\n", map->name,
2357                          (unsigned long long)def->map_extra);
2358         if (def->parts & MAP_DEF_PINNING)
2359                 pr_debug("map '%s': found pinning = %u.\n", map->name, def->pinning);
2360         if (def->parts & MAP_DEF_NUMA_NODE)
2361                 pr_debug("map '%s': found numa_node = %u.\n", map->name, def->numa_node);
2362
2363         if (def->parts & MAP_DEF_INNER_MAP)
2364                 pr_debug("map '%s': found inner map definition.\n", map->name);
2365 }
2366
2367 static const char *btf_var_linkage_str(__u32 linkage)
2368 {
2369         switch (linkage) {
2370         case BTF_VAR_STATIC: return "static";
2371         case BTF_VAR_GLOBAL_ALLOCATED: return "global";
2372         case BTF_VAR_GLOBAL_EXTERN: return "extern";
2373         default: return "unknown";
2374         }
2375 }
2376
2377 static int bpf_object__init_user_btf_map(struct bpf_object *obj,
2378                                          const struct btf_type *sec,
2379                                          int var_idx, int sec_idx,
2380                                          const Elf_Data *data, bool strict,
2381                                          const char *pin_root_path)
2382 {
2383         struct btf_map_def map_def = {}, inner_def = {};
2384         const struct btf_type *var, *def;
2385         const struct btf_var_secinfo *vi;
2386         const struct btf_var *var_extra;
2387         const char *map_name;
2388         struct bpf_map *map;
2389         int err;
2390
2391         vi = btf_var_secinfos(sec) + var_idx;
2392         var = btf__type_by_id(obj->btf, vi->type);
2393         var_extra = btf_var(var);
2394         map_name = btf__name_by_offset(obj->btf, var->name_off);
2395
2396         if (map_name == NULL || map_name[0] == '\0') {
2397                 pr_warn("map #%d: empty name.\n", var_idx);
2398                 return -EINVAL;
2399         }
2400         if ((__u64)vi->offset + vi->size > data->d_size) {
2401                 pr_warn("map '%s' BTF data is corrupted.\n", map_name);
2402                 return -EINVAL;
2403         }
2404         if (!btf_is_var(var)) {
2405                 pr_warn("map '%s': unexpected var kind %s.\n",
2406                         map_name, btf_kind_str(var));
2407                 return -EINVAL;
2408         }
2409         if (var_extra->linkage != BTF_VAR_GLOBAL_ALLOCATED) {
2410                 pr_warn("map '%s': unsupported map linkage %s.\n",
2411                         map_name, btf_var_linkage_str(var_extra->linkage));
2412                 return -EOPNOTSUPP;
2413         }
2414
2415         def = skip_mods_and_typedefs(obj->btf, var->type, NULL);
2416         if (!btf_is_struct(def)) {
2417                 pr_warn("map '%s': unexpected def kind %s.\n",
2418                         map_name, btf_kind_str(var));
2419                 return -EINVAL;
2420         }
2421         if (def->size > vi->size) {
2422                 pr_warn("map '%s': invalid def size.\n", map_name);
2423                 return -EINVAL;
2424         }
2425
2426         map = bpf_object__add_map(obj);
2427         if (IS_ERR(map))
2428                 return PTR_ERR(map);
2429         map->name = strdup(map_name);
2430         if (!map->name) {
2431                 pr_warn("map '%s': failed to alloc map name.\n", map_name);
2432                 return -ENOMEM;
2433         }
2434         map->libbpf_type = LIBBPF_MAP_UNSPEC;
2435         map->def.type = BPF_MAP_TYPE_UNSPEC;
2436         map->sec_idx = sec_idx;
2437         map->sec_offset = vi->offset;
2438         map->btf_var_idx = var_idx;
2439         pr_debug("map '%s': at sec_idx %d, offset %zu.\n",
2440                  map_name, map->sec_idx, map->sec_offset);
2441
2442         err = parse_btf_map_def(map->name, obj->btf, def, strict, &map_def, &inner_def);
2443         if (err)
2444                 return err;
2445
2446         fill_map_from_def(map, &map_def);
2447
2448         if (map_def.pinning == LIBBPF_PIN_BY_NAME) {
2449                 err = build_map_pin_path(map, pin_root_path);
2450                 if (err) {
2451                         pr_warn("map '%s': couldn't build pin path.\n", map->name);
2452                         return err;
2453                 }
2454         }
2455
2456         if (map_def.parts & MAP_DEF_INNER_MAP) {
2457                 map->inner_map = calloc(1, sizeof(*map->inner_map));
2458                 if (!map->inner_map)
2459                         return -ENOMEM;
2460                 map->inner_map->fd = -1;
2461                 map->inner_map->sec_idx = sec_idx;
2462                 map->inner_map->name = malloc(strlen(map_name) + sizeof(".inner") + 1);
2463                 if (!map->inner_map->name)
2464                         return -ENOMEM;
2465                 sprintf(map->inner_map->name, "%s.inner", map_name);
2466
2467                 fill_map_from_def(map->inner_map, &inner_def);
2468         }
2469
2470         err = bpf_map_find_btf_info(obj, map);
2471         if (err)
2472                 return err;
2473
2474         return 0;
2475 }
2476
2477 static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict,
2478                                           const char *pin_root_path)
2479 {
2480         const struct btf_type *sec = NULL;
2481         int nr_types, i, vlen, err;
2482         const struct btf_type *t;
2483         const char *name;
2484         Elf_Data *data;
2485         Elf_Scn *scn;
2486
2487         if (obj->efile.btf_maps_shndx < 0)
2488                 return 0;
2489
2490         scn = elf_sec_by_idx(obj, obj->efile.btf_maps_shndx);
2491         data = elf_sec_data(obj, scn);
2492         if (!scn || !data) {
2493                 pr_warn("elf: failed to get %s map definitions for %s\n",
2494                         MAPS_ELF_SEC, obj->path);
2495                 return -EINVAL;
2496         }
2497
2498         nr_types = btf__type_cnt(obj->btf);
2499         for (i = 1; i < nr_types; i++) {
2500                 t = btf__type_by_id(obj->btf, i);
2501                 if (!btf_is_datasec(t))
2502                         continue;
2503                 name = btf__name_by_offset(obj->btf, t->name_off);
2504                 if (strcmp(name, MAPS_ELF_SEC) == 0) {
2505                         sec = t;
2506                         obj->efile.btf_maps_sec_btf_id = i;
2507                         break;
2508                 }
2509         }
2510
2511         if (!sec) {
2512                 pr_warn("DATASEC '%s' not found.\n", MAPS_ELF_SEC);
2513                 return -ENOENT;
2514         }
2515
2516         vlen = btf_vlen(sec);
2517         for (i = 0; i < vlen; i++) {
2518                 err = bpf_object__init_user_btf_map(obj, sec, i,
2519                                                     obj->efile.btf_maps_shndx,
2520                                                     data, strict,
2521                                                     pin_root_path);
2522                 if (err)
2523                         return err;
2524         }
2525
2526         return 0;
2527 }
2528
2529 static int bpf_object__init_maps(struct bpf_object *obj,
2530                                  const struct bpf_object_open_opts *opts)
2531 {
2532         const char *pin_root_path;
2533         bool strict;
2534         int err = 0;
2535
2536         strict = !OPTS_GET(opts, relaxed_maps, false);
2537         pin_root_path = OPTS_GET(opts, pin_root_path, NULL);
2538
2539         err = err ?: bpf_object__init_user_btf_maps(obj, strict, pin_root_path);
2540         err = err ?: bpf_object__init_global_data_maps(obj);
2541         err = err ?: bpf_object__init_kconfig_map(obj);
2542         err = err ?: bpf_object__init_struct_ops_maps(obj);
2543
2544         return err;
2545 }
2546
2547 static bool section_have_execinstr(struct bpf_object *obj, int idx)
2548 {
2549         Elf64_Shdr *sh;
2550
2551         sh = elf_sec_hdr(obj, elf_sec_by_idx(obj, idx));
2552         if (!sh)
2553                 return false;
2554
2555         return sh->sh_flags & SHF_EXECINSTR;
2556 }
2557
2558 static bool btf_needs_sanitization(struct bpf_object *obj)
2559 {
2560         bool has_func_global = kernel_supports(obj, FEAT_BTF_GLOBAL_FUNC);
2561         bool has_datasec = kernel_supports(obj, FEAT_BTF_DATASEC);
2562         bool has_float = kernel_supports(obj, FEAT_BTF_FLOAT);
2563         bool has_func = kernel_supports(obj, FEAT_BTF_FUNC);
2564         bool has_decl_tag = kernel_supports(obj, FEAT_BTF_DECL_TAG);
2565         bool has_type_tag = kernel_supports(obj, FEAT_BTF_TYPE_TAG);
2566         bool has_enum64 = kernel_supports(obj, FEAT_BTF_ENUM64);
2567
2568         return !has_func || !has_datasec || !has_func_global || !has_float ||
2569                !has_decl_tag || !has_type_tag || !has_enum64;
2570 }
2571
/*
 * Rewrite, in place, the types of @btf whose kinds kernel_supports() reports
 * as unsupported, replacing each with a substitute kind (see the per-branch
 * comments below). @obj is only used for the feature queries.
 *
 * Returns 0 on success, or the negative value returned by btf__add_int() if
 * the ENUM64 placeholder type can't be added.
 */
static int bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf)
{
        bool has_func_global = kernel_supports(obj, FEAT_BTF_GLOBAL_FUNC);
        bool has_datasec = kernel_supports(obj, FEAT_BTF_DATASEC);
        bool has_float = kernel_supports(obj, FEAT_BTF_FLOAT);
        bool has_func = kernel_supports(obj, FEAT_BTF_FUNC);
        bool has_decl_tag = kernel_supports(obj, FEAT_BTF_DECL_TAG);
        bool has_type_tag = kernel_supports(obj, FEAT_BTF_TYPE_TAG);
        bool has_enum64 = kernel_supports(obj, FEAT_BTF_ENUM64);
        /* 0 means "placeholder INT not created yet"; it is added lazily */
        int enum64_placeholder_id = 0;
        struct btf_type *t;
        int i, j, vlen;

        /* the type count is re-read on every iteration, so a type added by
         * btf__add_int() below is also visited
         */
        for (i = 1; i < btf__type_cnt(btf); i++) {
                t = (struct btf_type *)btf__type_by_id(btf, i);

                if ((!has_datasec && btf_is_var(t)) || (!has_decl_tag && btf_is_decl_tag(t))) {
                        /* replace VAR/DECL_TAG with INT */
                        t->info = BTF_INFO_ENC(BTF_KIND_INT, 0, 0);
                        /*
                         * using size = 1 is the safest choice, 4 will be too
                         * big and cause kernel BTF validation failure if
                         * original variable took less than 4 bytes
                         */
                        t->size = 1;
                        /* overwrite the 4 bytes following the type header
                         * with an INT encoding of 8 bits
                         */
                        *(int *)(t + 1) = BTF_INT_ENC(0, 0, 8);
                } else if (!has_datasec && btf_is_datasec(t)) {
                        /* replace DATASEC with STRUCT */
                        /* NOTE(review): v and m are both derived from t and
                         * presumably alias the same trailing records, which
                         * would explain why assignment order matters below -
                         * confirm against btf.h
                         */
                        const struct btf_var_secinfo *v = btf_var_secinfos(t);
                        struct btf_member *m = btf_members(t);
                        struct btf_type *vt;
                        char *name;

                        /* rewrite the name string in place: '.' -> '_' */
                        name = (char *)btf__name_by_offset(btf, t->name_off);
                        while (*name) {
                                if (*name == '.')
                                        *name = '_';
                                name++;
                        }

                        vlen = btf_vlen(t);
                        t->info = BTF_INFO_ENC(BTF_KIND_STRUCT, 0, vlen);
                        for (j = 0; j < vlen; j++, v++, m++) {
                                /* order of field assignments is important */
                                /* variable offset (scaled by 8) becomes the
                                 * member offset
                                 */
                                m->offset = v->offset * 8;
                                m->type = v->type;
                                /* preserve variable name as member name */
                                vt = (void *)btf__type_by_id(btf, v->type);
                                m->name_off = vt->name_off;
                        }
                } else if (!has_func && btf_is_func_proto(t)) {
                        /* replace FUNC_PROTO with ENUM */
                        vlen = btf_vlen(t);
                        t->info = BTF_INFO_ENC(BTF_KIND_ENUM, 0, vlen);
                        t->size = sizeof(__u32); /* kernel enforced */
                } else if (!has_func && btf_is_func(t)) {
                        /* replace FUNC with TYPEDEF */
                        t->info = BTF_INFO_ENC(BTF_KIND_TYPEDEF, 0, 0);
                } else if (!has_func_global && btf_is_func(t)) {
                        /* replace BTF_FUNC_GLOBAL with BTF_FUNC_STATIC */
                        t->info = BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0);
                } else if (!has_float && btf_is_float(t)) {
                        /* replace FLOAT with an equally-sized empty STRUCT;
                         * since C compilers do not accept e.g. "float" as a
                         * valid struct name, make it anonymous
                         */
                        t->name_off = 0;
                        t->info = BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 0);
                } else if (!has_type_tag && btf_is_type_tag(t)) {
                        /* replace TYPE_TAG with a CONST */
                        t->name_off = 0;
                        t->info = BTF_INFO_ENC(BTF_KIND_CONST, 0, 0);
                } else if (!has_enum64 && btf_is_enum(t)) {
                        /* clear the kflag */
                        t->info = btf_type_info(btf_kind(t), btf_vlen(t), false);
                } else if (!has_enum64 && btf_is_enum64(t)) {
                        /* replace ENUM64 with a union */
                        struct btf_member *m;

                        if (enum64_placeholder_id == 0) {
                                enum64_placeholder_id = btf__add_int(btf, "enum64_placeholder", 1, 0);
                                if (enum64_placeholder_id < 0)
                                        return enum64_placeholder_id;

                                /* re-fetch t after adding a type; presumably
                                 * the add can invalidate earlier type
                                 * pointers - TODO confirm
                                 */
                                t = (struct btf_type *)btf__type_by_id(btf, i);
                        }

                        /* every member refers to the shared placeholder INT
                         * at offset 0
                         */
                        m = btf_members(t);
                        vlen = btf_vlen(t);
                        t->info = BTF_INFO_ENC(BTF_KIND_UNION, 0, vlen);
                        for (j = 0; j < vlen; j++, m++) {
                                m->type = enum64_placeholder_id;
                                m->offset = 0;
                        }
                }
        }

        return 0;
}
2671
2672 static bool libbpf_needs_btf(const struct bpf_object *obj)
2673 {
2674         return obj->efile.btf_maps_shndx >= 0 ||
2675                obj->efile.st_ops_shndx >= 0 ||
2676                obj->nr_extern > 0;
2677 }
2678
2679 static bool kernel_needs_btf(const struct bpf_object *obj)
2680 {
2681         return obj->efile.st_ops_shndx >= 0;
2682 }
2683
2684 static int bpf_object__init_btf(struct bpf_object *obj,
2685                                 Elf_Data *btf_data,
2686                                 Elf_Data *btf_ext_data)
2687 {
2688         int err = -ENOENT;
2689
2690         if (btf_data) {
2691                 obj->btf = btf__new(btf_data->d_buf, btf_data->d_size);
2692                 err = libbpf_get_error(obj->btf);
2693                 if (err) {
2694                         obj->btf = NULL;
2695                         pr_warn("Error loading ELF section %s: %d.\n", BTF_ELF_SEC, err);
2696                         goto out;
2697                 }
2698                 /* enforce 8-byte pointers for BPF-targeted BTFs */
2699                 btf__set_pointer_size(obj->btf, 8);
2700         }
2701         if (btf_ext_data) {
2702                 struct btf_ext_info *ext_segs[3];
2703                 int seg_num, sec_num;
2704
2705                 if (!obj->btf) {
2706                         pr_debug("Ignore ELF section %s because its depending ELF section %s is not found.\n",
2707                                  BTF_EXT_ELF_SEC, BTF_ELF_SEC);
2708                         goto out;
2709                 }
2710                 obj->btf_ext = btf_ext__new(btf_ext_data->d_buf, btf_ext_data->d_size);
2711                 err = libbpf_get_error(obj->btf_ext);
2712                 if (err) {
2713                         pr_warn("Error loading ELF section %s: %d. Ignored and continue.\n",
2714                                 BTF_EXT_ELF_SEC, err);
2715                         obj->btf_ext = NULL;
2716                         goto out;
2717                 }
2718
2719                 /* setup .BTF.ext to ELF section mapping */
2720                 ext_segs[0] = &obj->btf_ext->func_info;
2721                 ext_segs[1] = &obj->btf_ext->line_info;
2722                 ext_segs[2] = &obj->btf_ext->core_relo_info;
2723                 for (seg_num = 0; seg_num < ARRAY_SIZE(ext_segs); seg_num++) {
2724                         struct btf_ext_info *seg = ext_segs[seg_num];
2725                         const struct btf_ext_info_sec *sec;
2726                         const char *sec_name;
2727                         Elf_Scn *scn;
2728
2729                         if (seg->sec_cnt == 0)
2730                                 continue;
2731
2732                         seg->sec_idxs = calloc(seg->sec_cnt, sizeof(*seg->sec_idxs));
2733                         if (!seg->sec_idxs) {
2734                                 err = -ENOMEM;
2735                                 goto out;
2736                         }
2737
2738                         sec_num = 0;
2739                         for_each_btf_ext_sec(seg, sec) {
2740                                 /* preventively increment index to avoid doing
2741                                  * this before every continue below
2742                                  */
2743                                 sec_num++;
2744
2745                                 sec_name = btf__name_by_offset(obj->btf, sec->sec_name_off);
2746                                 if (str_is_empty(sec_name))
2747                                         continue;
2748                                 scn = elf_sec_by_name(obj, sec_name);
2749                                 if (!scn)
2750                                         continue;
2751
2752                                 seg->sec_idxs[sec_num - 1] = elf_ndxscn(scn);
2753                         }
2754                 }
2755         }
2756 out:
2757         if (err && libbpf_needs_btf(obj)) {
2758                 pr_warn("BTF is required, but is missing or corrupted.\n");
2759                 return err;
2760         }
2761         return 0;
2762 }
2763
/*
 * qsort() comparator ordering btf_var_secinfo records by ascending offset.
 *
 * The offsets are compared explicitly instead of being subtracted: the
 * difference of two unsigned 32-bit offsets converted to int has the wrong
 * sign once the offsets are 2^31 or more apart.
 *
 * Returns a negative value, zero, or a positive value if @_a's offset is
 * respectively less than, equal to, or greater than @_b's.
 */
static int compare_vsi_off(const void *_a, const void *_b)
{
        const struct btf_var_secinfo *a = _a;
        const struct btf_var_secinfo *b = _b;

        if (a->offset < b->offset)
                return -1;
        return a->offset > b->offset;
}
2771
2772 static int btf_fixup_datasec(struct bpf_object *obj, struct btf *btf,
2773                              struct btf_type *t)
2774 {
2775         __u32 size = 0, off = 0, i, vars = btf_vlen(t);
2776         const char *name = btf__name_by_offset(btf, t->name_off);
2777         const struct btf_type *t_var;
2778         struct btf_var_secinfo *vsi;
2779         const struct btf_var *var;
2780         int ret;
2781
2782         if (!name) {
2783                 pr_debug("No name found in string section for DATASEC kind.\n");
2784                 return -ENOENT;
2785         }
2786
2787         /* .extern datasec size and var offsets were set correctly during
2788          * extern collection step, so just skip straight to sorting variables
2789          */
2790         if (t->size)
2791                 goto sort_vars;
2792
2793         ret = find_elf_sec_sz(obj, name, &size);
2794         if (ret || !size) {
2795                 pr_debug("Invalid size for section %s: %u bytes\n", name, size);
2796                 return -ENOENT;
2797         }
2798
2799         t->size = size;
2800
2801         for (i = 0, vsi = btf_var_secinfos(t); i < vars; i++, vsi++) {
2802                 t_var = btf__type_by_id(btf, vsi->type);
2803                 if (!t_var || !btf_is_var(t_var)) {
2804                         pr_debug("Non-VAR type seen in section %s\n", name);
2805                         return -EINVAL;
2806                 }
2807
2808                 var = btf_var(t_var);
2809                 if (var->linkage == BTF_VAR_STATIC)
2810                         continue;
2811
2812                 name = btf__name_by_offset(btf, t_var->name_off);
2813                 if (!name) {
2814                         pr_debug("No name found in string section for VAR kind\n");
2815                         return -ENOENT;
2816                 }
2817
2818                 ret = find_elf_var_offset(obj, name, &off);
2819                 if (ret) {
2820                         pr_debug("No offset found in symbol table for VAR %s\n",
2821                                  name);
2822                         return -ENOENT;
2823                 }
2824
2825                 vsi->offset = off;
2826         }
2827
2828 sort_vars:
2829         qsort(btf_var_secinfos(t), vars, sizeof(*vsi), compare_vsi_off);
2830         return 0;
2831 }
2832
2833 static int btf_finalize_data(struct bpf_object *obj, struct btf *btf)
2834 {
2835         int err = 0;
2836         __u32 i, n = btf__type_cnt(btf);
2837
2838         for (i = 1; i < n; i++) {
2839                 struct btf_type *t = btf_type_by_id(btf, i);
2840
2841                 /* Loader needs to fix up some of the things compiler
2842                  * couldn't get its hands on while emitting BTF. This
2843                  * is section size and global variable offset. We use
2844                  * the info from the ELF itself for this purpose.
2845                  */
2846                 if (btf_is_datasec(t)) {
2847                         err = btf_fixup_datasec(obj, btf, t);
2848                         if (err)
2849                                 break;
2850                 }
2851         }
2852
2853         return libbpf_err(err);
2854 }
2855
2856 static int bpf_object__finalize_btf(struct bpf_object *obj)
2857 {
2858         int err;
2859
2860         if (!obj->btf)
2861                 return 0;
2862
2863         err = btf_finalize_data(obj, obj->btf);
2864         if (err) {
2865                 pr_warn("Error finalizing %s: %d.\n", BTF_ELF_SEC, err);
2866                 return err;
2867         }
2868
2869         return 0;
2870 }
2871
2872 static bool prog_needs_vmlinux_btf(struct bpf_program *prog)
2873 {
2874         if (prog->type == BPF_PROG_TYPE_STRUCT_OPS ||
2875             prog->type == BPF_PROG_TYPE_LSM)
2876                 return true;
2877
2878         /* BPF_PROG_TYPE_TRACING programs which do not attach to other programs
2879          * also need vmlinux BTF
2880          */
2881         if (prog->type == BPF_PROG_TYPE_TRACING && !prog->attach_prog_fd)
2882                 return true;
2883
2884         return false;
2885 }
2886
2887 static bool obj_needs_vmlinux_btf(const struct bpf_object *obj)
2888 {
2889         struct bpf_program *prog;
2890         int i;
2891
2892         /* CO-RE relocations need kernel BTF, only when btf_custom_path
2893          * is not specified
2894          */
2895         if (obj->btf_ext && obj->btf_ext->core_relo_info.len && !obj->btf_custom_path)
2896                 return true;
2897
2898         /* Support for typed ksyms needs kernel BTF */
2899         for (i = 0; i < obj->nr_extern; i++) {
2900                 const struct extern_desc *ext;
2901
2902                 ext = &obj->externs[i];
2903                 if (ext->type == EXT_KSYM && ext->ksym.type_id)
2904                         return true;
2905         }
2906
2907         bpf_object__for_each_program(prog, obj) {
2908                 if (!prog->autoload)
2909                         continue;
2910                 if (prog_needs_vmlinux_btf(prog))
2911                         return true;
2912         }
2913
2914         return false;
2915 }
2916
2917 static int bpf_object__load_vmlinux_btf(struct bpf_object *obj, bool force)
2918 {
2919         int err;
2920
2921         /* btf_vmlinux could be loaded earlier */
2922         if (obj->btf_vmlinux || obj->gen_loader)
2923                 return 0;
2924
2925         if (!force && !obj_needs_vmlinux_btf(obj))
2926                 return 0;
2927
2928         obj->btf_vmlinux = btf__load_vmlinux_btf();
2929         err = libbpf_get_error(obj->btf_vmlinux);
2930         if (err) {
2931                 pr_warn("Error loading vmlinux BTF: %d\n", err);
2932                 obj->btf_vmlinux = NULL;
2933                 return err;
2934         }
2935         return 0;
2936 }
2937
2938 static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj)
2939 {
2940         struct btf *kern_btf = obj->btf;
2941         bool btf_mandatory, sanitize;
2942         int i, err = 0;
2943
2944         if (!obj->btf)
2945                 return 0;
2946
2947         if (!kernel_supports(obj, FEAT_BTF)) {
2948                 if (kernel_needs_btf(obj)) {
2949                         err = -EOPNOTSUPP;
2950                         goto report;
2951                 }
2952                 pr_debug("Kernel doesn't support BTF, skipping uploading it.\n");
2953                 return 0;
2954         }
2955
2956         /* Even though some subprogs are global/weak, user might prefer more
2957          * permissive BPF verification process that BPF verifier performs for
2958          * static functions, taking into account more context from the caller
2959          * functions. In such case, they need to mark such subprogs with
2960          * __attribute__((visibility("hidden"))) and libbpf will adjust
2961          * corresponding FUNC BTF type to be marked as static and trigger more
2962          * involved BPF verification process.
2963          */
2964         for (i = 0; i < obj->nr_programs; i++) {
2965                 struct bpf_program *prog = &obj->programs[i];
2966                 struct btf_type *t;
2967                 const char *name;
2968                 int j, n;
2969
2970                 if (!prog->mark_btf_static || !prog_is_subprog(obj, prog))
2971                         continue;
2972
2973                 n = btf__type_cnt(obj->btf);
2974                 for (j = 1; j < n; j++) {
2975                         t = btf_type_by_id(obj->btf, j);
2976                         if (!btf_is_func(t) || btf_func_linkage(t) != BTF_FUNC_GLOBAL)
2977                                 continue;
2978
2979                         name = btf__str_by_offset(obj->btf, t->name_off);
2980                         if (strcmp(name, prog->name) != 0)
2981                                 continue;
2982
2983                         t->info = btf_type_info(BTF_KIND_FUNC, BTF_FUNC_STATIC, 0);
2984                         break;
2985                 }
2986         }
2987
2988         sanitize = btf_needs_sanitization(obj);
2989         if (sanitize) {
2990                 const void *raw_data;
2991                 __u32 sz;
2992
2993                 /* clone BTF to sanitize a copy and leave the original intact */
2994                 raw_data = btf__raw_data(obj->btf, &sz);
2995                 kern_btf = btf__new(raw_data, sz);
2996                 err = libbpf_get_error(kern_btf);
2997                 if (err)
2998                         return err;
2999
3000                 /* enforce 8-byte pointers for BPF-targeted BTFs */
3001                 btf__set_pointer_size(obj->btf, 8);
3002                 err = bpf_object__sanitize_btf(obj, kern_btf);
3003                 if (err)
3004                         return err;
3005         }
3006
3007         if (obj->gen_loader) {
3008                 __u32 raw_size = 0;
3009                 const void *raw_data = btf__raw_data(kern_btf, &raw_size);
3010
3011                 if (!raw_data)
3012                         return -ENOMEM;
3013                 bpf_gen__load_btf(obj->gen_loader, raw_data, raw_size);
3014                 /* Pretend to have valid FD to pass various fd >= 0 checks.
3015                  * This fd == 0 will not be used with any syscall and will be reset to -1 eventually.
3016                  */
3017                 btf__set_fd(kern_btf, 0);
3018         } else {
3019                 /* currently BPF_BTF_LOAD only supports log_level 1 */
3020                 err = btf_load_into_kernel(kern_btf, obj->log_buf, obj->log_size,
3021                                            obj->log_level ? 1 : 0);
3022         }
3023         if (sanitize) {
3024                 if (!err) {
3025                         /* move fd to libbpf's BTF */
3026                         btf__set_fd(obj->btf, btf__fd(kern_btf));
3027                         btf__set_fd(kern_btf, -1);
3028                 }
3029                 btf__free(kern_btf);
3030         }
3031 report:
3032         if (err) {
3033                 btf_mandatory = kernel_needs_btf(obj);
3034                 pr_warn("Error loading .BTF into kernel: %d. %s\n", err,
3035                         btf_mandatory ? "BTF is mandatory, can't proceed."
3036                                       : "BTF is optional, ignoring.");
3037                 if (!btf_mandatory)
3038                         err = 0;
3039         }
3040         return err;
3041 }
3042
3043 static const char *elf_sym_str(const struct bpf_object *obj, size_t off)
3044 {
3045         const char *name;
3046
3047         name = elf_strptr(obj->efile.elf, obj->efile.strtabidx, off);
3048         if (!name) {
3049                 pr_warn("elf: failed to get section name string at offset %zu from %s: %s\n",
3050                         off, obj->path, elf_errmsg(-1));
3051                 return NULL;
3052         }
3053
3054         return name;
3055 }
3056
3057 static const char *elf_sec_str(const struct bpf_object *obj, size_t off)
3058 {
3059         const char *name;
3060
3061         name = elf_strptr(obj->efile.elf, obj->efile.shstrndx, off);
3062         if (!name) {
3063                 pr_warn("elf: failed to get section name string at offset %zu from %s: %s\n",
3064                         off, obj->path, elf_errmsg(-1));
3065                 return NULL;
3066         }
3067
3068         return name;
3069 }
3070
3071 static Elf_Scn *elf_sec_by_idx(const struct bpf_object *obj, size_t idx)
3072 {
3073         Elf_Scn *scn;
3074
3075         scn = elf_getscn(obj->efile.elf, idx);
3076         if (!scn) {
3077                 pr_warn("elf: failed to get section(%zu) from %s: %s\n",
3078                         idx, obj->path, elf_errmsg(-1));
3079                 return NULL;
3080         }
3081         return scn;
3082 }
3083
3084 static Elf_Scn *elf_sec_by_name(const struct bpf_object *obj, const char *name)
3085 {
3086         Elf_Scn *scn = NULL;
3087         Elf *elf = obj->efile.elf;
3088         const char *sec_name;
3089
3090         while ((scn = elf_nextscn(elf, scn)) != NULL) {
3091                 sec_name = elf_sec_name(obj, scn);
3092                 if (!sec_name)
3093                         return NULL;
3094
3095                 if (strcmp(sec_name, name) != 0)
3096                         continue;
3097
3098                 return scn;
3099         }
3100         return NULL;
3101 }
3102
3103 static Elf64_Shdr *elf_sec_hdr(const struct bpf_object *obj, Elf_Scn *scn)
3104 {
3105         Elf64_Shdr *shdr;
3106
3107         if (!scn)
3108                 return NULL;
3109
3110         shdr = elf64_getshdr(scn);
3111         if (!shdr) {
3112                 pr_warn("elf: failed to get section(%zu) header from %s: %s\n",
3113                         elf_ndxscn(scn), obj->path, elf_errmsg(-1));
3114                 return NULL;
3115         }
3116
3117         return shdr;
3118 }
3119
3120 static const char *elf_sec_name(const struct bpf_object *obj, Elf_Scn *scn)
3121 {
3122         const char *name;
3123         Elf64_Shdr *sh;
3124
3125         if (!scn)
3126                 return NULL;
3127
3128         sh = elf_sec_hdr(obj, scn);
3129         if (!sh)
3130                 return NULL;
3131
3132         name = elf_sec_str(obj, sh->sh_name);
3133         if (!name) {
3134                 pr_warn("elf: failed to get section(%zu) name from %s: %s\n",
3135                         elf_ndxscn(scn), obj->path, elf_errmsg(-1));
3136                 return NULL;
3137         }
3138
3139         return name;
3140 }
3141
3142 static Elf_Data *elf_sec_data(const struct bpf_object *obj, Elf_Scn *scn)
3143 {
3144         Elf_Data *data;
3145
3146         if (!scn)
3147                 return NULL;
3148
3149         data = elf_getdata(scn, 0);
3150         if (!data) {
3151                 pr_warn("elf: failed to get section(%zu) %s data from %s: %s\n",
3152                         elf_ndxscn(scn), elf_sec_name(obj, scn) ?: "<?>",
3153                         obj->path, elf_errmsg(-1));
3154                 return NULL;
3155         }
3156
3157         return data;
3158 }
3159
3160 static Elf64_Sym *elf_sym_by_idx(const struct bpf_object *obj, size_t idx)
3161 {
3162         if (idx >= obj->efile.symbols->d_size / sizeof(Elf64_Sym))
3163                 return NULL;
3164
3165         return (Elf64_Sym *)obj->efile.symbols->d_buf + idx;
3166 }
3167
3168 static Elf64_Rel *elf_rel_by_idx(Elf_Data *data, size_t idx)
3169 {
3170         if (idx >= data->d_size / sizeof(Elf64_Rel))
3171                 return NULL;
3172
3173         return (Elf64_Rel *)data->d_buf + idx;
3174 }
3175
/*
 * Tell whether @name looks like a DWARF debug section name.
 *
 * Only the ".debug_" prefix is checked; this is an approximation because
 * the full list of DWARF section names is too long to enumerate.
 */
static bool is_sec_name_dwarf(const char *name)
{
        bool has_debug_pfx = str_has_pfx(name, ".debug_");

        return has_debug_pfx;
}
3181
3182 static bool ignore_elf_section(Elf64_Shdr *hdr, const char *name)
3183 {
3184         /* no special handling of .strtab */
3185         if (hdr->sh_type == SHT_STRTAB)
3186                 return true;
3187
3188         /* ignore .llvm_addrsig section as well */
3189         if (hdr->sh_type == SHT_LLVM_ADDRSIG)
3190                 return true;
3191
3192         /* no subprograms will lead to an empty .text section, ignore it */
3193         if (hdr->sh_type == SHT_PROGBITS && hdr->sh_size == 0 &&
3194             strcmp(name, ".text") == 0)
3195                 return true;
3196
3197         /* DWARF sections */
3198         if (is_sec_name_dwarf(name))
3199                 return true;
3200
3201         if (str_has_pfx(name, ".rel")) {
3202                 name += sizeof(".rel") - 1;
3203                 /* DWARF section relocations */
3204                 if (is_sec_name_dwarf(name))
3205                         return true;
3206
3207                 /* .BTF and .BTF.ext don't need relocations */
3208                 if (strcmp(name, BTF_ELF_SEC) == 0 ||
3209                     strcmp(name, BTF_EXT_ELF_SEC) == 0)
3210                         return true;
3211         }
3212
3213         return false;
3214 }
3215
3216 static int cmp_progs(const void *_a, const void *_b)
3217 {
3218         const struct bpf_program *a = _a;
3219         const struct bpf_program *b = _b;
3220
3221         if (a->sec_idx != b->sec_idx)
3222                 return a->sec_idx < b->sec_idx ? -1 : 1;
3223
3224         /* sec_insn_off can't be the same within the section */
3225         return a->sec_insn_off < b->sec_insn_off ? -1 : 1;
3226 }
3227
3228 static int bpf_object__elf_collect(struct bpf_object *obj)
3229 {
3230         struct elf_sec_desc *sec_desc;
3231         Elf *elf = obj->efile.elf;
3232         Elf_Data *btf_ext_data = NULL;
3233         Elf_Data *btf_data = NULL;
3234         int idx = 0, err = 0;
3235         const char *name;
3236         Elf_Data *data;
3237         Elf_Scn *scn;
3238         Elf64_Shdr *sh;
3239
3240         /* ELF section indices are 0-based, but sec #0 is special "invalid"
3241          * section. e_shnum does include sec #0, so e_shnum is the necessary
3242          * size of an array to keep all the sections.
3243          */
3244         obj->efile.sec_cnt = obj->efile.ehdr->e_shnum;
3245         obj->efile.secs = calloc(obj->efile.sec_cnt, sizeof(*obj->efile.secs));
3246         if (!obj->efile.secs)
3247                 return -ENOMEM;
3248
3249         /* a bunch of ELF parsing functionality depends on processing symbols,
3250          * so do the first pass and find the symbol table
3251          */
3252         scn = NULL;
3253         while ((scn = elf_nextscn(elf, scn)) != NULL) {
3254                 sh = elf_sec_hdr(obj, scn);
3255                 if (!sh)
3256                         return -LIBBPF_ERRNO__FORMAT;
3257
3258                 if (sh->sh_type == SHT_SYMTAB) {
3259                         if (obj->efile.symbols) {
3260                                 pr_warn("elf: multiple symbol tables in %s\n", obj->path);
3261                                 return -LIBBPF_ERRNO__FORMAT;
3262                         }
3263
3264                         data = elf_sec_data(obj, scn);
3265                         if (!data)
3266                                 return -LIBBPF_ERRNO__FORMAT;
3267
3268                         idx = elf_ndxscn(scn);
3269
3270                         obj->efile.symbols = data;
3271                         obj->efile.symbols_shndx = idx;
3272                         obj->efile.strtabidx = sh->sh_link;
3273                 }
3274         }
3275
3276         if (!obj->efile.symbols) {
3277                 pr_warn("elf: couldn't find symbol table in %s, stripped object file?\n",
3278                         obj->path);
3279                 return -ENOENT;
3280         }
3281
3282         scn = NULL;
3283         while ((scn = elf_nextscn(elf, scn)) != NULL) {
3284                 idx = elf_ndxscn(scn);
3285                 sec_desc = &obj->efile.secs[idx];
3286
3287                 sh = elf_sec_hdr(obj, scn);
3288                 if (!sh)
3289                         return -LIBBPF_ERRNO__FORMAT;
3290
3291                 name = elf_sec_str(obj, sh->sh_name);
3292                 if (!name)
3293                         return -LIBBPF_ERRNO__FORMAT;
3294
3295                 if (ignore_elf_section(sh, name))
3296                         continue;
3297
3298                 data = elf_sec_data(obj, scn);
3299                 if (!data)
3300                         return -LIBBPF_ERRNO__FORMAT;
3301
3302                 pr_debug("elf: section(%d) %s, size %ld, link %d, flags %lx, type=%d\n",
3303                          idx, name, (unsigned long)data->d_size,
3304                          (int)sh->sh_link, (unsigned long)sh->sh_flags,
3305                          (int)sh->sh_type);
3306
3307                 if (strcmp(name, "license") == 0) {
3308                         err = bpf_object__init_license(obj, data->d_buf, data->d_size);
3309                         if (err)
3310                                 return err;
3311                 } else if (strcmp(name, "version") == 0) {
3312                         err = bpf_object__init_kversion(obj, data->d_buf, data->d_size);
3313                         if (err)
3314                                 return err;
3315                 } else if (strcmp(name, "maps") == 0) {
3316                         obj->efile.maps_shndx = idx;
3317                 } else if (strcmp(name, MAPS_ELF_SEC) == 0) {
3318                         obj->efile.btf_maps_shndx = idx;
3319                 } else if (strcmp(name, BTF_ELF_SEC) == 0) {
3320                         if (sh->sh_type != SHT_PROGBITS)
3321                                 return -LIBBPF_ERRNO__FORMAT;
3322                         btf_data = data;
3323                 } else if (strcmp(name, BTF_EXT_ELF_SEC) == 0) {
3324                         if (sh->sh_type != SHT_PROGBITS)
3325                                 return -LIBBPF_ERRNO__FORMAT;
3326                         btf_ext_data = data;
3327                 } else if (sh->sh_type == SHT_SYMTAB) {
3328                         /* already processed during the first pass above */
3329                 } else if (sh->sh_type == SHT_PROGBITS && data->d_size > 0) {
3330                         if (sh->sh_flags & SHF_EXECINSTR) {
3331                                 if (strcmp(name, ".text") == 0)
3332                                         obj->efile.text_shndx = idx;
3333                                 err = bpf_object__add_programs(obj, data, name, idx);
3334                                 if (err)
3335                                         return err;
3336                         } else if (strcmp(name, DATA_SEC) == 0 ||
3337                                    str_has_pfx(name, DATA_SEC ".")) {
3338                                 sec_desc->sec_type = SEC_DATA;
3339                                 sec_desc->shdr = sh;
3340                                 sec_desc->data = data;
3341                         } else if (strcmp(name, RODATA_SEC) == 0 ||
3342                                    str_has_pfx(name, RODATA_SEC ".")) {
3343                                 sec_desc->sec_type = SEC_RODATA;
3344                                 sec_desc->shdr = sh;
3345                                 sec_desc->data = data;
3346                         } else if (strcmp(name, STRUCT_OPS_SEC) == 0) {
3347                                 obj->efile.st_ops_data = data;
3348                                 obj->efile.st_ops_shndx = idx;
3349                         } else {
3350                                 pr_info("elf: skipping unrecognized data section(%d) %s\n",
3351                                         idx, name);
3352                         }
3353                 } else if (sh->sh_type == SHT_REL) {
3354                         int targ_sec_idx = sh->sh_info; /* points to other section */
3355
3356                         if (sh->sh_entsize != sizeof(Elf64_Rel) ||
3357                             targ_sec_idx >= obj->efile.sec_cnt)
3358                                 return -LIBBPF_ERRNO__FORMAT;
3359
3360                         /* Only do relo for section with exec instructions */
3361                         if (!section_have_execinstr(obj, targ_sec_idx) &&
3362                             strcmp(name, ".rel" STRUCT_OPS_SEC) &&
3363                             strcmp(name, ".rel" MAPS_ELF_SEC)) {
3364                                 pr_info("elf: skipping relo section(%d) %s for section(%d) %s\n",
3365                                         idx, name, targ_sec_idx,
3366                                         elf_sec_name(obj, elf_sec_by_idx(obj, targ_sec_idx)) ?: "<?>");
3367                                 continue;
3368                         }
3369
3370                         sec_desc->sec_type = SEC_RELO;
3371                         sec_desc->shdr = sh;
3372                         sec_desc->data = data;
3373                 } else if (sh->sh_type == SHT_NOBITS && strcmp(name, BSS_SEC) == 0) {
3374                         sec_desc->sec_type = SEC_BSS;
3375                         sec_desc->shdr = sh;
3376                         sec_desc->data = data;
3377                 } else {
3378                         pr_info("elf: skipping section(%d) %s (size %zu)\n", idx, name,
3379                                 (size_t)sh->sh_size);
3380                 }
3381         }
3382
3383         if (!obj->efile.strtabidx || obj->efile.strtabidx > idx) {
3384                 pr_warn("elf: symbol strings section missing or invalid in %s\n", obj->path);
3385                 return -LIBBPF_ERRNO__FORMAT;
3386         }
3387
3388         /* sort BPF programs by section name and in-section instruction offset
3389          * for faster search */
3390         if (obj->nr_programs)
3391                 qsort(obj->programs, obj->nr_programs, sizeof(*obj->programs), cmp_progs);
3392
3393         return bpf_object__init_btf(obj, btf_data, btf_ext_data);
3394 }
3395
/*
 * Tell whether @sym is an extern: an undefined (SHN_UNDEF) symbol of type
 * NOTYPE with GLOBAL or WEAK binding.
 */
static bool sym_is_extern(const Elf64_Sym *sym)
{
        int binding = ELF64_ST_BIND(sym->st_info);

        if (sym->st_shndx != SHN_UNDEF)
                return false;
        if (ELF64_ST_TYPE(sym->st_info) != STT_NOTYPE)
                return false;

        return binding == STB_GLOBAL || binding == STB_WEAK;
}
3404
/*
 * Tell whether @sym refers to a subprogram in the section with index
 * @text_shndx: either a local section symbol or a global function symbol
 * defined in that section.
 */
static bool sym_is_subprog(const Elf64_Sym *sym, int text_shndx)
{
        int sym_type = ELF64_ST_TYPE(sym->st_info);

        /* must live in .text section */
        if (sym->st_shndx != text_shndx)
                return false;

        switch (ELF64_ST_BIND(sym->st_info)) {
        case STB_LOCAL:
                /* local function */
                return sym_type == STT_SECTION;
        case STB_GLOBAL:
                /* global function */
                return sym_type == STT_FUNC;
        default:
                return false;
        }
}
3421
3422 static int find_extern_btf_id(const struct btf *btf, const char *ext_name)
3423 {
3424         const struct btf_type *t;
3425         const char *tname;
3426         int i, n;
3427
3428         if (!btf)
3429                 return -ESRCH;
3430
3431         n = btf__type_cnt(btf);
3432         for (i = 1; i < n; i++) {
3433                 t = btf__type_by_id(btf, i);
3434
3435                 if (!btf_is_var(t) && !btf_is_func(t))
3436                         continue;
3437
3438                 tname = btf__name_by_offset(btf, t->name_off);
3439                 if (strcmp(tname, ext_name))
3440                         continue;
3441
3442                 if (btf_is_var(t) &&
3443                     btf_var(t)->linkage != BTF_VAR_GLOBAL_EXTERN)
3444                         return -EINVAL;
3445
3446                 if (btf_is_func(t) && btf_func_linkage(t) != BTF_FUNC_EXTERN)
3447                         return -EINVAL;
3448
3449                 return i;
3450         }
3451
3452         return -ENOENT;
3453 }
3454
3455 static int find_extern_sec_btf_id(struct btf *btf, int ext_btf_id) {
3456         const struct btf_var_secinfo *vs;
3457         const struct btf_type *t;
3458         int i, j, n;
3459
3460         if (!btf)
3461                 return -ESRCH;
3462
3463         n = btf__type_cnt(btf);
3464         for (i = 1; i < n; i++) {
3465                 t = btf__type_by_id(btf, i);
3466
3467                 if (!btf_is_datasec(t))
3468                         continue;
3469
3470                 vs = btf_var_secinfos(t);
3471                 for (j = 0; j < btf_vlen(t); j++, vs++) {
3472                         if (vs->type == ext_btf_id)
3473                                 return i;
3474                 }
3475         }
3476
3477         return -ENOENT;
3478 }
3479
3480 static enum kcfg_type find_kcfg_type(const struct btf *btf, int id,
3481                                      bool *is_signed)
3482 {
3483         const struct btf_type *t;
3484         const char *name;
3485
3486         t = skip_mods_and_typedefs(btf, id, NULL);
3487         name = btf__name_by_offset(btf, t->name_off);
3488
3489         if (is_signed)
3490                 *is_signed = false;
3491         switch (btf_kind(t)) {
3492         case BTF_KIND_INT: {
3493                 int enc = btf_int_encoding(t);
3494
3495                 if (enc & BTF_INT_BOOL)
3496                         return t->size == 1 ? KCFG_BOOL : KCFG_UNKNOWN;
3497                 if (is_signed)
3498                         *is_signed = enc & BTF_INT_SIGNED;
3499                 if (t->size == 1)
3500                         return KCFG_CHAR;
3501                 if (t->size < 1 || t->size > 8 || (t->size & (t->size - 1)))
3502                         return KCFG_UNKNOWN;
3503                 return KCFG_INT;
3504         }
3505         case BTF_KIND_ENUM:
3506                 if (t->size != 4)
3507                         return KCFG_UNKNOWN;
3508                 if (strcmp(name, "libbpf_tristate"))
3509                         return KCFG_UNKNOWN;
3510                 return KCFG_TRISTATE;
3511         case BTF_KIND_ENUM64:
3512                 if (strcmp(name, "libbpf_tristate"))
3513                         return KCFG_UNKNOWN;
3514                 return KCFG_TRISTATE;
3515         case BTF_KIND_ARRAY:
3516                 if (btf_array(t)->nelems == 0)
3517                         return KCFG_UNKNOWN;
3518                 if (find_kcfg_type(btf, btf_array(t)->type, NULL) != KCFG_CHAR)
3519                         return KCFG_UNKNOWN;
3520                 return KCFG_CHAR_ARR;
3521         default:
3522                 return KCFG_UNKNOWN;
3523         }
3524 }
3525
3526 static int cmp_externs(const void *_a, const void *_b)
3527 {
3528         const struct extern_desc *a = _a;
3529         const struct extern_desc *b = _b;
3530
3531         if (a->type != b->type)
3532                 return a->type < b->type ? -1 : 1;
3533
3534         if (a->type == EXT_KCFG) {
3535                 /* descending order by alignment requirements */
3536                 if (a->kcfg.align != b->kcfg.align)
3537                         return a->kcfg.align > b->kcfg.align ? -1 : 1;
3538                 /* ascending order by size, within same alignment class */
3539                 if (a->kcfg.sz != b->kcfg.sz)
3540                         return a->kcfg.sz < b->kcfg.sz ? -1 : 1;
3541         }
3542
3543         /* resolve ties by name */
3544         return strcmp(a->name, b->name);
3545 }
3546
/*
 * Return the ID of the first 32-bit INT type in @btf, or 0 if there is none.
 */
static int find_int_btf_id(const struct btf *btf)
{
        int id, nr_types = btf__type_cnt(btf);

        for (id = 1; id < nr_types; id++) {
                const struct btf_type *t = btf__type_by_id(btf, id);

                if (!btf_is_int(t))
                        continue;
                if (btf_int_bits(t) != 32)
                        continue;

                return id;
        }

        return 0;
}
3562
3563 static int add_dummy_ksym_var(struct btf *btf)
3564 {
3565         int i, int_btf_id, sec_btf_id, dummy_var_btf_id;
3566         const struct btf_var_secinfo *vs;
3567         const struct btf_type *sec;
3568
3569         if (!btf)
3570                 return 0;
3571
3572         sec_btf_id = btf__find_by_name_kind(btf, KSYMS_SEC,
3573                                             BTF_KIND_DATASEC);
3574         if (sec_btf_id < 0)
3575                 return 0;
3576
3577         sec = btf__type_by_id(btf, sec_btf_id);
3578         vs = btf_var_secinfos(sec);
3579         for (i = 0; i < btf_vlen(sec); i++, vs++) {
3580                 const struct btf_type *vt;
3581
3582                 vt = btf__type_by_id(btf, vs->type);
3583                 if (btf_is_func(vt))
3584                         break;
3585         }
3586
3587         /* No func in ksyms sec.  No need to add dummy var. */
3588         if (i == btf_vlen(sec))
3589                 return 0;
3590
3591         int_btf_id = find_int_btf_id(btf);
3592         dummy_var_btf_id = btf__add_var(btf,
3593                                         "dummy_ksym",
3594                                         BTF_VAR_GLOBAL_ALLOCATED,
3595                                         int_btf_id);
3596         if (dummy_var_btf_id < 0)
3597                 pr_warn("cannot create a dummy_ksym var\n");
3598
3599         return dummy_var_btf_id;
3600 }
3601
3602 static int bpf_object__collect_externs(struct bpf_object *obj)
3603 {
3604         struct btf_type *sec, *kcfg_sec = NULL, *ksym_sec = NULL;
3605         const struct btf_type *t;
3606         struct extern_desc *ext;
3607         int i, n, off, dummy_var_btf_id;
3608         const char *ext_name, *sec_name;
3609         Elf_Scn *scn;
3610         Elf64_Shdr *sh;
3611
3612         if (!obj->efile.symbols)
3613                 return 0;
3614
3615         scn = elf_sec_by_idx(obj, obj->efile.symbols_shndx);
3616         sh = elf_sec_hdr(obj, scn);
3617         if (!sh || sh->sh_entsize != sizeof(Elf64_Sym))
3618                 return -LIBBPF_ERRNO__FORMAT;
3619
3620         dummy_var_btf_id = add_dummy_ksym_var(obj->btf);
3621         if (dummy_var_btf_id < 0)
3622                 return dummy_var_btf_id;
3623
3624         n = sh->sh_size / sh->sh_entsize;
3625         pr_debug("looking for externs among %d symbols...\n", n);
3626
3627         for (i = 0; i < n; i++) {
3628                 Elf64_Sym *sym = elf_sym_by_idx(obj, i);
3629
3630                 if (!sym)
3631                         return -LIBBPF_ERRNO__FORMAT;
3632                 if (!sym_is_extern(sym))
3633                         continue;
3634                 ext_name = elf_sym_str(obj, sym->st_name);
3635                 if (!ext_name || !ext_name[0])
3636                         continue;
3637
3638                 ext = obj->externs;
3639                 ext = libbpf_reallocarray(ext, obj->nr_extern + 1, sizeof(*ext));
3640                 if (!ext)
3641                         return -ENOMEM;
3642                 obj->externs = ext;
3643                 ext = &ext[obj->nr_extern];
3644                 memset(ext, 0, sizeof(*ext));
3645                 obj->nr_extern++;
3646
3647                 ext->btf_id = find_extern_btf_id(obj->btf, ext_name);
3648                 if (ext->btf_id <= 0) {
3649                         pr_warn("failed to find BTF for extern '%s': %d\n",
3650                                 ext_name, ext->btf_id);
3651                         return ext->btf_id;
3652                 }
3653                 t = btf__type_by_id(obj->btf, ext->btf_id);
3654                 ext->name = btf__name_by_offset(obj->btf, t->name_off);
3655                 ext->sym_idx = i;
3656                 ext->is_weak = ELF64_ST_BIND(sym->st_info) == STB_WEAK;
3657
3658                 ext->sec_btf_id = find_extern_sec_btf_id(obj->btf, ext->btf_id);
3659                 if (ext->sec_btf_id <= 0) {
3660                         pr_warn("failed to find BTF for extern '%s' [%d] section: %d\n",
3661                                 ext_name, ext->btf_id, ext->sec_btf_id);
3662                         return ext->sec_btf_id;
3663                 }
3664                 sec = (void *)btf__type_by_id(obj->btf, ext->sec_btf_id);
3665                 sec_name = btf__name_by_offset(obj->btf, sec->name_off);
3666
3667                 if (strcmp(sec_name, KCONFIG_SEC) == 0) {
3668                         if (btf_is_func(t)) {
3669                                 pr_warn("extern function %s is unsupported under %s section\n",
3670                                         ext->name, KCONFIG_SEC);
3671                                 return -ENOTSUP;
3672                         }
3673                         kcfg_sec = sec;
3674                         ext->type = EXT_KCFG;
3675                         ext->kcfg.sz = btf__resolve_size(obj->btf, t->type);
3676                         if (ext->kcfg.sz <= 0) {
3677                                 pr_warn("failed to resolve size of extern (kcfg) '%s': %d\n",
3678                                         ext_name, ext->kcfg.sz);
3679                                 return ext->kcfg.sz;
3680                         }
3681                         ext->kcfg.align = btf__align_of(obj->btf, t->type);
3682                         if (ext->kcfg.align <= 0) {
3683                                 pr_warn("failed to determine alignment of extern (kcfg) '%s': %d\n",
3684                                         ext_name, ext->kcfg.align);
3685                                 return -EINVAL;
3686                         }
3687                         ext->kcfg.type = find_kcfg_type(obj->btf, t->type,
3688                                                         &ext->kcfg.is_signed);
3689                         if (ext->kcfg.type == KCFG_UNKNOWN) {
3690                                 pr_warn("extern (kcfg) '%s' type is unsupported\n", ext_name);
3691                                 return -ENOTSUP;
3692                         }
3693                 } else if (strcmp(sec_name, KSYMS_SEC) == 0) {
3694                         ksym_sec = sec;
3695                         ext->type = EXT_KSYM;
3696                         skip_mods_and_typedefs(obj->btf, t->type,
3697                                                &ext->ksym.type_id);
3698                 } else {
3699                         pr_warn("unrecognized extern section '%s'\n", sec_name);
3700                         return -ENOTSUP;
3701                 }
3702         }
3703         pr_debug("collected %d externs total\n", obj->nr_extern);
3704
3705         if (!obj->nr_extern)
3706                 return 0;
3707
3708         /* sort externs by type, for kcfg ones also by (align, size, name) */
3709         qsort(obj->externs, obj->nr_extern, sizeof(*ext), cmp_externs);
3710
3711         /* for .ksyms section, we need to turn all externs into allocated
3712          * variables in BTF to pass kernel verification; we do this by
3713          * pretending that each extern is a 8-byte variable
3714          */
3715         if (ksym_sec) {
3716                 /* find existing 4-byte integer type in BTF to use for fake
3717                  * extern variables in DATASEC
3718                  */
3719                 int int_btf_id = find_int_btf_id(obj->btf);
3720                 /* For extern function, a dummy_var added earlier
3721                  * will be used to replace the vs->type and
3722                  * its name string will be used to refill
3723                  * the missing param's name.
3724                  */
3725                 const struct btf_type *dummy_var;
3726
3727                 dummy_var = btf__type_by_id(obj->btf, dummy_var_btf_id);
3728                 for (i = 0; i < obj->nr_extern; i++) {
3729                         ext = &obj->externs[i];
3730                         if (ext->type != EXT_KSYM)
3731                                 continue;
3732                         pr_debug("extern (ksym) #%d: symbol %d, name %s\n",
3733                                  i, ext->sym_idx, ext->name);
3734                 }
3735
3736                 sec = ksym_sec;
3737                 n = btf_vlen(sec);
3738                 for (i = 0, off = 0; i < n; i++, off += sizeof(int)) {
3739                         struct btf_var_secinfo *vs = btf_var_secinfos(sec) + i;
3740                         struct btf_type *vt;
3741
3742                         vt = (void *)btf__type_by_id(obj->btf, vs->type);
3743                         ext_name = btf__name_by_offset(obj->btf, vt->name_off);
3744                         ext = find_extern_by_name(obj, ext_name);
3745                         if (!ext) {
3746                                 pr_warn("failed to find extern definition for BTF %s '%s'\n",
3747                                         btf_kind_str(vt), ext_name);
3748                                 return -ESRCH;
3749                         }
3750                         if (btf_is_func(vt)) {
3751                                 const struct btf_type *func_proto;
3752                                 struct btf_param *param;
3753                                 int j;
3754
3755                                 func_proto = btf__type_by_id(obj->btf,
3756                                                              vt->type);
3757                                 param = btf_params(func_proto);
3758                                 /* Reuse the dummy_var string if the
3759                                  * func proto does not have param name.
3760                                  */
3761                                 for (j = 0; j < btf_vlen(func_proto); j++)
3762                                         if (param[j].type && !param[j].name_off)
3763                                                 param[j].name_off =
3764                                                         dummy_var->name_off;
3765                                 vs->type = dummy_var_btf_id;
3766                                 vt->info &= ~0xffff;
3767                                 vt->info |= BTF_FUNC_GLOBAL;
3768                         } else {
3769                                 btf_var(vt)->linkage = BTF_VAR_GLOBAL_ALLOCATED;
3770                                 vt->type = int_btf_id;
3771                         }
3772                         vs->offset = off;
3773                         vs->size = sizeof(int);
3774                 }
3775                 sec->size = off;
3776         }
3777
3778         if (kcfg_sec) {
3779                 sec = kcfg_sec;
3780                 /* for kcfg externs calculate their offsets within a .kconfig map */
3781                 off = 0;
3782                 for (i = 0; i < obj->nr_extern; i++) {
3783                         ext = &obj->externs[i];
3784                         if (ext->type != EXT_KCFG)
3785                                 continue;
3786
3787                         ext->kcfg.data_off = roundup(off, ext->kcfg.align);
3788                         off = ext->kcfg.data_off + ext->kcfg.sz;
3789                         pr_debug("extern (kcfg) #%d: symbol %d, off %u, name %s\n",
3790                                  i, ext->sym_idx, ext->kcfg.data_off, ext->name);
3791                 }
3792                 sec->size = off;
3793                 n = btf_vlen(sec);
3794                 for (i = 0; i < n; i++) {
3795                         struct btf_var_secinfo *vs = btf_var_secinfos(sec) + i;
3796
3797                         t = btf__type_by_id(obj->btf, vs->type);
3798                         ext_name = btf__name_by_offset(obj->btf, t->name_off);
3799                         ext = find_extern_by_name(obj, ext_name);
3800                         if (!ext) {
3801                                 pr_warn("failed to find extern definition for BTF var '%s'\n",
3802                                         ext_name);
3803                                 return -ESRCH;
3804                         }
3805                         btf_var(t)->linkage = BTF_VAR_GLOBAL_ALLOCATED;
3806                         vs->offset = ext->kcfg.data_off;
3807                 }
3808         }
3809         return 0;
3810 }
3811
3812 static bool prog_is_subprog(const struct bpf_object *obj, const struct bpf_program *prog)
3813 {
3814         return prog->sec_idx == obj->efile.text_shndx && obj->nr_programs > 1;
3815 }
3816
3817 struct bpf_program *
3818 bpf_object__find_program_by_name(const struct bpf_object *obj,
3819                                  const char *name)
3820 {
3821         struct bpf_program *prog;
3822
3823         bpf_object__for_each_program(prog, obj) {
3824                 if (prog_is_subprog(obj, prog))
3825                         continue;
3826                 if (!strcmp(prog->name, name))
3827                         return prog;
3828         }
3829         return errno = ENOENT, NULL;
3830 }
3831
3832 static bool bpf_object__shndx_is_data(const struct bpf_object *obj,
3833                                       int shndx)
3834 {
3835         switch (obj->efile.secs[shndx].sec_type) {
3836         case SEC_BSS:
3837         case SEC_DATA:
3838         case SEC_RODATA:
3839                 return true;
3840         default:
3841                 return false;
3842         }
3843 }
3844
3845 static bool bpf_object__shndx_is_maps(const struct bpf_object *obj,
3846                                       int shndx)
3847 {
3848         return shndx == obj->efile.maps_shndx ||
3849                shndx == obj->efile.btf_maps_shndx;
3850 }
3851
3852 static enum libbpf_map_type
3853 bpf_object__section_to_libbpf_map_type(const struct bpf_object *obj, int shndx)
3854 {
3855         if (shndx == obj->efile.symbols_shndx)
3856                 return LIBBPF_MAP_KCONFIG;
3857
3858         switch (obj->efile.secs[shndx].sec_type) {
3859         case SEC_BSS:
3860                 return LIBBPF_MAP_BSS;
3861         case SEC_DATA:
3862                 return LIBBPF_MAP_DATA;
3863         case SEC_RODATA:
3864                 return LIBBPF_MAP_RODATA;
3865         default:
3866                 return LIBBPF_MAP_UNSPEC;
3867         }
3868 }
3869
/* Classify a single ELF relocation that targets instruction @insn_idx of
 * @prog and record the outcome in @reloc_desc.
 *
 * @prog:       program owning the relocated instruction
 * @reloc_desc: descriptor to fill in (type, insn_idx and, depending on the
 *              relocation kind, map_idx and sym_off)
 * @insn_idx:   index of the instruction within @prog->insns
 * @sym_name:   symbol name, used for log messages only
 * @sym:        ELF symbol the relocation refers to
 * @rel:        ELF relocation entry; only its symbol index is read here
 *
 * The checks below are tried in order and the first match wins:
 *   - extern symbol        -> RELO_EXTERN_FUNC or RELO_EXTERN_VAR
 *   - call instruction     -> RELO_CALL
 *   - sub-program address  -> RELO_SUBPROG_ADDR
 *   - map reference        -> RELO_LD64
 *   - global data          -> RELO_DATA
 *
 * Returns 0 on success and -LIBBPF_ERRNO__RELOC when the relocation cannot
 * be classified or fails validation.
 */
static int bpf_program__record_reloc(struct bpf_program *prog,
                                     struct reloc_desc *reloc_desc,
                                     __u32 insn_idx, const char *sym_name,
                                     const Elf64_Sym *sym, const Elf64_Rel *rel)
{
        struct bpf_insn *insn = &prog->insns[insn_idx];
        size_t map_idx, nr_maps = prog->obj->nr_maps;
        struct bpf_object *obj = prog->obj;
        __u32 shdr_idx = sym->st_shndx;
        enum libbpf_map_type type;
        const char *sym_sec_name;
        struct bpf_map *map;

        /* only call and ldimm64 instructions are accepted as relocation targets */
        if (!is_call_insn(insn) && !is_ldimm64_insn(insn)) {
                pr_warn("prog '%s': invalid relo against '%s' for insns[%d].code 0x%x\n",
                        prog->name, sym_name, insn_idx, insn->code);
                return -LIBBPF_ERRNO__RELOC;
        }

        /* extern symbol: locate the extern descriptor by ELF symbol index */
        if (sym_is_extern(sym)) {
                int sym_idx = ELF64_R_SYM(rel->r_info);
                int i, n = obj->nr_extern;
                struct extern_desc *ext;

                for (i = 0; i < n; i++) {
                        ext = &obj->externs[i];
                        if (ext->sym_idx == sym_idx)
                                break;
                }
                if (i >= n) {
                        pr_warn("prog '%s': extern relo failed to find extern for '%s' (%d)\n",
                                prog->name, sym_name, sym_idx);
                        return -LIBBPF_ERRNO__RELOC;
                }
                pr_debug("prog '%s': found extern #%d '%s' (sym %d) for insn #%u\n",
                         prog->name, i, ext->name, ext->sym_idx, insn_idx);
                /* a call opcode means an extern function; otherwise an extern variable */
                if (insn->code == (BPF_JMP | BPF_CALL))
                        reloc_desc->type = RELO_EXTERN_FUNC;
                else
                        reloc_desc->type = RELO_EXTERN_VAR;
                reloc_desc->insn_idx = insn_idx;
                reloc_desc->sym_off = i; /* sym_off stores extern index */
                return 0;
        }

        /* sub-program call relocation */
        if (is_call_insn(insn)) {
                if (insn->src_reg != BPF_PSEUDO_CALL) {
                        pr_warn("prog '%s': incorrect bpf_call opcode\n", prog->name);
                        return -LIBBPF_ERRNO__RELOC;
                }
                /* text_shndx can be 0, if no default "main" program exists */
                if (!shdr_idx || shdr_idx != obj->efile.text_shndx) {
                        sym_sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, shdr_idx));
                        pr_warn("prog '%s': bad call relo against '%s' in section '%s'\n",
                                prog->name, sym_name, sym_sec_name);
                        return -LIBBPF_ERRNO__RELOC;
                }
                /* the call target must start on an instruction boundary */
                if (sym->st_value % BPF_INSN_SZ) {
                        pr_warn("prog '%s': bad call relo against '%s' at offset %zu\n",
                                prog->name, sym_name, (size_t)sym->st_value);
                        return -LIBBPF_ERRNO__RELOC;
                }
                reloc_desc->type = RELO_CALL;
                reloc_desc->insn_idx = insn_idx;
                reloc_desc->sym_off = sym->st_value;
                return 0;
        }

        /* from here on the symbol has to live in a regular (non-reserved) section */
        if (!shdr_idx || shdr_idx >= SHN_LORESERVE) {
                pr_warn("prog '%s': invalid relo against '%s' in special section 0x%x; forgot to initialize global var?..\n",
                        prog->name, sym_name, shdr_idx);
                return -LIBBPF_ERRNO__RELOC;
        }

        /* loading subprog addresses */
        if (sym_is_subprog(sym, obj->efile.text_shndx)) {
                /* global_func: sym->st_value = offset in the section, insn->imm = 0.
                 * local_func: sym->st_value = 0, insn->imm = offset in the section.
                 */
                if ((sym->st_value % BPF_INSN_SZ) || (insn->imm % BPF_INSN_SZ)) {
                        pr_warn("prog '%s': bad subprog addr relo against '%s' at offset %zu+%d\n",
                                prog->name, sym_name, (size_t)sym->st_value, insn->imm);
                        return -LIBBPF_ERRNO__RELOC;
                }

                reloc_desc->type = RELO_SUBPROG_ADDR;
                reloc_desc->insn_idx = insn_idx;
                reloc_desc->sym_off = sym->st_value;
                return 0;
        }

        type = bpf_object__section_to_libbpf_map_type(obj, shdr_idx);
        sym_sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, shdr_idx));

        /* generic map reference relocation */
        if (type == LIBBPF_MAP_UNSPEC) {
                if (!bpf_object__shndx_is_maps(obj, shdr_idx)) {
                        pr_warn("prog '%s': bad map relo against '%s' in section '%s'\n",
                                prog->name, sym_name, sym_sec_name);
                        return -LIBBPF_ERRNO__RELOC;
                }
                /* match a map by libbpf type, section index and offset within the section */
                for (map_idx = 0; map_idx < nr_maps; map_idx++) {
                        map = &obj->maps[map_idx];
                        if (map->libbpf_type != type ||
                            map->sec_idx != sym->st_shndx ||
                            map->sec_offset != sym->st_value)
                                continue;
                        pr_debug("prog '%s': found map %zd (%s, sec %d, off %zu) for insn #%u\n",
                                 prog->name, map_idx, map->name, map->sec_idx,
                                 map->sec_offset, insn_idx);
                        break;
                }
                if (map_idx >= nr_maps) {
                        pr_warn("prog '%s': map relo failed to find map for section '%s', off %zu\n",
                                prog->name, sym_sec_name, (size_t)sym->st_value);
                        return -LIBBPF_ERRNO__RELOC;
                }
                reloc_desc->type = RELO_LD64;
                reloc_desc->insn_idx = insn_idx;
                reloc_desc->map_idx = map_idx;
                reloc_desc->sym_off = 0; /* sym->st_value determines map_idx */
                return 0;
        }

        /* global data map relocation */
        if (!bpf_object__shndx_is_data(obj, shdr_idx)) {
                pr_warn("prog '%s': bad data relo against section '%s'\n",
                        prog->name, sym_sec_name);
                return -LIBBPF_ERRNO__RELOC;
        }
        /* data maps are matched by libbpf type and section index only;
         * the symbol value is kept as the offset in sym_off below
         */
        for (map_idx = 0; map_idx < nr_maps; map_idx++) {
                map = &obj->maps[map_idx];
                if (map->libbpf_type != type || map->sec_idx != sym->st_shndx)
                        continue;
                pr_debug("prog '%s': found data map %zd (%s, sec %d, off %zu) for insn %u\n",
                         prog->name, map_idx, map->name, map->sec_idx,
                         map->sec_offset, insn_idx);
                break;
        }
        if (map_idx >= nr_maps) {
                pr_warn("prog '%s': data relo failed to find map for section '%s'\n",
                        prog->name, sym_sec_name);
                return -LIBBPF_ERRNO__RELOC;
        }

        reloc_desc->type = RELO_DATA;
        reloc_desc->insn_idx = insn_idx;
        reloc_desc->map_idx = map_idx;
        reloc_desc->sym_off = sym->st_value;
        return 0;
}
4022
4023 static bool prog_contains_insn(const struct bpf_program *prog, size_t insn_idx)
4024 {
4025         return insn_idx >= prog->sec_insn_off &&
4026                insn_idx < prog->sec_insn_off + prog->sec_insn_cnt;
4027 }
4028
4029 static struct bpf_program *find_prog_by_sec_insn(const struct bpf_object *obj,
4030                                                  size_t sec_idx, size_t insn_idx)
4031 {
4032         int l = 0, r = obj->nr_programs - 1, m;
4033         struct bpf_program *prog;
4034
4035         while (l < r) {
4036                 m = l + (r - l + 1) / 2;
4037                 prog = &obj->programs[m];
4038
4039                 if (prog->sec_idx < sec_idx ||
4040                     (prog->sec_idx == sec_idx && prog->sec_insn_off <= insn_idx))
4041                         l = m;
4042                 else
4043                         r = m - 1;
4044         }
4045         /* matching program could be at index l, but it still might be the
4046          * wrong one, so we need to double check conditions for the last time
4047          */
4048         prog = &obj->programs[l];
4049         if (prog->sec_idx == sec_idx && prog_contains_insn(prog, insn_idx))
4050                 return prog;
4051         return NULL;
4052 }
4053
4054 static int
4055 bpf_object__collect_prog_relos(struct bpf_object *obj, Elf64_Shdr *shdr, Elf_Data *data)
4056 {
4057         const char *relo_sec_name, *sec_name;
4058         size_t sec_idx = shdr->sh_info, sym_idx;
4059         struct bpf_program *prog;
4060         struct reloc_desc *relos;
4061         int err, i, nrels;
4062         const char *sym_name;
4063         __u32 insn_idx;
4064         Elf_Scn *scn;
4065         Elf_Data *scn_data;
4066         Elf64_Sym *sym;
4067         Elf64_Rel *rel;
4068
4069         if (sec_idx >= obj->efile.sec_cnt)
4070                 return -EINVAL;
4071
4072         scn = elf_sec_by_idx(obj, sec_idx);
4073         scn_data = elf_sec_data(obj, scn);
4074
4075         relo_sec_name = elf_sec_str(obj, shdr->sh_name);
4076         sec_name = elf_sec_name(obj, scn);
4077         if (!relo_sec_name || !sec_name)
4078                 return -EINVAL;
4079
4080         pr_debug("sec '%s': collecting relocation for section(%zu) '%s'\n",
4081                  relo_sec_name, sec_idx, sec_name);
4082         nrels = shdr->sh_size / shdr->sh_entsize;
4083
4084         for (i = 0; i < nrels; i++) {
4085                 rel = elf_rel_by_idx(data, i);
4086                 if (!rel) {
4087                         pr_warn("sec '%s': failed to get relo #%d\n", relo_sec_name, i);
4088                         return -LIBBPF_ERRNO__FORMAT;
4089                 }
4090
4091                 sym_idx = ELF64_R_SYM(rel->r_info);
4092                 sym = elf_sym_by_idx(obj, sym_idx);
4093                 if (!sym) {
4094                         pr_warn("sec '%s': symbol #%zu not found for relo #%d\n",
4095                                 relo_sec_name, sym_idx, i);
4096                         return -LIBBPF_ERRNO__FORMAT;
4097                 }
4098
4099                 if (sym->st_shndx >= obj->efile.sec_cnt) {
4100                         pr_warn("sec '%s': corrupted symbol #%zu pointing to invalid section #%zu for relo #%d\n",
4101                                 relo_sec_name, sym_idx, (size_t)sym->st_shndx, i);
4102                         return -LIBBPF_ERRNO__FORMAT;
4103                 }
4104
4105                 if (rel->r_offset % BPF_INSN_SZ || rel->r_offset >= scn_data->d_size) {
4106                         pr_warn("sec '%s': invalid offset 0x%zx for relo #%d\n",
4107                                 relo_sec_name, (size_t)rel->r_offset, i);
4108                         return -LIBBPF_ERRNO__FORMAT;
4109                 }
4110
4111                 insn_idx = rel->r_offset / BPF_INSN_SZ;
4112                 /* relocations against static functions are recorded as
4113                  * relocations against the section that contains a function;
4114                  * in such case, symbol will be STT_SECTION and sym.st_name
4115                  * will point to empty string (0), so fetch section name
4116                  * instead
4117                  */
4118                 if (ELF64_ST_TYPE(sym->st_info) == STT_SECTION && sym->st_name == 0)
4119                         sym_name = elf_sec_name(obj, elf_sec_by_idx(obj, sym->st_shndx));
4120                 else
4121                         sym_name = elf_sym_str(obj, sym->st_name);
4122                 sym_name = sym_name ?: "<?";
4123
4124                 pr_debug("sec '%s': relo #%d: insn #%u against '%s'\n",
4125                          relo_sec_name, i, insn_idx, sym_name);
4126
4127                 prog = find_prog_by_sec_insn(obj, sec_idx, insn_idx);
4128                 if (!prog) {
4129                         pr_debug("sec '%s': relo #%d: couldn't find program in section '%s' for insn #%u, probably overridden weak function, skipping...\n",
4130                                 relo_sec_name, i, sec_name, insn_idx);
4131                         continue;
4132                 }
4133
4134                 relos = libbpf_reallocarray(prog->reloc_desc,
4135                                             prog->nr_reloc + 1, sizeof(*relos));
4136                 if (!relos)
4137                         return -ENOMEM;
4138                 prog->reloc_desc = relos;
4139
4140                 /* adjust insn_idx to local BPF program frame of reference */
4141                 insn_idx -= prog->sec_insn_off;
4142                 err = bpf_program__record_reloc(prog, &relos[prog->nr_reloc],
4143                                                 insn_idx, sym_name, sym, rel);
4144                 if (err)
4145                         return err;
4146
4147                 prog->nr_reloc++;
4148         }
4149         return 0;
4150 }
4151
4152 static int bpf_map_find_btf_info(struct bpf_object *obj, struct bpf_map *map)
4153 {
4154         int id;
4155
4156         if (!obj->btf)
4157                 return -ENOENT;
4158
4159         /* if it's BTF-defined map, we don't need to search for type IDs.
4160          * For struct_ops map, it does not need btf_key_type_id and
4161          * btf_value_type_id.
4162          */
4163         if (map->sec_idx == obj->efile.btf_maps_shndx || bpf_map__is_struct_ops(map))
4164                 return 0;
4165
4166         /*
4167          * LLVM annotates global data differently in BTF, that is,
4168          * only as '.data', '.bss' or '.rodata'.
4169          */
4170         if (!bpf_map__is_internal(map))
4171                 return -ENOENT;
4172
4173         id = btf__find_by_name(obj->btf, map->real_name);
4174         if (id < 0)
4175                 return id;
4176
4177         map->btf_key_type_id = 0;
4178         map->btf_value_type_id = id;
4179         return 0;
4180 }
4181
4182 static int bpf_get_map_info_from_fdinfo(int fd, struct bpf_map_info *info)
4183 {
4184         char file[PATH_MAX], buff[4096];
4185         FILE *fp;
4186         __u32 val;
4187         int err;
4188
4189         snprintf(file, sizeof(file), "/proc/%d/fdinfo/%d", getpid(), fd);
4190         memset(info, 0, sizeof(*info));
4191
4192         fp = fopen(file, "r");
4193         if (!fp) {
4194                 err = -errno;
4195                 pr_warn("failed to open %s: %d. No procfs support?\n", file,
4196                         err);
4197                 return err;
4198         }
4199
4200         while (fgets(buff, sizeof(buff), fp)) {
4201                 if (sscanf(buff, "map_type:\t%u", &val) == 1)
4202                         info->type = val;
4203                 else if (sscanf(buff, "key_size:\t%u", &val) == 1)
4204                         info->key_size = val;
4205                 else if (sscanf(buff, "value_size:\t%u", &val) == 1)
4206                         info->value_size = val;
4207                 else if (sscanf(buff, "max_entries:\t%u", &val) == 1)
4208                         info->max_entries = val;
4209                 else if (sscanf(buff, "map_flags:\t%i", &val) == 1)
4210                         info->map_flags = val;
4211         }
4212
4213         fclose(fp);
4214
4215         return 0;
4216 }
4217
4218 bool bpf_map__autocreate(const struct bpf_map *map)
4219 {
4220         return map->autocreate;
4221 }
4222
4223 int bpf_map__set_autocreate(struct bpf_map *map, bool autocreate)
4224 {
4225         if (map->obj->loaded)
4226                 return libbpf_err(-EBUSY);
4227
4228         map->autocreate = autocreate;
4229         return 0;
4230 }
4231
4232 int bpf_map__reuse_fd(struct bpf_map *map, int fd)
4233 {
4234         struct bpf_map_info info = {};
4235         __u32 len = sizeof(info);
4236         int new_fd, err;
4237         char *new_name;
4238
4239         err = bpf_obj_get_info_by_fd(fd, &info, &len);
4240         if (err && errno == EINVAL)
4241                 err = bpf_get_map_info_from_fdinfo(fd, &info);
4242         if (err)
4243                 return libbpf_err(err);
4244
4245         new_name = strdup(info.name);
4246         if (!new_name)
4247                 return libbpf_err(-errno);
4248
4249         new_fd = open("/", O_RDONLY | O_CLOEXEC);
4250         if (new_fd < 0) {
4251                 err = -errno;
4252                 goto err_free_new_name;
4253         }
4254
4255         new_fd = dup3(fd, new_fd, O_CLOEXEC);
4256         if (new_fd < 0) {
4257                 err = -errno;
4258                 goto err_close_new_fd;
4259         }
4260
4261         err = zclose(map->fd);
4262         if (err) {
4263                 err = -errno;
4264                 goto err_close_new_fd;
4265         }
4266         free(map->name);
4267
4268         map->fd = new_fd;
4269         map->name = new_name;
4270         map->def.type = info.type;
4271         map->def.key_size = info.key_size;
4272         map->def.value_size = info.value_size;
4273         map->def.max_entries = info.max_entries;
4274         map->def.map_flags = info.map_flags;
4275         map->btf_key_type_id = info.btf_key_type_id;
4276         map->btf_value_type_id = info.btf_value_type_id;
4277         map->reused = true;
4278         map->map_extra = info.map_extra;
4279
4280         return 0;
4281
4282 err_close_new_fd:
4283         close(new_fd);
4284 err_free_new_name:
4285         free(new_name);
4286         return libbpf_err(err);
4287 }
4288
4289 __u32 bpf_map__max_entries(const struct bpf_map *map)
4290 {
4291         return map->def.max_entries;
4292 }
4293
4294 struct bpf_map *bpf_map__inner_map(struct bpf_map *map)
4295 {
4296         if (!bpf_map_type__is_map_in_map(map->def.type))
4297                 return errno = EINVAL, NULL;
4298
4299         return map->inner_map;
4300 }
4301
4302 int bpf_map__set_max_entries(struct bpf_map *map, __u32 max_entries)
4303 {
4304         if (map->fd >= 0)
4305                 return libbpf_err(-EBUSY);
4306         map->def.max_entries = max_entries;
4307         return 0;
4308 }
4309
4310 static int
4311 bpf_object__probe_loading(struct bpf_object *obj)
4312 {
4313         char *cp, errmsg[STRERR_BUFSIZE];
4314         struct bpf_insn insns[] = {
4315                 BPF_MOV64_IMM(BPF_REG_0, 0),
4316                 BPF_EXIT_INSN(),
4317         };
4318         int ret, insn_cnt = ARRAY_SIZE(insns);
4319
4320         if (obj->gen_loader)
4321                 return 0;
4322
4323         ret = bump_rlimit_memlock();
4324         if (ret)
4325                 pr_warn("Failed to bump RLIMIT_MEMLOCK (err = %d), you might need to do it explicitly!\n", ret);
4326
4327         /* make sure basic loading works */
4328         ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, NULL);
4329         if (ret < 0)
4330                 ret = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, NULL);
4331         if (ret < 0) {
4332                 ret = errno;
4333                 cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg));
4334                 pr_warn("Error in %s():%s(%d). Couldn't load trivial BPF "
4335                         "program. Make sure your kernel supports BPF "
4336                         "(CONFIG_BPF_SYSCALL=y) and/or that RLIMIT_MEMLOCK is "
4337                         "set to big enough value.\n", __func__, cp, ret);
4338                 return -ret;
4339         }
4340         close(ret);
4341
4342         return 0;
4343 }
4344
/* Turn the FD-or-error result of a probe into a boolean: a non-negative @fd
 * is closed and reported as 1, a negative one is reported as 0.
 */
static int probe_fd(int fd)
{
        if (fd < 0)
                return 0;

        close(fd);
        return 1;
}
4351
4352 static int probe_kern_prog_name(void)
4353 {
4354         struct bpf_insn insns[] = {
4355                 BPF_MOV64_IMM(BPF_REG_0, 0),
4356                 BPF_EXIT_INSN(),
4357         };
4358         int ret, insn_cnt = ARRAY_SIZE(insns);
4359
4360         /* make sure loading with name works */
4361         ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, "test", "GPL", insns, insn_cnt, NULL);
4362         return probe_fd(ret);
4363 }
4364
4365 static int probe_kern_global_data(void)
4366 {
4367         char *cp, errmsg[STRERR_BUFSIZE];
4368         struct bpf_insn insns[] = {
4369                 BPF_LD_MAP_VALUE(BPF_REG_1, 0, 16),
4370                 BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 42),
4371                 BPF_MOV64_IMM(BPF_REG_0, 0),
4372                 BPF_EXIT_INSN(),
4373         };
4374         int ret, map, insn_cnt = ARRAY_SIZE(insns);
4375
4376         map = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, sizeof(int), 32, 1, NULL);
4377         if (map < 0) {
4378                 ret = -errno;
4379                 cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg));
4380                 pr_warn("Error in %s():%s(%d). Couldn't create simple array map.\n",
4381                         __func__, cp, -ret);
4382                 return ret;
4383         }
4384
4385         insns[0].imm = map;
4386
4387         ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, NULL);
4388         close(map);
4389         return probe_fd(ret);
4390 }
4391
4392 static int probe_kern_btf(void)
4393 {
4394         static const char strs[] = "\0int";
4395         __u32 types[] = {
4396                 /* int */
4397                 BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4),
4398         };
4399
4400         return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
4401                                              strs, sizeof(strs)));
4402 }
4403
4404 static int probe_kern_btf_func(void)
4405 {
4406         static const char strs[] = "\0int\0x\0a";
4407         /* void x(int a) {} */
4408         __u32 types[] = {
4409                 /* int */
4410                 BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4),  /* [1] */
4411                 /* FUNC_PROTO */                                /* [2] */
4412                 BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 0),
4413                 BTF_PARAM_ENC(7, 1),
4414                 /* FUNC x */                                    /* [3] */
4415                 BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0), 2),
4416         };
4417
4418         return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
4419                                              strs, sizeof(strs)));
4420 }
4421
4422 static int probe_kern_btf_func_global(void)
4423 {
4424         static const char strs[] = "\0int\0x\0a";
4425         /* static void x(int a) {} */
4426         __u32 types[] = {
4427                 /* int */
4428                 BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4),  /* [1] */
4429                 /* FUNC_PROTO */                                /* [2] */
4430                 BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 0),
4431                 BTF_PARAM_ENC(7, 1),
4432                 /* FUNC x BTF_FUNC_GLOBAL */                    /* [3] */
4433                 BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, BTF_FUNC_GLOBAL), 2),
4434         };
4435
4436         return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
4437                                              strs, sizeof(strs)));
4438 }
4439
4440 static int probe_kern_btf_datasec(void)
4441 {
4442         static const char strs[] = "\0x\0.data";
4443         /* static int a; */
4444         __u32 types[] = {
4445                 /* int */
4446                 BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),  /* [1] */
4447                 /* VAR x */                                     /* [2] */
4448                 BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1),
4449                 BTF_VAR_STATIC,
4450                 /* DATASEC val */                               /* [3] */
4451                 BTF_TYPE_ENC(3, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4),
4452                 BTF_VAR_SECINFO_ENC(2, 0, 4),
4453         };
4454
4455         return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
4456                                              strs, sizeof(strs)));
4457 }
4458
4459 static int probe_kern_btf_float(void)
4460 {
4461         static const char strs[] = "\0float";
4462         __u32 types[] = {
4463                 /* float */
4464                 BTF_TYPE_FLOAT_ENC(1, 4),
4465         };
4466
4467         return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
4468                                              strs, sizeof(strs)));
4469 }
4470
4471 static int probe_kern_btf_decl_tag(void)
4472 {
4473         static const char strs[] = "\0tag";
4474         __u32 types[] = {
4475                 /* int */
4476                 BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),  /* [1] */
4477                 /* VAR x */                                     /* [2] */
4478                 BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1),
4479                 BTF_VAR_STATIC,
4480                 /* attr */
4481                 BTF_TYPE_DECL_TAG_ENC(1, 2, -1),
4482         };
4483
4484         return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
4485                                              strs, sizeof(strs)));
4486 }
4487
4488 static int probe_kern_btf_type_tag(void)
4489 {
4490         static const char strs[] = "\0tag";
4491         __u32 types[] = {
4492                 /* int */
4493                 BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),          /* [1] */
4494                 /* attr */
4495                 BTF_TYPE_TYPE_TAG_ENC(1, 1),                            /* [2] */
4496                 /* ptr */
4497                 BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 2),   /* [3] */
4498         };
4499
4500         return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
4501                                              strs, sizeof(strs)));
4502 }
4503
4504 static int probe_kern_array_mmap(void)
4505 {
4506         LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_MMAPABLE);
4507         int fd;
4508
4509         fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, sizeof(int), sizeof(int), 1, &opts);
4510         return probe_fd(fd);
4511 }
4512
4513 static int probe_kern_exp_attach_type(void)
4514 {
4515         LIBBPF_OPTS(bpf_prog_load_opts, opts, .expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE);
4516         struct bpf_insn insns[] = {
4517                 BPF_MOV64_IMM(BPF_REG_0, 0),
4518                 BPF_EXIT_INSN(),
4519         };
4520         int fd, insn_cnt = ARRAY_SIZE(insns);
4521
4522         /* use any valid combination of program type and (optional)
4523          * non-zero expected attach type (i.e., not a BPF_CGROUP_INET_INGRESS)
4524          * to see if kernel supports expected_attach_type field for
4525          * BPF_PROG_LOAD command
4526          */
4527         fd = bpf_prog_load(BPF_PROG_TYPE_CGROUP_SOCK, NULL, "GPL", insns, insn_cnt, &opts);
4528         return probe_fd(fd);
4529 }
4530
4531 static int probe_kern_probe_read_kernel(void)
4532 {
4533         struct bpf_insn insns[] = {
4534                 BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),   /* r1 = r10 (fp) */
4535                 BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),  /* r1 += -8 */
4536                 BPF_MOV64_IMM(BPF_REG_2, 8),            /* r2 = 8 */
4537                 BPF_MOV64_IMM(BPF_REG_3, 0),            /* r3 = 0 */
4538                 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_probe_read_kernel),
4539                 BPF_EXIT_INSN(),
4540         };
4541         int fd, insn_cnt = ARRAY_SIZE(insns);
4542
4543         fd = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, NULL);
4544         return probe_fd(fd);
4545 }
4546
4547 static int probe_prog_bind_map(void)
4548 {
4549         char *cp, errmsg[STRERR_BUFSIZE];
4550         struct bpf_insn insns[] = {
4551                 BPF_MOV64_IMM(BPF_REG_0, 0),
4552                 BPF_EXIT_INSN(),
4553         };
4554         int ret, map, prog, insn_cnt = ARRAY_SIZE(insns);
4555
4556         map = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, sizeof(int), 32, 1, NULL);
4557         if (map < 0) {
4558                 ret = -errno;
4559                 cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg));
4560                 pr_warn("Error in %s():%s(%d). Couldn't create simple array map.\n",
4561                         __func__, cp, -ret);
4562                 return ret;
4563         }
4564
4565         prog = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, NULL);
4566         if (prog < 0) {
4567                 close(map);
4568                 return 0;
4569         }
4570
4571         ret = bpf_prog_bind_map(prog, map, NULL);
4572
4573         close(map);
4574         close(prog);
4575
4576         return ret >= 0;
4577 }
4578
4579 static int probe_module_btf(void)
4580 {
4581         static const char strs[] = "\0int";
4582         __u32 types[] = {
4583                 /* int */
4584                 BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4),
4585         };
4586         struct bpf_btf_info info;
4587         __u32 len = sizeof(info);
4588         char name[16];
4589         int fd, err;
4590
4591         fd = libbpf__load_raw_btf((char *)types, sizeof(types), strs, sizeof(strs));
4592         if (fd < 0)
4593                 return 0; /* BTF not supported at all */
4594
4595         memset(&info, 0, sizeof(info));
4596         info.name = ptr_to_u64(name);
4597         info.name_len = sizeof(name);
4598
4599         /* check that BPF_OBJ_GET_INFO_BY_FD supports specifying name pointer;
4600          * kernel's module BTF support coincides with support for
4601          * name/name_len fields in struct bpf_btf_info.
4602          */
4603         err = bpf_obj_get_info_by_fd(fd, &info, &len);
4604         close(fd);
4605         return !err;
4606 }
4607
4608 static int probe_perf_link(void)
4609 {
4610         struct bpf_insn insns[] = {
4611                 BPF_MOV64_IMM(BPF_REG_0, 0),
4612                 BPF_EXIT_INSN(),
4613         };
4614         int prog_fd, link_fd, err;
4615
4616         prog_fd = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL",
4617                                 insns, ARRAY_SIZE(insns), NULL);
4618         if (prog_fd < 0)
4619                 return -errno;
4620
4621         /* use invalid perf_event FD to get EBADF, if link is supported;
4622          * otherwise EINVAL should be returned
4623          */
4624         link_fd = bpf_link_create(prog_fd, -1, BPF_PERF_EVENT, NULL);
4625         err = -errno; /* close() can clobber errno */
4626
4627         if (link_fd >= 0)
4628                 close(link_fd);
4629         close(prog_fd);
4630
4631         return link_fd < 0 && err == -EBADF;
4632 }
4633
4634 static int probe_kern_bpf_cookie(void)
4635 {
4636         struct bpf_insn insns[] = {
4637                 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_attach_cookie),
4638                 BPF_EXIT_INSN(),
4639         };
4640         int ret, insn_cnt = ARRAY_SIZE(insns);
4641
4642         ret = bpf_prog_load(BPF_PROG_TYPE_KPROBE, NULL, "GPL", insns, insn_cnt, NULL);
4643         return probe_fd(ret);
4644 }
4645
4646 static int probe_kern_btf_enum64(void)
4647 {
4648         static const char strs[] = "\0enum64";
4649         __u32 types[] = {
4650                 BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 0), 8),
4651         };
4652
4653         return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
4654                                              strs, sizeof(strs)));
4655 }
4656
/* Cached outcome of a kernel feature probe. */
enum kern_feature_result {
	/* must stay zero: entries without an explicit result start out here */
	FEAT_UNKNOWN = 0,
	FEAT_SUPPORTED,		/* == 1 */
	FEAT_MISSING,		/* == 2 */
};
4662
4663 typedef int (*feature_probe_fn)(void);
4664
4665 static struct kern_feature_desc {
4666         const char *desc;
4667         feature_probe_fn probe;
4668         enum kern_feature_result res;
4669 } feature_probes[__FEAT_CNT] = {
4670         [FEAT_PROG_NAME] = {
4671                 "BPF program name", probe_kern_prog_name,
4672         },
4673         [FEAT_GLOBAL_DATA] = {
4674                 "global variables", probe_kern_global_data,
4675         },
4676         [FEAT_BTF] = {
4677                 "minimal BTF", probe_kern_btf,
4678         },
4679         [FEAT_BTF_FUNC] = {
4680                 "BTF functions", probe_kern_btf_func,
4681         },
4682         [FEAT_BTF_GLOBAL_FUNC] = {
4683                 "BTF global function", probe_kern_btf_func_global,
4684         },
4685         [FEAT_BTF_DATASEC] = {
4686                 "BTF data section and variable", probe_kern_btf_datasec,
4687         },
4688         [FEAT_ARRAY_MMAP] = {
4689                 "ARRAY map mmap()", probe_kern_array_mmap,
4690         },
4691         [FEAT_EXP_ATTACH_TYPE] = {
4692                 "BPF_PROG_LOAD expected_attach_type attribute",
4693                 probe_kern_exp_attach_type,
4694         },
4695         [FEAT_PROBE_READ_KERN] = {
4696                 "bpf_probe_read_kernel() helper", probe_kern_probe_read_kernel,
4697         },
4698         [FEAT_PROG_BIND_MAP] = {
4699                 "BPF_PROG_BIND_MAP support", probe_prog_bind_map,
4700         },
4701         [FEAT_MODULE_BTF] = {
4702                 "module BTF support", probe_module_btf,
4703         },
4704         [FEAT_BTF_FLOAT] = {
4705                 "BTF_KIND_FLOAT support", probe_kern_btf_float,
4706         },
4707         [FEAT_PERF_LINK] = {
4708                 "BPF perf link support", probe_perf_link,
4709         },
4710         [FEAT_BTF_DECL_TAG] = {
4711                 "BTF_KIND_DECL_TAG support", probe_kern_btf_decl_tag,
4712         },
4713         [FEAT_BTF_TYPE_TAG] = {
4714                 "BTF_KIND_TYPE_TAG support", probe_kern_btf_type_tag,
4715         },
4716         [FEAT_MEMCG_ACCOUNT] = {
4717                 "memcg-based memory accounting", probe_memcg_account,
4718         },
4719         [FEAT_BPF_COOKIE] = {
4720                 "BPF cookie support", probe_kern_bpf_cookie,
4721         },
4722         [FEAT_BTF_ENUM64] = {
4723                 "BTF_KIND_ENUM64 support", probe_kern_btf_enum64,
4724         },
4725 };
4726
4727 bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id)
4728 {
4729         struct kern_feature_desc *feat = &feature_probes[feat_id];
4730         int ret;
4731
4732         if (obj && obj->gen_loader)
4733                 /* To generate loader program assume the latest kernel
4734                  * to avoid doing extra prog_load, map_create syscalls.
4735                  */
4736                 return true;
4737
4738         if (READ_ONCE(feat->res) == FEAT_UNKNOWN) {
4739                 ret = feat->probe();
4740                 if (ret > 0) {
4741                         WRITE_ONCE(feat->res, FEAT_SUPPORTED);
4742                 } else if (ret == 0) {
4743                         WRITE_ONCE(feat->res, FEAT_MISSING);
4744                 } else {
4745                         pr_warn("Detection of kernel %s support failed: %d\n", feat->desc, ret);
4746                         WRITE_ONCE(feat->res, FEAT_MISSING);
4747                 }
4748         }
4749
4750         return READ_ONCE(feat->res) == FEAT_SUPPORTED;
4751 }
4752
4753 static bool map_is_reuse_compat(const struct bpf_map *map, int map_fd)
4754 {
4755         struct bpf_map_info map_info = {};
4756         char msg[STRERR_BUFSIZE];
4757         __u32 map_info_len;
4758         int err;
4759
4760         map_info_len = sizeof(map_info);
4761
4762         err = bpf_obj_get_info_by_fd(map_fd, &map_info, &map_info_len);
4763         if (err && errno == EINVAL)
4764                 err = bpf_get_map_info_from_fdinfo(map_fd, &map_info);
4765         if (err) {
4766                 pr_warn("failed to get map info for map FD %d: %s\n", map_fd,
4767                         libbpf_strerror_r(errno, msg, sizeof(msg)));
4768                 return false;
4769         }
4770
4771         return (map_info.type == map->def.type &&
4772                 map_info.key_size == map->def.key_size &&
4773                 map_info.value_size == map->def.value_size &&
4774                 map_info.max_entries == map->def.max_entries &&
4775                 map_info.map_flags == map->def.map_flags &&
4776                 map_info.map_extra == map->map_extra);
4777 }
4778
4779 static int
4780 bpf_object__reuse_map(struct bpf_map *map)
4781 {
4782         char *cp, errmsg[STRERR_BUFSIZE];
4783         int err, pin_fd;
4784
4785         pin_fd = bpf_obj_get(map->pin_path);
4786         if (pin_fd < 0) {
4787                 err = -errno;
4788                 if (err == -ENOENT) {
4789                         pr_debug("found no pinned map to reuse at '%s'\n",
4790                                  map->pin_path);
4791                         return 0;
4792                 }
4793
4794                 cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg));
4795                 pr_warn("couldn't retrieve pinned map '%s': %s\n",
4796                         map->pin_path, cp);
4797                 return err;
4798         }
4799
4800         if (!map_is_reuse_compat(map, pin_fd)) {
4801                 pr_warn("couldn't reuse pinned map at '%s': parameter mismatch\n",
4802                         map->pin_path);
4803                 close(pin_fd);
4804                 return -EINVAL;
4805         }
4806
4807         err = bpf_map__reuse_fd(map, pin_fd);
4808         close(pin_fd);
4809         if (err) {
4810                 return err;
4811         }
4812         map->pinned = true;
4813         pr_debug("reused pinned map at '%s'\n", map->pin_path);
4814
4815         return 0;
4816 }
4817
4818 static int
4819 bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map)
4820 {
4821         enum libbpf_map_type map_type = map->libbpf_type;
4822         char *cp, errmsg[STRERR_BUFSIZE];
4823         int err, zero = 0;
4824
4825         if (obj->gen_loader) {
4826                 bpf_gen__map_update_elem(obj->gen_loader, map - obj->maps,
4827                                          map->mmaped, map->def.value_size);
4828                 if (map_type == LIBBPF_MAP_RODATA || map_type == LIBBPF_MAP_KCONFIG)
4829                         bpf_gen__map_freeze(obj->gen_loader, map - obj->maps);
4830                 return 0;
4831         }
4832         err = bpf_map_update_elem(map->fd, &zero, map->mmaped, 0);
4833         if (err) {
4834                 err = -errno;
4835                 cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
4836                 pr_warn("Error setting initial map(%s) contents: %s\n",
4837                         map->name, cp);
4838                 return err;
4839         }
4840
4841         /* Freeze .rodata and .kconfig map as read-only from syscall side. */
4842         if (map_type == LIBBPF_MAP_RODATA || map_type == LIBBPF_MAP_KCONFIG) {
4843                 err = bpf_map_freeze(map->fd);
4844                 if (err) {
4845                         err = -errno;
4846                         cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
4847                         pr_warn("Error freezing map(%s) as read-only: %s\n",
4848                                 map->name, cp);
4849                         return err;
4850                 }
4851         }
4852         return 0;
4853 }
4854
4855 static void bpf_map__destroy(struct bpf_map *map);
4856
4857 static size_t adjust_ringbuf_sz(size_t sz)
4858 {
4859         __u32 page_sz = sysconf(_SC_PAGE_SIZE);
4860         __u32 mul;
4861
4862         /* if user forgot to set any size, make sure they see error */
4863         if (sz == 0)
4864                 return 0;
4865         /* Kernel expects BPF_MAP_TYPE_RINGBUF's max_entries to be
4866          * a power-of-2 multiple of kernel's page size. If user diligently
4867          * satisified these conditions, pass the size through.
4868          */
4869         if ((sz % page_sz) == 0 && is_pow_of_2(sz / page_sz))
4870                 return sz;
4871
4872         /* Otherwise find closest (page_sz * power_of_2) product bigger than
4873          * user-set size to satisfy both user size request and kernel
4874          * requirements and substitute correct max_entries for map creation.
4875          */
4876         for (mul = 1; mul <= UINT_MAX / page_sz; mul <<= 1) {
4877                 if (mul * page_sz > sz)
4878                         return mul * page_sz;
4879         }
4880
4881         /* if it's impossible to satisfy the conditions (i.e., user size is
4882          * very close to UINT_MAX but is not a power-of-2 multiple of
4883          * page_size) then just return original size and let kernel reject it
4884          */
4885         return sz;
4886 }
4887
4888 static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, bool is_inner)
4889 {
4890         LIBBPF_OPTS(bpf_map_create_opts, create_attr);
4891         struct bpf_map_def *def = &map->def;
4892         const char *map_name = NULL;
4893         int err = 0;
4894
4895         if (kernel_supports(obj, FEAT_PROG_NAME))
4896                 map_name = map->name;
4897         create_attr.map_ifindex = map->map_ifindex;
4898         create_attr.map_flags = def->map_flags;
4899         create_attr.numa_node = map->numa_node;
4900         create_attr.map_extra = map->map_extra;
4901
4902         if (bpf_map__is_struct_ops(map))
4903                 create_attr.btf_vmlinux_value_type_id = map->btf_vmlinux_value_type_id;
4904
4905         if (obj->btf && btf__fd(obj->btf) >= 0) {
4906                 create_attr.btf_fd = btf__fd(obj->btf);
4907                 create_attr.btf_key_type_id = map->btf_key_type_id;
4908                 create_attr.btf_value_type_id = map->btf_value_type_id;
4909         }
4910
4911         if (bpf_map_type__is_map_in_map(def->type)) {
4912                 if (map->inner_map) {
4913                         err = bpf_object__create_map(obj, map->inner_map, true);
4914                         if (err) {
4915                                 pr_warn("map '%s': failed to create inner map: %d\n",
4916                                         map->name, err);
4917                                 return err;
4918                         }
4919                         map->inner_map_fd = bpf_map__fd(map->inner_map);
4920                 }
4921                 if (map->inner_map_fd >= 0)
4922                         create_attr.inner_map_fd = map->inner_map_fd;
4923         }
4924
4925         switch (def->type) {
4926         case BPF_MAP_TYPE_RINGBUF:
4927                 map->def.max_entries = adjust_ringbuf_sz(map->def.max_entries);
4928                 /* fallthrough */
4929         case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
4930         case BPF_MAP_TYPE_CGROUP_ARRAY:
4931         case BPF_MAP_TYPE_STACK_TRACE:
4932         case BPF_MAP_TYPE_ARRAY_OF_MAPS:
4933         case BPF_MAP_TYPE_HASH_OF_MAPS:
4934         case BPF_MAP_TYPE_DEVMAP:
4935         case BPF_MAP_TYPE_DEVMAP_HASH:
4936         case BPF_MAP_TYPE_CPUMAP:
4937         case BPF_MAP_TYPE_XSKMAP:
4938         case BPF_MAP_TYPE_SOCKMAP:
4939         case BPF_MAP_TYPE_SOCKHASH:
4940         case BPF_MAP_TYPE_QUEUE:
4941         case BPF_MAP_TYPE_STACK:
4942                 create_attr.btf_fd = 0;
4943                 create_attr.btf_key_type_id = 0;
4944                 create_attr.btf_value_type_id = 0;
4945                 map->btf_key_type_id = 0;
4946                 map->btf_value_type_id = 0;
4947         default:
4948                 break;
4949         }
4950
4951         if (obj->gen_loader) {
4952                 bpf_gen__map_create(obj->gen_loader, def->type, map_name,
4953                                     def->key_size, def->value_size, def->max_entries,
4954                                     &create_attr, is_inner ? -1 : map - obj->maps);
4955                 /* Pretend to have valid FD to pass various fd >= 0 checks.
4956                  * This fd == 0 will not be used with any syscall and will be reset to -1 eventually.
4957                  */
4958                 map->fd = 0;
4959         } else {
4960                 map->fd = bpf_map_create(def->type, map_name,
4961                                          def->key_size, def->value_size,
4962                                          def->max_entries, &create_attr);
4963         }
4964         if (map->fd < 0 && (create_attr.btf_key_type_id ||
4965                             create_attr.btf_value_type_id)) {
4966                 char *cp, errmsg[STRERR_BUFSIZE];
4967
4968                 err = -errno;
4969                 cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
4970                 pr_warn("Error in bpf_create_map_xattr(%s):%s(%d). Retrying without BTF.\n",
4971                         map->name, cp, err);
4972                 create_attr.btf_fd = 0;
4973                 create_attr.btf_key_type_id = 0;
4974                 create_attr.btf_value_type_id = 0;
4975                 map->btf_key_type_id = 0;
4976                 map->btf_value_type_id = 0;
4977                 map->fd = bpf_map_create(def->type, map_name,
4978                                          def->key_size, def->value_size,
4979                                          def->max_entries, &create_attr);
4980         }
4981
4982         err = map->fd < 0 ? -errno : 0;
4983
4984         if (bpf_map_type__is_map_in_map(def->type) && map->inner_map) {
4985                 if (obj->gen_loader)
4986                         map->inner_map->fd = -1;
4987                 bpf_map__destroy(map->inner_map);
4988                 zfree(&map->inner_map);
4989         }
4990
4991         return err;
4992 }
4993
4994 static int init_map_in_map_slots(struct bpf_object *obj, struct bpf_map *map)
4995 {
4996         const struct bpf_map *targ_map;
4997         unsigned int i;
4998         int fd, err = 0;
4999
5000         for (i = 0; i < map->init_slots_sz; i++) {
5001                 if (!map->init_slots[i])
5002                         continue;
5003
5004                 targ_map = map->init_slots[i];
5005                 fd = bpf_map__fd(targ_map);
5006
5007                 if (obj->gen_loader) {
5008                         bpf_gen__populate_outer_map(obj->gen_loader,
5009                                                     map - obj->maps, i,
5010                                                     targ_map - obj->maps);
5011                 } else {
5012                         err = bpf_map_update_elem(map->fd, &i, &fd, 0);
5013                 }
5014                 if (err) {
5015                         err = -errno;
5016                         pr_warn("map '%s': failed to initialize slot [%d] to map '%s' fd=%d: %d\n",
5017                                 map->name, i, targ_map->name, fd, err);
5018                         return err;
5019                 }
5020                 pr_debug("map '%s': slot [%d] set to map '%s' fd=%d\n",
5021                          map->name, i, targ_map->name, fd);
5022         }
5023
5024         zfree(&map->init_slots);
5025         map->init_slots_sz = 0;
5026
5027         return 0;
5028 }
5029
5030 static int init_prog_array_slots(struct bpf_object *obj, struct bpf_map *map)
5031 {
5032         const struct bpf_program *targ_prog;
5033         unsigned int i;
5034         int fd, err;
5035
5036         if (obj->gen_loader)
5037                 return -ENOTSUP;
5038
5039         for (i = 0; i < map->init_slots_sz; i++) {
5040                 if (!map->init_slots[i])
5041                         continue;
5042
5043                 targ_prog = map->init_slots[i];
5044                 fd = bpf_program__fd(targ_prog);
5045
5046                 err = bpf_map_update_elem(map->fd, &i, &fd, 0);
5047                 if (err) {
5048                         err = -errno;
5049                         pr_warn("map '%s': failed to initialize slot [%d] to prog '%s' fd=%d: %d\n",
5050                                 map->name, i, targ_prog->name, fd, err);
5051                         return err;
5052                 }
5053                 pr_debug("map '%s': slot [%d] set to prog '%s' fd=%d\n",
5054                          map->name, i, targ_prog->name, fd);
5055         }
5056
5057         zfree(&map->init_slots);
5058         map->init_slots_sz = 0;
5059
5060         return 0;
5061 }
5062
5063 static int bpf_object_init_prog_arrays(struct bpf_object *obj)
5064 {
5065         struct bpf_map *map;
5066         int i, err;
5067
5068         for (i = 0; i < obj->nr_maps; i++) {
5069                 map = &obj->maps[i];
5070
5071                 if (!map->init_slots_sz || map->def.type != BPF_MAP_TYPE_PROG_ARRAY)
5072                         continue;
5073
5074                 err = init_prog_array_slots(obj, map);
5075                 if (err < 0) {
5076                         zclose(map->fd);
5077                         return err;
5078                 }
5079         }
5080         return 0;
5081 }
5082
5083 static int map_set_def_max_entries(struct bpf_map *map)
5084 {
5085         if (map->def.type == BPF_MAP_TYPE_PERF_EVENT_ARRAY && !map->def.max_entries) {
5086                 int nr_cpus;
5087
5088                 nr_cpus = libbpf_num_possible_cpus();
5089                 if (nr_cpus < 0) {
5090                         pr_warn("map '%s': failed to determine number of system CPUs: %d\n",
5091                                 map->name, nr_cpus);
5092                         return nr_cpus;
5093                 }
5094                 pr_debug("map '%s': setting size to %d\n", map->name, nr_cpus);
5095                 map->def.max_entries = nr_cpus;
5096         }
5097
5098         return 0;
5099 }
5100
5101 static int
5102 bpf_object__create_maps(struct bpf_object *obj)
5103 {
5104         struct bpf_map *map;
5105         char *cp, errmsg[STRERR_BUFSIZE];
5106         unsigned int i, j;
5107         int err;
5108         bool retried;
5109
5110         for (i = 0; i < obj->nr_maps; i++) {
5111                 map = &obj->maps[i];
5112
5113                 /* To support old kernels, we skip creating global data maps
5114                  * (.rodata, .data, .kconfig, etc); later on, during program
5115                  * loading, if we detect that at least one of the to-be-loaded
5116                  * programs is referencing any global data map, we'll error
5117                  * out with program name and relocation index logged.
5118                  * This approach allows to accommodate Clang emitting
5119                  * unnecessary .rodata.str1.1 sections for string literals,
5120                  * but also it allows to have CO-RE applications that use
5121                  * global variables in some of BPF programs, but not others.
5122                  * If those global variable-using programs are not loaded at
5123                  * runtime due to bpf_program__set_autoload(prog, false),
5124                  * bpf_object loading will succeed just fine even on old
5125                  * kernels.
5126                  */
5127                 if (bpf_map__is_internal(map) && !kernel_supports(obj, FEAT_GLOBAL_DATA))
5128                         map->autocreate = false;
5129
5130                 if (!map->autocreate) {
5131                         pr_debug("map '%s': skipped auto-creating...\n", map->name);
5132                         continue;
5133                 }
5134
5135                 err = map_set_def_max_entries(map);
5136                 if (err)
5137                         goto err_out;
5138
5139                 retried = false;
5140 retry:
5141                 if (map->pin_path) {
5142                         err = bpf_object__reuse_map(map);
5143                         if (err) {
5144                                 pr_warn("map '%s': error reusing pinned map\n",
5145                                         map->name);
5146                                 goto err_out;
5147                         }
5148                         if (retried && map->fd < 0) {
5149                                 pr_warn("map '%s': cannot find pinned map\n",
5150                                         map->name);
5151                                 err = -ENOENT;
5152                                 goto err_out;
5153                         }
5154                 }
5155
5156                 if (map->fd >= 0) {
5157                         pr_debug("map '%s': skipping creation (preset fd=%d)\n",
5158                                  map->name, map->fd);
5159                 } else {
5160                         err = bpf_object__create_map(obj, map, false);
5161                         if (err)
5162                                 goto err_out;
5163
5164                         pr_debug("map '%s': created successfully, fd=%d\n",
5165                                  map->name, map->fd);
5166
5167                         if (bpf_map__is_internal(map)) {
5168                                 err = bpf_object__populate_internal_map(obj, map);
5169                                 if (err < 0) {
5170                                         zclose(map->fd);
5171                                         goto err_out;
5172                                 }
5173                         }
5174
5175                         if (map->init_slots_sz && map->def.type != BPF_MAP_TYPE_PROG_ARRAY) {
5176                                 err = init_map_in_map_slots(obj, map);
5177                                 if (err < 0) {
5178                                         zclose(map->fd);
5179                                         goto err_out;
5180                                 }
5181                         }
5182                 }
5183
5184                 if (map->pin_path && !map->pinned) {
5185                         err = bpf_map__pin(map, NULL);
5186                         if (err) {
5187                                 zclose(map->fd);
5188                                 if (!retried && err == -EEXIST) {
5189                                         retried = true;
5190                                         goto retry;
5191                                 }
5192                                 pr_warn("map '%s': failed to auto-pin at '%s': %d\n",
5193                                         map->name, map->pin_path, err);
5194                                 goto err_out;
5195                         }
5196                 }
5197         }
5198
5199         return 0;
5200
5201 err_out:
5202         cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
5203         pr_warn("map '%s': failed to create: %s(%d)\n", map->name, cp, err);
5204         pr_perm_msg(err);
5205         for (j = 0; j < i; j++)
5206                 zclose(obj->maps[j].fd);
5207         return err;
5208 }
5209
/* Check whether the five characters starting at @s follow the X___Y pattern,
 * i.e. exactly three underscores surrounded by non-underscore characters.
 * Characters are inspected left to right and inspection stops at the first
 * one that breaks the pattern.
 */
static bool bpf_core_is_flavor_sep(const char *s)
{
	/* X: must not be an underscore */
	if (s[0] == '_')
		return false;

	/* ___: exactly three underscores in a row */
	if (s[1] != '_' || s[2] != '_' || s[3] != '_')
		return false;

	/* Y: must not be an underscore either */
	return s[4] != '_';
}
5217
5218 /* Given 'some_struct_name___with_flavor' return the length of a name prefix
5219  * before last triple underscore. Struct name part after last triple
5220  * underscore is ignored by BPF CO-RE relocation during relocation matching.
5221  */
/* Return the length of the "essential" part of @name: everything before the
 * last X___Y flavor separator, e.g. for 'some_struct_name___with_flavor' the
 * length of 'some_struct_name'. If there is no such separator, the full
 * length of @name is returned.
 */
size_t bpf_core_essential_name_len(const char *name)
{
	size_t len = strlen(name);
	int pos = len - 5;

	/* scan backwards so that the last separator wins; names shorter than
	 * five characters start with a negative position and are never scanned
	 */
	while (pos >= 0) {
		if (bpf_core_is_flavor_sep(name + pos))
			return pos + 1;
		pos--;
	}

	return len;
}
5234 void bpf_core_free_cands(struct bpf_core_cand_list *cands)
5235 {
5236         if (!cands)
5237                 return;
5238
5239         free(cands->cands);
5240         free(cands);
5241 }
5242
5243 int bpf_core_add_cands(struct bpf_core_cand *local_cand,
5244                        size_t local_essent_len,
5245                        const struct btf *targ_btf,
5246                        const char *targ_btf_name,
5247                        int targ_start_id,
5248                        struct bpf_core_cand_list *cands)
5249 {
5250         struct bpf_core_cand *new_cands, *cand;
5251         const struct btf_type *t, *local_t;
5252         const char *targ_name, *local_name;
5253         size_t targ_essent_len;
5254         int n, i;
5255
5256         local_t = btf__type_by_id(local_cand->btf, local_cand->id);
5257         local_name = btf__str_by_offset(local_cand->btf, local_t->name_off);
5258
5259         n = btf__type_cnt(targ_btf);
5260         for (i = targ_start_id; i < n; i++) {
5261                 t = btf__type_by_id(targ_btf, i);
5262                 if (!btf_kind_core_compat(t, local_t))
5263                         continue;
5264
5265                 targ_name = btf__name_by_offset(targ_btf, t->name_off);
5266                 if (str_is_empty(targ_name))
5267                         continue;
5268
5269                 targ_essent_len = bpf_core_essential_name_len(targ_name);
5270                 if (targ_essent_len != local_essent_len)
5271                         continue;
5272
5273                 if (strncmp(local_name, targ_name, local_essent_len) != 0)
5274                         continue;
5275
5276                 pr_debug("CO-RE relocating [%d] %s %s: found target candidate [%d] %s %s in [%s]\n",
5277                          local_cand->id, btf_kind_str(local_t),
5278                          local_name, i, btf_kind_str(t), targ_name,
5279                          targ_btf_name);
5280                 new_cands = libbpf_reallocarray(cands->cands, cands->len + 1,
5281                                               sizeof(*cands->cands));
5282                 if (!new_cands)
5283                         return -ENOMEM;
5284
5285                 cand = &new_cands[cands->len];
5286                 cand->btf = targ_btf;
5287                 cand->id = i;
5288
5289                 cands->cands = new_cands;
5290                 cands->len++;
5291         }
5292         return 0;
5293 }
5294
5295 static int load_module_btfs(struct bpf_object *obj)
5296 {
5297         struct bpf_btf_info info;
5298         struct module_btf *mod_btf;
5299         struct btf *btf;
5300         char name[64];
5301         __u32 id = 0, len;
5302         int err, fd;
5303
5304         if (obj->btf_modules_loaded)
5305                 return 0;
5306
5307         if (obj->gen_loader)
5308                 return 0;
5309
5310         /* don't do this again, even if we find no module BTFs */
5311         obj->btf_modules_loaded = true;
5312
5313         /* kernel too old to support module BTFs */
5314         if (!kernel_supports(obj, FEAT_MODULE_BTF))
5315                 return 0;
5316
5317         while (true) {
5318                 err = bpf_btf_get_next_id(id, &id);
5319                 if (err && errno == ENOENT)
5320                         return 0;
5321                 if (err) {
5322                         err = -errno;
5323                         pr_warn("failed to iterate BTF objects: %d\n", err);
5324                         return err;
5325                 }
5326
5327                 fd = bpf_btf_get_fd_by_id(id);
5328                 if (fd < 0) {
5329                         if (errno == ENOENT)
5330                                 continue; /* expected race: BTF was unloaded */
5331                         err = -errno;
5332                         pr_warn("failed to get BTF object #%d FD: %d\n", id, err);
5333                         return err;
5334                 }
5335
5336                 len = sizeof(info);
5337                 memset(&info, 0, sizeof(info));
5338                 info.name = ptr_to_u64(name);
5339                 info.name_len = sizeof(name);
5340
5341                 err = bpf_obj_get_info_by_fd(fd, &info, &len);
5342                 if (err) {
5343                         err = -errno;
5344                         pr_warn("failed to get BTF object #%d info: %d\n", id, err);
5345                         goto err_out;
5346                 }
5347
5348                 /* ignore non-module BTFs */
5349                 if (!info.kernel_btf || strcmp(name, "vmlinux") == 0) {
5350                         close(fd);
5351                         continue;
5352                 }
5353
5354                 btf = btf_get_from_fd(fd, obj->btf_vmlinux);
5355                 err = libbpf_get_error(btf);
5356                 if (err) {
5357                         pr_warn("failed to load module [%s]'s BTF object #%d: %d\n",
5358                                 name, id, err);
5359                         goto err_out;
5360                 }
5361
5362                 err = libbpf_ensure_mem((void **)&obj->btf_modules, &obj->btf_module_cap,
5363                                         sizeof(*obj->btf_modules), obj->btf_module_cnt + 1);
5364                 if (err)
5365                         goto err_out;
5366
5367                 mod_btf = &obj->btf_modules[obj->btf_module_cnt++];
5368
5369                 mod_btf->btf = btf;
5370                 mod_btf->id = id;
5371                 mod_btf->fd = fd;
5372                 mod_btf->name = strdup(name);
5373                 if (!mod_btf->name) {
5374                         err = -ENOMEM;
5375                         goto err_out;
5376                 }
5377                 continue;
5378
5379 err_out:
5380                 close(fd);
5381                 return err;
5382         }
5383
5384         return 0;
5385 }
5386
5387 static struct bpf_core_cand_list *
5388 bpf_core_find_cands(struct bpf_object *obj, const struct btf *local_btf, __u32 local_type_id)
5389 {
5390         struct bpf_core_cand local_cand = {};
5391         struct bpf_core_cand_list *cands;
5392         const struct btf *main_btf;
5393         const struct btf_type *local_t;
5394         const char *local_name;
5395         size_t local_essent_len;
5396         int err, i;
5397
5398         local_cand.btf = local_btf;
5399         local_cand.id = local_type_id;
5400         local_t = btf__type_by_id(local_btf, local_type_id);
5401         if (!local_t)
5402                 return ERR_PTR(-EINVAL);
5403
5404         local_name = btf__name_by_offset(local_btf, local_t->name_off);
5405         if (str_is_empty(local_name))
5406                 return ERR_PTR(-EINVAL);
5407         local_essent_len = bpf_core_essential_name_len(local_name);
5408
5409         cands = calloc(1, sizeof(*cands));
5410         if (!cands)
5411                 return ERR_PTR(-ENOMEM);
5412
5413         /* Attempt to find target candidates in vmlinux BTF first */
5414         main_btf = obj->btf_vmlinux_override ?: obj->btf_vmlinux;
5415         err = bpf_core_add_cands(&local_cand, local_essent_len, main_btf, "vmlinux", 1, cands);
5416         if (err)
5417                 goto err_out;
5418
5419         /* if vmlinux BTF has any candidate, don't got for module BTFs */
5420         if (cands->len)
5421                 return cands;
5422
5423         /* if vmlinux BTF was overridden, don't attempt to load module BTFs */
5424         if (obj->btf_vmlinux_override)
5425                 return cands;
5426
5427         /* now look through module BTFs, trying to still find candidates */
5428         err = load_module_btfs(obj);
5429         if (err)
5430                 goto err_out;
5431
5432         for (i = 0; i < obj->btf_module_cnt; i++) {
5433                 err = bpf_core_add_cands(&local_cand, local_essent_len,
5434                                          obj->btf_modules[i].btf,
5435                                          obj->btf_modules[i].name,
5436                                          btf__type_cnt(obj->btf_vmlinux),
5437                                          cands);
5438                 if (err)
5439                         goto err_out;
5440         }
5441
5442         return cands;
5443 err_out:
5444         bpf_core_free_cands(cands);
5445         return ERR_PTR(err);
5446 }
5447
5448 /* Check local and target types for compatibility. This check is used for
5449  * type-based CO-RE relocations and follow slightly different rules than
5450  * field-based relocations. This function assumes that root types were already
5451  * checked for name match. Beyond that initial root-level name check, names
5452  * are completely ignored. Compatibility rules are as follows:
5453  *   - any two STRUCTs/UNIONs/FWDs/ENUMs/INTs are considered compatible, but
5454  *     kind should match for local and target types (i.e., STRUCT is not
5455  *     compatible with UNION);
5456  *   - for ENUMs, the size is ignored;
5457  *   - for INT, size and signedness are ignored;
5458  *   - for ARRAY, dimensionality is ignored, element types are checked for
5459  *     compatibility recursively;
5460  *   - CONST/VOLATILE/RESTRICT modifiers are ignored;
5461  *   - TYPEDEFs/PTRs are compatible if types they pointing to are compatible;
5462  *   - FUNC_PROTOs are compatible if they have compatible signature: same
5463  *     number of input args and compatible return and argument types.
5464  * These rules are not set in stone and probably will be adjusted as we get
5465  * more experience with using BPF CO-RE relocations.
5466  */
5467 int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id,
5468                               const struct btf *targ_btf, __u32 targ_id)
5469 {
5470         return __bpf_core_types_are_compat(local_btf, local_id, targ_btf, targ_id, 32);
5471 }
5472
5473 int bpf_core_types_match(const struct btf *local_btf, __u32 local_id,
5474                          const struct btf *targ_btf, __u32 targ_id)
5475 {
5476         return __bpf_core_types_match(local_btf, local_id, targ_btf, targ_id, false, 32);
5477 }
5478
/* Identity hash: the key pointer's integer value is the hash. ctx is unused. */
static size_t bpf_core_hash_fn(const void *key, void *ctx)
{
	size_t hash = (size_t)key;

	return hash;
}
5483
/* Keys are equal only when the two pointers are identical. ctx is unused. */
static bool bpf_core_equal_fn(const void *k1, const void *k2, void *ctx)
{
	if (k1 != k2)
		return false;
	return true;
}
5488
5489 static void *u32_as_hash_key(__u32 x)
5490 {
5491         return (void *)(uintptr_t)x;
5492 }
5493
5494 static int record_relo_core(struct bpf_program *prog,
5495                             const struct bpf_core_relo *core_relo, int insn_idx)
5496 {
5497         struct reloc_desc *relos, *relo;
5498
5499         relos = libbpf_reallocarray(prog->reloc_desc,
5500                                     prog->nr_reloc + 1, sizeof(*relos));
5501         if (!relos)
5502                 return -ENOMEM;
5503         relo = &relos[prog->nr_reloc];
5504         relo->type = RELO_CORE;
5505         relo->insn_idx = insn_idx;
5506         relo->core_relo = core_relo;
5507         prog->reloc_desc = relos;
5508         prog->nr_reloc++;
5509         return 0;
5510 }
5511
5512 static const struct bpf_core_relo *find_relo_core(struct bpf_program *prog, int insn_idx)
5513 {
5514         struct reloc_desc *relo;
5515         int i;
5516
5517         for (i = 0; i < prog->nr_reloc; i++) {
5518                 relo = &prog->reloc_desc[i];
5519                 if (relo->type != RELO_CORE || relo->insn_idx != insn_idx)
5520                         continue;
5521
5522                 return relo->core_relo;
5523         }
5524
5525         return NULL;
5526 }
5527
5528 static int bpf_core_resolve_relo(struct bpf_program *prog,
5529                                  const struct bpf_core_relo *relo,
5530                                  int relo_idx,
5531                                  const struct btf *local_btf,
5532                                  struct hashmap *cand_cache,
5533                                  struct bpf_core_relo_res *targ_res)
5534 {
5535         struct bpf_core_spec specs_scratch[3] = {};
5536         const void *type_key = u32_as_hash_key(relo->type_id);
5537         struct bpf_core_cand_list *cands = NULL;
5538         const char *prog_name = prog->name;
5539         const struct btf_type *local_type;
5540         const char *local_name;
5541         __u32 local_id = relo->type_id;
5542         int err;
5543
5544         local_type = btf__type_by_id(local_btf, local_id);
5545         if (!local_type)
5546                 return -EINVAL;
5547
5548         local_name = btf__name_by_offset(local_btf, local_type->name_off);
5549         if (!local_name)
5550                 return -EINVAL;
5551
5552         if (relo->kind != BPF_CORE_TYPE_ID_LOCAL &&
5553             !hashmap__find(cand_cache, type_key, (void **)&cands)) {
5554                 cands = bpf_core_find_cands(prog->obj, local_btf, local_id);
5555                 if (IS_ERR(cands)) {
5556                         pr_warn("prog '%s': relo #%d: target candidate search failed for [%d] %s %s: %ld\n",
5557                                 prog_name, relo_idx, local_id, btf_kind_str(local_type),
5558                                 local_name, PTR_ERR(cands));
5559                         return PTR_ERR(cands);
5560                 }
5561                 err = hashmap__set(cand_cache, type_key, cands, NULL, NULL);
5562                 if (err) {
5563                         bpf_core_free_cands(cands);
5564                         return err;
5565                 }
5566         }
5567
5568         return bpf_core_calc_relo_insn(prog_name, relo, relo_idx, local_btf, cands, specs_scratch,
5569                                        targ_res);
5570 }
5571
5572 static int
5573 bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path)
5574 {
5575         const struct btf_ext_info_sec *sec;
5576         struct bpf_core_relo_res targ_res;
5577         const struct bpf_core_relo *rec;
5578         const struct btf_ext_info *seg;
5579         struct hashmap_entry *entry;
5580         struct hashmap *cand_cache = NULL;
5581         struct bpf_program *prog;
5582         struct bpf_insn *insn;
5583         const char *sec_name;
5584         int i, err = 0, insn_idx, sec_idx, sec_num;
5585
5586         if (obj->btf_ext->core_relo_info.len == 0)
5587                 return 0;
5588
5589         if (targ_btf_path) {
5590                 obj->btf_vmlinux_override = btf__parse(targ_btf_path, NULL);
5591                 err = libbpf_get_error(obj->btf_vmlinux_override);
5592                 if (err) {
5593                         pr_warn("failed to parse target BTF: %d\n", err);
5594                         return err;
5595                 }
5596         }
5597
5598         cand_cache = hashmap__new(bpf_core_hash_fn, bpf_core_equal_fn, NULL);
5599         if (IS_ERR(cand_cache)) {
5600                 err = PTR_ERR(cand_cache);
5601                 goto out;
5602         }
5603
5604         seg = &obj->btf_ext->core_relo_info;
5605         sec_num = 0;
5606         for_each_btf_ext_sec(seg, sec) {
5607                 sec_idx = seg->sec_idxs[sec_num];
5608                 sec_num++;
5609
5610                 sec_name = btf__name_by_offset(obj->btf, sec->sec_name_off);
5611                 if (str_is_empty(sec_name)) {
5612                         err = -EINVAL;
5613                         goto out;
5614                 }
5615
5616                 pr_debug("sec '%s': found %d CO-RE relocations\n", sec_name, sec->num_info);
5617
5618                 for_each_btf_ext_rec(seg, sec, i, rec) {
5619                         if (rec->insn_off % BPF_INSN_SZ)
5620                                 return -EINVAL;
5621                         insn_idx = rec->insn_off / BPF_INSN_SZ;
5622                         prog = find_prog_by_sec_insn(obj, sec_idx, insn_idx);
5623                         if (!prog) {
5624                                 /* When __weak subprog is "overridden" by another instance
5625                                  * of the subprog from a different object file, linker still
5626                                  * appends all the .BTF.ext info that used to belong to that
5627                                  * eliminated subprogram.
5628                                  * This is similar to what x86-64 linker does for relocations.
5629                                  * So just ignore such relocations just like we ignore
5630                                  * subprog instructions when discovering subprograms.
5631                                  */
5632                                 pr_debug("sec '%s': skipping CO-RE relocation #%d for insn #%d belonging to eliminated weak subprogram\n",
5633                                          sec_name, i, insn_idx);
5634                                 continue;
5635                         }
5636                         /* no need to apply CO-RE relocation if the program is
5637                          * not going to be loaded
5638                          */
5639                         if (!prog->autoload)
5640                                 continue;
5641
5642                         /* adjust insn_idx from section frame of reference to the local
5643                          * program's frame of reference; (sub-)program code is not yet
5644                          * relocated, so it's enough to just subtract in-section offset
5645                          */
5646                         insn_idx = insn_idx - prog->sec_insn_off;
5647                         if (insn_idx >= prog->insns_cnt)
5648                                 return -EINVAL;
5649                         insn = &prog->insns[insn_idx];
5650
5651                         err = record_relo_core(prog, rec, insn_idx);
5652                         if (err) {
5653                                 pr_warn("prog '%s': relo #%d: failed to record relocation: %d\n",
5654                                         prog->name, i, err);
5655                                 goto out;
5656                         }
5657
5658                         if (prog->obj->gen_loader)
5659                                 continue;
5660
5661                         err = bpf_core_resolve_relo(prog, rec, i, obj->btf, cand_cache, &targ_res);
5662                         if (err) {
5663                                 pr_warn("prog '%s': relo #%d: failed to relocate: %d\n",
5664                                         prog->name, i, err);
5665                                 goto out;
5666                         }
5667
5668                         err = bpf_core_patch_insn(prog->name, insn, insn_idx, rec, i, &targ_res);
5669                         if (err) {
5670                                 pr_warn("prog '%s': relo #%d: failed to patch insn #%u: %d\n",
5671                                         prog->name, i, insn_idx, err);
5672                                 goto out;
5673                         }
5674                 }
5675         }
5676
5677 out:
5678         /* obj->btf_vmlinux and module BTFs are freed after object load */
5679         btf__free(obj->btf_vmlinux_override);
5680         obj->btf_vmlinux_override = NULL;
5681
5682         if (!IS_ERR_OR_NULL(cand_cache)) {
5683                 hashmap__for_each_entry(cand_cache, entry, i) {
5684                         bpf_core_free_cands(entry->value);
5685                 }
5686                 hashmap__free(cand_cache);
5687         }
5688         return err;
5689 }
5690
5691 /* base map load ldimm64 special constant, used also for log fixup logic */
5692 #define MAP_LDIMM64_POISON_BASE 2001000000
5693 #define MAP_LDIMM64_POISON_PFX "200100"
5694
5695 static void poison_map_ldimm64(struct bpf_program *prog, int relo_idx,
5696                                int insn_idx, struct bpf_insn *insn,
5697                                int map_idx, const struct bpf_map *map)
5698 {
5699         int i;
5700
5701         pr_debug("prog '%s': relo #%d: poisoning insn #%d that loads map #%d '%s'\n",
5702                  prog->name, relo_idx, insn_idx, map_idx, map->name);
5703
5704         /* we turn single ldimm64 into two identical invalid calls */
5705         for (i = 0; i < 2; i++) {
5706                 insn->code = BPF_JMP | BPF_CALL;
5707                 insn->dst_reg = 0;
5708                 insn->src_reg = 0;
5709                 insn->off = 0;
5710                 /* if this instruction is reachable (not a dead code),
5711                  * verifier will complain with something like:
5712                  * invalid func unknown#2001000123
5713                  * where lower 123 is map index into obj->maps[] array
5714                  */
5715                 insn->imm = MAP_LDIMM64_POISON_BASE + map_idx;
5716
5717                 insn++;
5718         }
5719 }
5720
5721 /* Relocate data references within program code:
5722  *  - map references;
5723  *  - global variable references;
5724  *  - extern references.
5725  */
5726 static int
5727 bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog)
5728 {
5729         int i;
5730
5731         for (i = 0; i < prog->nr_reloc; i++) {
5732                 struct reloc_desc *relo = &prog->reloc_desc[i];
5733                 struct bpf_insn *insn = &prog->insns[relo->insn_idx];
5734                 const struct bpf_map *map;
5735                 struct extern_desc *ext;
5736
5737                 switch (relo->type) {
5738                 case RELO_LD64:
5739                         map = &obj->maps[relo->map_idx];
5740                         if (obj->gen_loader) {
5741                                 insn[0].src_reg = BPF_PSEUDO_MAP_IDX;
5742                                 insn[0].imm = relo->map_idx;
5743                         } else if (map->autocreate) {
5744                                 insn[0].src_reg = BPF_PSEUDO_MAP_FD;
5745                                 insn[0].imm = map->fd;
5746                         } else {
5747                                 poison_map_ldimm64(prog, i, relo->insn_idx, insn,
5748                                                    relo->map_idx, map);
5749                         }
5750                         break;
5751                 case RELO_DATA:
5752                         map = &obj->maps[relo->map_idx];
5753                         insn[1].imm = insn[0].imm + relo->sym_off;
5754                         if (obj->gen_loader) {
5755                                 insn[0].src_reg = BPF_PSEUDO_MAP_IDX_VALUE;
5756                                 insn[0].imm = relo->map_idx;
5757                         } else if (map->autocreate) {
5758                                 insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
5759                                 insn[0].imm = map->fd;
5760                         } else {
5761                                 poison_map_ldimm64(prog, i, relo->insn_idx, insn,
5762                                                    relo->map_idx, map);
5763                         }
5764                         break;
5765                 case RELO_EXTERN_VAR:
5766                         ext = &obj->externs[relo->sym_off];
5767                         if (ext->type == EXT_KCFG) {
5768                                 if (obj->gen_loader) {
5769                                         insn[0].src_reg = BPF_PSEUDO_MAP_IDX_VALUE;
5770                                         insn[0].imm = obj->kconfig_map_idx;
5771                                 } else {
5772                                         insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
5773                                         insn[0].imm = obj->maps[obj->kconfig_map_idx].fd;
5774                                 }
5775                                 insn[1].imm = ext->kcfg.data_off;
5776                         } else /* EXT_KSYM */ {
5777                                 if (ext->ksym.type_id && ext->is_set) { /* typed ksyms */
5778                                         insn[0].src_reg = BPF_PSEUDO_BTF_ID;
5779                                         insn[0].imm = ext->ksym.kernel_btf_id;
5780                                         insn[1].imm = ext->ksym.kernel_btf_obj_fd;
5781                                 } else { /* typeless ksyms or unresolved typed ksyms */
5782                                         insn[0].imm = (__u32)ext->ksym.addr;
5783                                         insn[1].imm = ext->ksym.addr >> 32;
5784                                 }
5785                         }
5786                         break;
5787                 case RELO_EXTERN_FUNC:
5788                         ext = &obj->externs[relo->sym_off];
5789                         insn[0].src_reg = BPF_PSEUDO_KFUNC_CALL;
5790                         if (ext->is_set) {
5791                                 insn[0].imm = ext->ksym.kernel_btf_id;
5792                                 insn[0].off = ext->ksym.btf_fd_idx;
5793                         } else { /* unresolved weak kfunc */
5794                                 insn[0].imm = 0;
5795                                 insn[0].off = 0;
5796                         }
5797                         break;
5798                 case RELO_SUBPROG_ADDR:
5799                         if (insn[0].src_reg != BPF_PSEUDO_FUNC) {
5800                                 pr_warn("prog '%s': relo #%d: bad insn\n",
5801                                         prog->name, i);
5802                                 return -EINVAL;
5803                         }
5804                         /* handled already */
5805                         break;
5806                 case RELO_CALL:
5807                         /* handled already */
5808                         break;
5809                 case RELO_CORE:
5810                         /* will be handled by bpf_program_record_relos() */
5811                         break;
5812                 default:
5813                         pr_warn("prog '%s': relo #%d: bad relo type %d\n",
5814                                 prog->name, i, relo->type);
5815                         return -EINVAL;
5816                 }
5817         }
5818
5819         return 0;
5820 }
5821
5822 static int adjust_prog_btf_ext_info(const struct bpf_object *obj,
5823                                     const struct bpf_program *prog,
5824                                     const struct btf_ext_info *ext_info,
5825                                     void **prog_info, __u32 *prog_rec_cnt,
5826                                     __u32 *prog_rec_sz)
5827 {
5828         void *copy_start = NULL, *copy_end = NULL;
5829         void *rec, *rec_end, *new_prog_info;
5830         const struct btf_ext_info_sec *sec;
5831         size_t old_sz, new_sz;
5832         int i, sec_num, sec_idx, off_adj;
5833
5834         sec_num = 0;
5835         for_each_btf_ext_sec(ext_info, sec) {
5836                 sec_idx = ext_info->sec_idxs[sec_num];
5837                 sec_num++;
5838                 if (prog->sec_idx != sec_idx)
5839                         continue;
5840
5841                 for_each_btf_ext_rec(ext_info, sec, i, rec) {
5842                         __u32 insn_off = *(__u32 *)rec / BPF_INSN_SZ;
5843
5844                         if (insn_off < prog->sec_insn_off)
5845                                 continue;
5846                         if (insn_off >= prog->sec_insn_off + prog->sec_insn_cnt)
5847                                 break;
5848
5849                         if (!copy_start)
5850                                 copy_start = rec;
5851                         copy_end = rec + ext_info->rec_size;
5852                 }
5853
5854                 if (!copy_start)
5855                         return -ENOENT;
5856
5857                 /* append func/line info of a given (sub-)program to the main
5858                  * program func/line info
5859                  */
5860                 old_sz = (size_t)(*prog_rec_cnt) * ext_info->rec_size;
5861                 new_sz = old_sz + (copy_end - copy_start);
5862                 new_prog_info = realloc(*prog_info, new_sz);
5863                 if (!new_prog_info)
5864                         return -ENOMEM;
5865                 *prog_info = new_prog_info;
5866                 *prog_rec_cnt = new_sz / ext_info->rec_size;
5867                 memcpy(new_prog_info + old_sz, copy_start, copy_end - copy_start);
5868
5869                 /* Kernel instruction offsets are in units of 8-byte
5870                  * instructions, while .BTF.ext instruction offsets generated
5871                  * by Clang are in units of bytes. So convert Clang offsets
5872                  * into kernel offsets and adjust offset according to program
5873                  * relocated position.
5874                  */
5875                 off_adj = prog->sub_insn_off - prog->sec_insn_off;
5876                 rec = new_prog_info + old_sz;
5877                 rec_end = new_prog_info + new_sz;
5878                 for (; rec < rec_end; rec += ext_info->rec_size) {
5879                         __u32 *insn_off = rec;
5880
5881                         *insn_off = *insn_off / BPF_INSN_SZ + off_adj;
5882                 }
5883                 *prog_rec_sz = ext_info->rec_size;
5884                 return 0;
5885         }
5886
5887         return -ENOENT;
5888 }
5889
5890 static int
5891 reloc_prog_func_and_line_info(const struct bpf_object *obj,
5892                               struct bpf_program *main_prog,
5893                               const struct bpf_program *prog)
5894 {
5895         int err;
5896
5897         /* no .BTF.ext relocation if .BTF.ext is missing or kernel doesn't
5898          * supprot func/line info
5899          */
5900         if (!obj->btf_ext || !kernel_supports(obj, FEAT_BTF_FUNC))
5901                 return 0;
5902
5903         /* only attempt func info relocation if main program's func_info
5904          * relocation was successful
5905          */
5906         if (main_prog != prog && !main_prog->func_info)
5907                 goto line_info;
5908
5909         err = adjust_prog_btf_ext_info(obj, prog, &obj->btf_ext->func_info,
5910                                        &main_prog->func_info,
5911                                        &main_prog->func_info_cnt,
5912                                        &main_prog->func_info_rec_size);
5913         if (err) {
5914                 if (err != -ENOENT) {
5915                         pr_warn("prog '%s': error relocating .BTF.ext function info: %d\n",
5916                                 prog->name, err);
5917                         return err;
5918                 }
5919                 if (main_prog->func_info) {
5920                         /*
5921                          * Some info has already been found but has problem
5922                          * in the last btf_ext reloc. Must have to error out.
5923                          */
5924                         pr_warn("prog '%s': missing .BTF.ext function info.\n", prog->name);
5925                         return err;
5926                 }
5927                 /* Have problem loading the very first info. Ignore the rest. */
5928                 pr_warn("prog '%s': missing .BTF.ext function info for the main program, skipping all of .BTF.ext func info.\n",
5929                         prog->name);
5930         }
5931
5932 line_info:
5933         /* don't relocate line info if main program's relocation failed */
5934         if (main_prog != prog && !main_prog->line_info)
5935                 return 0;
5936
5937         err = adjust_prog_btf_ext_info(obj, prog, &obj->btf_ext->line_info,
5938                                        &main_prog->line_info,
5939                                        &main_prog->line_info_cnt,
5940                                        &main_prog->line_info_rec_size);
5941         if (err) {
5942                 if (err != -ENOENT) {
5943                         pr_warn("prog '%s': error relocating .BTF.ext line info: %d\n",
5944                                 prog->name, err);
5945                         return err;
5946                 }
5947                 if (main_prog->line_info) {
5948                         /*
5949                          * Some info has already been found but has problem
5950                          * in the last btf_ext reloc. Must have to error out.
5951                          */
5952                         pr_warn("prog '%s': missing .BTF.ext line info.\n", prog->name);
5953                         return err;
5954                 }
5955                 /* Have problem loading the very first info. Ignore the rest. */
5956                 pr_warn("prog '%s': missing .BTF.ext line info for the main program, skipping all of .BTF.ext line info.\n",
5957                         prog->name);
5958         }
5959         return 0;
5960 }
5961
5962 static int cmp_relo_by_insn_idx(const void *key, const void *elem)
5963 {
5964         size_t insn_idx = *(const size_t *)key;
5965         const struct reloc_desc *relo = elem;
5966
5967         if (insn_idx == relo->insn_idx)
5968                 return 0;
5969         return insn_idx < relo->insn_idx ? -1 : 1;
5970 }
5971
5972 static struct reloc_desc *find_prog_insn_relo(const struct bpf_program *prog, size_t insn_idx)
5973 {
5974         if (!prog->nr_reloc)
5975                 return NULL;
5976         return bsearch(&insn_idx, prog->reloc_desc, prog->nr_reloc,
5977                        sizeof(*prog->reloc_desc), cmp_relo_by_insn_idx);
5978 }
5979
5980 static int append_subprog_relos(struct bpf_program *main_prog, struct bpf_program *subprog)
5981 {
5982         int new_cnt = main_prog->nr_reloc + subprog->nr_reloc;
5983         struct reloc_desc *relos;
5984         int i;
5985
5986         if (main_prog == subprog)
5987                 return 0;
5988         relos = libbpf_reallocarray(main_prog->reloc_desc, new_cnt, sizeof(*relos));
5989         if (!relos)
5990                 return -ENOMEM;
5991         if (subprog->nr_reloc)
5992                 memcpy(relos + main_prog->nr_reloc, subprog->reloc_desc,
5993                        sizeof(*relos) * subprog->nr_reloc);
5994
5995         for (i = main_prog->nr_reloc; i < new_cnt; i++)
5996                 relos[i].insn_idx += subprog->sub_insn_off;
5997         /* After insn_idx adjustment the 'relos' array is still sorted
5998          * by insn_idx and doesn't break bsearch.
5999          */
6000         main_prog->reloc_desc = relos;
6001         main_prog->nr_reloc = new_cnt;
6002         return 0;
6003 }
6004
6005 static int
6006 bpf_object__reloc_code(struct bpf_object *obj, struct bpf_program *main_prog,
6007                        struct bpf_program *prog)
6008 {
6009         size_t sub_insn_idx, insn_idx, new_cnt;
6010         struct bpf_program *subprog;
6011         struct bpf_insn *insns, *insn;
6012         struct reloc_desc *relo;
6013         int err;
6014
6015         err = reloc_prog_func_and_line_info(obj, main_prog, prog);
6016         if (err)
6017                 return err;
6018
6019         for (insn_idx = 0; insn_idx < prog->sec_insn_cnt; insn_idx++) {
6020                 insn = &main_prog->insns[prog->sub_insn_off + insn_idx];
6021                 if (!insn_is_subprog_call(insn) && !insn_is_pseudo_func(insn))
6022                         continue;
6023
6024                 relo = find_prog_insn_relo(prog, insn_idx);
6025                 if (relo && relo->type == RELO_EXTERN_FUNC)
6026                         /* kfunc relocations will be handled later
6027                          * in bpf_object__relocate_data()
6028                          */
6029                         continue;
6030                 if (relo && relo->type != RELO_CALL && relo->type != RELO_SUBPROG_ADDR) {
6031                         pr_warn("prog '%s': unexpected relo for insn #%zu, type %d\n",
6032                                 prog->name, insn_idx, relo->type);
6033                         return -LIBBPF_ERRNO__RELOC;
6034                 }
6035                 if (relo) {
6036                         /* sub-program instruction index is a combination of
6037                          * an offset of a symbol pointed to by relocation and
6038                          * call instruction's imm field; for global functions,
6039                          * call always has imm = -1, but for static functions
6040                          * relocation is against STT_SECTION and insn->imm
6041                          * points to a start of a static function
6042                          *
6043                          * for subprog addr relocation, the relo->sym_off + insn->imm is
6044                          * the byte offset in the corresponding section.
6045                          */
6046                         if (relo->type == RELO_CALL)
6047                                 sub_insn_idx = relo->sym_off / BPF_INSN_SZ + insn->imm + 1;
6048                         else
6049                                 sub_insn_idx = (relo->sym_off + insn->imm) / BPF_INSN_SZ;
6050                 } else if (insn_is_pseudo_func(insn)) {
6051                         /*
6052                          * RELO_SUBPROG_ADDR relo is always emitted even if both
6053                          * functions are in the same section, so it shouldn't reach here.
6054                          */
6055                         pr_warn("prog '%s': missing subprog addr relo for insn #%zu\n",
6056                                 prog->name, insn_idx);
6057                         return -LIBBPF_ERRNO__RELOC;
6058                 } else {
6059                         /* if subprogram call is to a static function within
6060                          * the same ELF section, there won't be any relocation
6061                          * emitted, but it also means there is no additional
6062                          * offset necessary, insns->imm is relative to
6063                          * instruction's original position within the section
6064                          */
6065                         sub_insn_idx = prog->sec_insn_off + insn_idx + insn->imm + 1;
6066                 }
6067
6068                 /* we enforce that sub-programs should be in .text section */
6069                 subprog = find_prog_by_sec_insn(obj, obj->efile.text_shndx, sub_insn_idx);
6070                 if (!subprog) {
6071                         pr_warn("prog '%s': no .text section found yet sub-program call exists\n",
6072                                 prog->name);
6073                         return -LIBBPF_ERRNO__RELOC;
6074                 }
6075
6076                 /* if it's the first call instruction calling into this
6077                  * subprogram (meaning this subprog hasn't been processed
6078                  * yet) within the context of current main program:
6079                  *   - append it at the end of main program's instructions blog;
6080                  *   - process is recursively, while current program is put on hold;
6081                  *   - if that subprogram calls some other not yet processes
6082                  *   subprogram, same thing will happen recursively until
6083                  *   there are no more unprocesses subprograms left to append
6084                  *   and relocate.
6085                  */
6086                 if (subprog->sub_insn_off == 0) {
6087                         subprog->sub_insn_off = main_prog->insns_cnt;
6088
6089                         new_cnt = main_prog->insns_cnt + subprog->insns_cnt;
6090                         insns = libbpf_reallocarray(main_prog->insns, new_cnt, sizeof(*insns));
6091                         if (!insns) {
6092                                 pr_warn("prog '%s': failed to realloc prog code\n", main_prog->name);
6093                                 return -ENOMEM;
6094                         }
6095                         main_prog->insns = insns;
6096                         main_prog->insns_cnt = new_cnt;
6097
6098                         memcpy(main_prog->insns + subprog->sub_insn_off, subprog->insns,
6099                                subprog->insns_cnt * sizeof(*insns));
6100
6101                         pr_debug("prog '%s': added %zu insns from sub-prog '%s'\n",
6102                                  main_prog->name, subprog->insns_cnt, subprog->name);
6103
6104                         /* The subprog insns are now appended. Append its relos too. */
6105                         err = append_subprog_relos(main_prog, subprog);
6106                         if (err)
6107                                 return err;
6108                         err = bpf_object__reloc_code(obj, main_prog, subprog);
6109                         if (err)
6110                                 return err;
6111                 }
6112
6113                 /* main_prog->insns memory could have been re-allocated, so
6114                  * calculate pointer again
6115                  */
6116                 insn = &main_prog->insns[prog->sub_insn_off + insn_idx];
6117                 /* calculate correct instruction position within current main
6118                  * prog; each main prog can have a different set of
6119                  * subprograms appended (potentially in different order as
6120                  * well), so position of any subprog can be different for
6121                  * different main programs */
6122                 insn->imm = subprog->sub_insn_off - (prog->sub_insn_off + insn_idx) - 1;
6123
6124                 pr_debug("prog '%s': insn #%zu relocated, imm %d points to subprog '%s' (now at %zu offset)\n",
6125                          prog->name, insn_idx, insn->imm, subprog->name, subprog->sub_insn_off);
6126         }
6127
6128         return 0;
6129 }
6130
6131 /*
6132  * Relocate sub-program calls.
6133  *
6134  * Algorithm operates as follows. Each entry-point BPF program (referred to as
6135  * main prog) is processed separately. Each subprog (a non-entry function
6136  * that can be called from either entry progs or other subprogs) gets its
6137  * sub_insn_off reset to zero. This serves as an indicator that this subprogram
6138  * hasn't yet been appended and relocated within current main prog. Once it's
6139  * relocated, sub_insn_off will point at the position within current main prog
6140  * where given subprog was appended. This will further be used to relocate all
6141  * the call instructions jumping into this subprog.
6142  *
6143  * We start with main program and process all call instructions. If the call
6144  * is into a subprog that hasn't been processed (i.e., subprog->sub_insn_off
6145  * is zero), subprog instructions are appended at the end of main program's
6146  * instruction array. Then main program is "put on hold" while we recursively
6147  * process newly appended subprogram. If that subprogram calls into another
6148  * subprogram that hasn't been appended, new subprogram is appended again to
6149  * the *main* prog's instructions (subprog's instructions are always left
6150  * untouched, as they need to be in unmodified state for subsequent main progs
6151  * and subprog instructions are always sent only as part of a main prog) and
6152  * the process continues recursively. Once all the subprogs called from a main
6153  * prog or any of its subprogs are appended (and relocated), all their
6154  * positions within finalized instructions array are known, so it's easy to
6155  * rewrite call instructions with correct relative offsets, corresponding to
6156  * desired target subprog.
6157  *
6158  * It's important to realize that some subprogs might not be called from some
6159  * main prog and any of its called/used subprogs. Those will keep their
6160  * subprog->sub_insn_off as zero at all times and won't be appended to current
6161  * main prog and won't be relocated within the context of current main prog.
6162  * They might still be used from other main progs later.
6163  *
6164  * Visually this process can be shown as below. Suppose we have two main
6165  * programs mainA and mainB and BPF object contains three subprogs: subA,
6166  * subB, and subC. mainA calls only subA, mainB calls only subC, but subA and
6167  * subC both call subB:
6168  *
6169  *        +--------+ +-------+
6170  *        |        v v       |
6171  *     +--+---+ +--+-+-+ +---+--+
6172  *     | subA | | subB | | subC |
6173  *     +--+---+ +------+ +---+--+
6174  *        ^                  ^
6175  *        |                  |
6176  *    +---+-------+   +------+----+
6177  *    |   mainA   |   |   mainB   |
6178  *    +-----------+   +-----------+
6179  *
6180  * We'll start relocating mainA, will find subA, append it and start
6181  * processing sub A recursively:
6182  *
6183  *    +-----------+------+
6184  *    |   mainA   | subA |
6185  *    +-----------+------+
6186  *
6187  * At this point we notice that subB is used from subA, so we append it and
6188  * relocate (there are no further subcalls from subB):
6189  *
6190  *    +-----------+------+------+
6191  *    |   mainA   | subA | subB |
6192  *    +-----------+------+------+
6193  *
6194  * At this point, we relocate subA calls, then go one level up and finish with
6195  * relocating mainA calls. mainA is done.
6196  *
6197  * For mainB process is similar but results in different order. We start with
6198  * mainB and skip subA and subB, as mainB never calls them (at least
6199  * directly), but we see subC is needed, so we append and start processing it:
6200  *
6201  *    +-----------+------+
6202  *    |   mainB   | subC |
6203  *    +-----------+------+
6204  * Now we see subC needs subB, so we go back to it, append and relocate it:
6205  *
6206  *    +-----------+------+------+
6207  *    |   mainB   | subC | subB |
6208  *    +-----------+------+------+
6209  *
6210  * At this point we unwind recursion, relocate calls in subC, then in mainB.
6211  */
6212 static int
6213 bpf_object__relocate_calls(struct bpf_object *obj, struct bpf_program *prog)
6214 {
6215         struct bpf_program *subprog;
6216         int i, err;
6217
6218         /* mark all subprogs as not relocated (yet) within the context of
6219          * current main program
6220          */
6221         for (i = 0; i < obj->nr_programs; i++) {
6222                 subprog = &obj->programs[i];
6223                 if (!prog_is_subprog(obj, subprog))
6224                         continue;
6225
6226                 subprog->sub_insn_off = 0;
6227         }
6228
6229         err = bpf_object__reloc_code(obj, prog, prog);
6230         if (err)
6231                 return err;
6232
6233         return 0;
6234 }
6235
6236 static void
6237 bpf_object__free_relocs(struct bpf_object *obj)
6238 {
6239         struct bpf_program *prog;
6240         int i;
6241
6242         /* free up relocation descriptors */
6243         for (i = 0; i < obj->nr_programs; i++) {
6244                 prog = &obj->programs[i];
6245                 zfree(&prog->reloc_desc);
6246                 prog->nr_reloc = 0;
6247         }
6248 }
6249
6250 static int cmp_relocs(const void *_a, const void *_b)
6251 {
6252         const struct reloc_desc *a = _a;
6253         const struct reloc_desc *b = _b;
6254
6255         if (a->insn_idx != b->insn_idx)
6256                 return a->insn_idx < b->insn_idx ? -1 : 1;
6257
6258         /* no two relocations should have the same insn_idx, but ... */
6259         if (a->type != b->type)
6260                 return a->type < b->type ? -1 : 1;
6261
6262         return 0;
6263 }
6264
6265 static void bpf_object__sort_relos(struct bpf_object *obj)
6266 {
6267         int i;
6268
6269         for (i = 0; i < obj->nr_programs; i++) {
6270                 struct bpf_program *p = &obj->programs[i];
6271
6272                 if (!p->nr_reloc)
6273                         continue;
6274
6275                 qsort(p->reloc_desc, p->nr_reloc, sizeof(*p->reloc_desc), cmp_relocs);
6276         }
6277 }
6278
6279 static int
6280 bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path)
6281 {
6282         struct bpf_program *prog;
6283         size_t i, j;
6284         int err;
6285
6286         if (obj->btf_ext) {
6287                 err = bpf_object__relocate_core(obj, targ_btf_path);
6288                 if (err) {
6289                         pr_warn("failed to perform CO-RE relocations: %d\n",
6290                                 err);
6291                         return err;
6292                 }
6293                 bpf_object__sort_relos(obj);
6294         }
6295
6296         /* Before relocating calls pre-process relocations and mark
6297          * few ld_imm64 instructions that points to subprogs.
6298          * Otherwise bpf_object__reloc_code() later would have to consider
6299          * all ld_imm64 insns as relocation candidates. That would
6300          * reduce relocation speed, since amount of find_prog_insn_relo()
6301          * would increase and most of them will fail to find a relo.
6302          */
6303         for (i = 0; i < obj->nr_programs; i++) {
6304                 prog = &obj->programs[i];
6305                 for (j = 0; j < prog->nr_reloc; j++) {
6306                         struct reloc_desc *relo = &prog->reloc_desc[j];
6307                         struct bpf_insn *insn = &prog->insns[relo->insn_idx];
6308
6309                         /* mark the insn, so it's recognized by insn_is_pseudo_func() */
6310                         if (relo->type == RELO_SUBPROG_ADDR)
6311                                 insn[0].src_reg = BPF_PSEUDO_FUNC;
6312                 }
6313         }
6314
6315         /* relocate subprogram calls and append used subprograms to main
6316          * programs; each copy of subprogram code needs to be relocated
6317          * differently for each main program, because its code location might
6318          * have changed.
6319          * Append subprog relos to main programs to allow data relos to be
6320          * processed after text is completely relocated.
6321          */
6322         for (i = 0; i < obj->nr_programs; i++) {
6323                 prog = &obj->programs[i];
6324                 /* sub-program's sub-calls are relocated within the context of
6325                  * its main program only
6326                  */
6327                 if (prog_is_subprog(obj, prog))
6328                         continue;
6329                 if (!prog->autoload)
6330                         continue;
6331
6332                 err = bpf_object__relocate_calls(obj, prog);
6333                 if (err) {
6334                         pr_warn("prog '%s': failed to relocate calls: %d\n",
6335                                 prog->name, err);
6336                         return err;
6337                 }
6338         }
6339         /* Process data relos for main programs */
6340         for (i = 0; i < obj->nr_programs; i++) {
6341                 prog = &obj->programs[i];
6342                 if (prog_is_subprog(obj, prog))
6343                         continue;
6344                 if (!prog->autoload)
6345                         continue;
6346                 err = bpf_object__relocate_data(obj, prog);
6347                 if (err) {
6348                         pr_warn("prog '%s': failed to relocate data references: %d\n",
6349                                 prog->name, err);
6350                         return err;
6351                 }
6352         }
6353
6354         return 0;
6355 }
6356
6357 static int bpf_object__collect_st_ops_relos(struct bpf_object *obj,
6358                                             Elf64_Shdr *shdr, Elf_Data *data);
6359
6360 static int bpf_object__collect_map_relos(struct bpf_object *obj,
6361                                          Elf64_Shdr *shdr, Elf_Data *data)
6362 {
6363         const int bpf_ptr_sz = 8, host_ptr_sz = sizeof(void *);
6364         int i, j, nrels, new_sz;
6365         const struct btf_var_secinfo *vi = NULL;
6366         const struct btf_type *sec, *var, *def;
6367         struct bpf_map *map = NULL, *targ_map = NULL;
6368         struct bpf_program *targ_prog = NULL;
6369         bool is_prog_array, is_map_in_map;
6370         const struct btf_member *member;
6371         const char *name, *mname, *type;
6372         unsigned int moff;
6373         Elf64_Sym *sym;
6374         Elf64_Rel *rel;
6375         void *tmp;
6376
6377         if (!obj->efile.btf_maps_sec_btf_id || !obj->btf)
6378                 return -EINVAL;
6379         sec = btf__type_by_id(obj->btf, obj->efile.btf_maps_sec_btf_id);
6380         if (!sec)
6381                 return -EINVAL;
6382
6383         nrels = shdr->sh_size / shdr->sh_entsize;
6384         for (i = 0; i < nrels; i++) {
6385                 rel = elf_rel_by_idx(data, i);
6386                 if (!rel) {
6387                         pr_warn(".maps relo #%d: failed to get ELF relo\n", i);
6388                         return -LIBBPF_ERRNO__FORMAT;
6389                 }
6390
6391                 sym = elf_sym_by_idx(obj, ELF64_R_SYM(rel->r_info));
6392                 if (!sym) {
6393                         pr_warn(".maps relo #%d: symbol %zx not found\n",
6394                                 i, (size_t)ELF64_R_SYM(rel->r_info));
6395                         return -LIBBPF_ERRNO__FORMAT;
6396                 }
6397                 name = elf_sym_str(obj, sym->st_name) ?: "<?>";
6398
6399                 pr_debug(".maps relo #%d: for %zd value %zd rel->r_offset %zu name %d ('%s')\n",
6400                          i, (ssize_t)(rel->r_info >> 32), (size_t)sym->st_value,
6401                          (size_t)rel->r_offset, sym->st_name, name);
6402
6403                 for (j = 0; j < obj->nr_maps; j++) {
6404                         map = &obj->maps[j];
6405                         if (map->sec_idx != obj->efile.btf_maps_shndx)
6406                                 continue;
6407
6408                         vi = btf_var_secinfos(sec) + map->btf_var_idx;
6409                         if (vi->offset <= rel->r_offset &&
6410                             rel->r_offset + bpf_ptr_sz <= vi->offset + vi->size)
6411                                 break;
6412                 }
6413                 if (j == obj->nr_maps) {
6414                         pr_warn(".maps relo #%d: cannot find map '%s' at rel->r_offset %zu\n",
6415                                 i, name, (size_t)rel->r_offset);
6416                         return -EINVAL;
6417                 }
6418
6419                 is_map_in_map = bpf_map_type__is_map_in_map(map->def.type);
6420                 is_prog_array = map->def.type == BPF_MAP_TYPE_PROG_ARRAY;
6421                 type = is_map_in_map ? "map" : "prog";
6422                 if (is_map_in_map) {
6423                         if (sym->st_shndx != obj->efile.btf_maps_shndx) {
6424                                 pr_warn(".maps relo #%d: '%s' isn't a BTF-defined map\n",
6425                                         i, name);
6426                                 return -LIBBPF_ERRNO__RELOC;
6427                         }
6428                         if (map->def.type == BPF_MAP_TYPE_HASH_OF_MAPS &&
6429                             map->def.key_size != sizeof(int)) {
6430                                 pr_warn(".maps relo #%d: hash-of-maps '%s' should have key size %zu.\n",
6431                                         i, map->name, sizeof(int));
6432                                 return -EINVAL;
6433                         }
6434                         targ_map = bpf_object__find_map_by_name(obj, name);
6435                         if (!targ_map) {
6436                                 pr_warn(".maps relo #%d: '%s' isn't a valid map reference\n",
6437                                         i, name);
6438                                 return -ESRCH;
6439                         }
6440                 } else if (is_prog_array) {
6441                         targ_prog = bpf_object__find_program_by_name(obj, name);
6442                         if (!targ_prog) {
6443                                 pr_warn(".maps relo #%d: '%s' isn't a valid program reference\n",
6444                                         i, name);
6445                                 return -ESRCH;
6446                         }
6447                         if (targ_prog->sec_idx != sym->st_shndx ||
6448                             targ_prog->sec_insn_off * 8 != sym->st_value ||
6449                             prog_is_subprog(obj, targ_prog)) {
6450                                 pr_warn(".maps relo #%d: '%s' isn't an entry-point program\n",
6451                                         i, name);
6452                                 return -LIBBPF_ERRNO__RELOC;
6453                         }
6454                 } else {
6455                         return -EINVAL;
6456                 }
6457
6458                 var = btf__type_by_id(obj->btf, vi->type);
6459                 def = skip_mods_and_typedefs(obj->btf, var->type, NULL);
6460                 if (btf_vlen(def) == 0)
6461                         return -EINVAL;
6462                 member = btf_members(def) + btf_vlen(def) - 1;
6463                 mname = btf__name_by_offset(obj->btf, member->name_off);
6464                 if (strcmp(mname, "values"))
6465                         return -EINVAL;
6466
6467                 moff = btf_member_bit_offset(def, btf_vlen(def) - 1) / 8;
6468                 if (rel->r_offset - vi->offset < moff)
6469                         return -EINVAL;
6470
6471                 moff = rel->r_offset - vi->offset - moff;
6472                 /* here we use BPF pointer size, which is always 64 bit, as we
6473                  * are parsing ELF that was built for BPF target
6474                  */
6475                 if (moff % bpf_ptr_sz)
6476                         return -EINVAL;
6477                 moff /= bpf_ptr_sz;
6478                 if (moff >= map->init_slots_sz) {
6479                         new_sz = moff + 1;
6480                         tmp = libbpf_reallocarray(map->init_slots, new_sz, host_ptr_sz);
6481                         if (!tmp)
6482                                 return -ENOMEM;
6483                         map->init_slots = tmp;
6484                         memset(map->init_slots + map->init_slots_sz, 0,
6485                                (new_sz - map->init_slots_sz) * host_ptr_sz);
6486                         map->init_slots_sz = new_sz;
6487                 }
6488                 map->init_slots[moff] = is_map_in_map ? (void *)targ_map : (void *)targ_prog;
6489
6490                 pr_debug(".maps relo #%d: map '%s' slot [%d] points to %s '%s'\n",
6491                          i, map->name, moff, type, name);
6492         }
6493
6494         return 0;
6495 }
6496
6497 static int bpf_object__collect_relos(struct bpf_object *obj)
6498 {
6499         int i, err;
6500
6501         for (i = 0; i < obj->efile.sec_cnt; i++) {
6502                 struct elf_sec_desc *sec_desc = &obj->efile.secs[i];
6503                 Elf64_Shdr *shdr;
6504                 Elf_Data *data;
6505                 int idx;
6506
6507                 if (sec_desc->sec_type != SEC_RELO)
6508                         continue;
6509
6510                 shdr = sec_desc->shdr;
6511                 data = sec_desc->data;
6512                 idx = shdr->sh_info;
6513
6514                 if (shdr->sh_type != SHT_REL) {
6515                         pr_warn("internal error at %d\n", __LINE__);
6516                         return -LIBBPF_ERRNO__INTERNAL;
6517                 }
6518
6519                 if (idx == obj->efile.st_ops_shndx)
6520                         err = bpf_object__collect_st_ops_relos(obj, shdr, data);
6521                 else if (idx == obj->efile.btf_maps_shndx)
6522                         err = bpf_object__collect_map_relos(obj, shdr, data);
6523                 else
6524                         err = bpf_object__collect_prog_relos(obj, shdr, data);
6525                 if (err)
6526                         return err;
6527         }
6528
6529         bpf_object__sort_relos(obj);
6530         return 0;
6531 }
6532
6533 static bool insn_is_helper_call(struct bpf_insn *insn, enum bpf_func_id *func_id)
6534 {
6535         if (BPF_CLASS(insn->code) == BPF_JMP &&
6536             BPF_OP(insn->code) == BPF_CALL &&
6537             BPF_SRC(insn->code) == BPF_K &&
6538             insn->src_reg == 0 &&
6539             insn->dst_reg == 0) {
6540                     *func_id = insn->imm;
6541                     return true;
6542         }
6543         return false;
6544 }
6545
6546 static int bpf_object__sanitize_prog(struct bpf_object *obj, struct bpf_program *prog)
6547 {
6548         struct bpf_insn *insn = prog->insns;
6549         enum bpf_func_id func_id;
6550         int i;
6551
6552         if (obj->gen_loader)
6553                 return 0;
6554
6555         for (i = 0; i < prog->insns_cnt; i++, insn++) {
6556                 if (!insn_is_helper_call(insn, &func_id))
6557                         continue;
6558
6559                 /* on kernels that don't yet support
6560                  * bpf_probe_read_{kernel,user}[_str] helpers, fall back
6561                  * to bpf_probe_read() which works well for old kernels
6562                  */
6563                 switch (func_id) {
6564                 case BPF_FUNC_probe_read_kernel:
6565                 case BPF_FUNC_probe_read_user:
6566                         if (!kernel_supports(obj, FEAT_PROBE_READ_KERN))
6567                                 insn->imm = BPF_FUNC_probe_read;
6568                         break;
6569                 case BPF_FUNC_probe_read_kernel_str:
6570                 case BPF_FUNC_probe_read_user_str:
6571                         if (!kernel_supports(obj, FEAT_PROBE_READ_KERN))
6572                                 insn->imm = BPF_FUNC_probe_read_str;
6573                         break;
6574                 default:
6575                         break;
6576                 }
6577         }
6578         return 0;
6579 }
6580
6581 static int libbpf_find_attach_btf_id(struct bpf_program *prog, const char *attach_name,
6582                                      int *btf_obj_fd, int *btf_type_id);
6583
6584 /* this is called as prog->sec_def->prog_prepare_load_fn for libbpf-supported sec_defs */
6585 static int libbpf_prepare_prog_load(struct bpf_program *prog,
6586                                     struct bpf_prog_load_opts *opts, long cookie)
6587 {
6588         enum sec_def_flags def = cookie;
6589
6590         /* old kernels might not support specifying expected_attach_type */
6591         if ((def & SEC_EXP_ATTACH_OPT) && !kernel_supports(prog->obj, FEAT_EXP_ATTACH_TYPE))
6592                 opts->expected_attach_type = 0;
6593
6594         if (def & SEC_SLEEPABLE)
6595                 opts->prog_flags |= BPF_F_SLEEPABLE;
6596
6597         if (prog->type == BPF_PROG_TYPE_XDP && (def & SEC_XDP_FRAGS))
6598                 opts->prog_flags |= BPF_F_XDP_HAS_FRAGS;
6599
6600         if ((def & SEC_ATTACH_BTF) && !prog->attach_btf_id) {
6601                 int btf_obj_fd = 0, btf_type_id = 0, err;
6602                 const char *attach_name;
6603
6604                 attach_name = strchr(prog->sec_name, '/');
6605                 if (!attach_name) {
6606                         /* if BPF program is annotated with just SEC("fentry")
6607                          * (or similar) without declaratively specifying
6608                          * target, then it is expected that target will be
6609                          * specified with bpf_program__set_attach_target() at
6610                          * runtime before BPF object load step. If not, then
6611                          * there is nothing to load into the kernel as BPF
6612                          * verifier won't be able to validate BPF program
6613                          * correctness anyways.
6614                          */
6615                         pr_warn("prog '%s': no BTF-based attach target is specified, use bpf_program__set_attach_target()\n",
6616                                 prog->name);
6617                         return -EINVAL;
6618                 }
6619                 attach_name++; /* skip over / */
6620
6621                 err = libbpf_find_attach_btf_id(prog, attach_name, &btf_obj_fd, &btf_type_id);
6622                 if (err)
6623                         return err;
6624
6625                 /* cache resolved BTF FD and BTF type ID in the prog */
6626                 prog->attach_btf_obj_fd = btf_obj_fd;
6627                 prog->attach_btf_id = btf_type_id;
6628
6629                 /* but by now libbpf common logic is not utilizing
6630                  * prog->atach_btf_obj_fd/prog->attach_btf_id anymore because
6631                  * this callback is called after opts were populated by
6632                  * libbpf, so this callback has to update opts explicitly here
6633                  */
6634                 opts->attach_btf_obj_fd = btf_obj_fd;
6635                 opts->attach_btf_id = btf_type_id;
6636         }
6637         return 0;
6638 }
6639
6640 static void fixup_verifier_log(struct bpf_program *prog, char *buf, size_t buf_sz);
6641
6642 static int bpf_object_load_prog(struct bpf_object *obj, struct bpf_program *prog,
6643                                 struct bpf_insn *insns, int insns_cnt,
6644                                 const char *license, __u32 kern_version, int *prog_fd)
6645 {
6646         LIBBPF_OPTS(bpf_prog_load_opts, load_attr);
6647         const char *prog_name = NULL;
6648         char *cp, errmsg[STRERR_BUFSIZE];
6649         size_t log_buf_size = 0;
6650         char *log_buf = NULL, *tmp;
6651         int btf_fd, ret, err;
6652         bool own_log_buf = true;
6653         __u32 log_level = prog->log_level;
6654
6655         if (prog->type == BPF_PROG_TYPE_UNSPEC) {
6656                 /*
6657                  * The program type must be set.  Most likely we couldn't find a proper
6658                  * section definition at load time, and thus we didn't infer the type.
6659                  */
6660                 pr_warn("prog '%s': missing BPF prog type, check ELF section name '%s'\n",
6661                         prog->name, prog->sec_name);
6662                 return -EINVAL;
6663         }
6664
6665         if (!insns || !insns_cnt)
6666                 return -EINVAL;
6667
6668         load_attr.expected_attach_type = prog->expected_attach_type;
6669         if (kernel_supports(obj, FEAT_PROG_NAME))
6670                 prog_name = prog->name;
6671         load_attr.attach_prog_fd = prog->attach_prog_fd;
6672         load_attr.attach_btf_obj_fd = prog->attach_btf_obj_fd;
6673         load_attr.attach_btf_id = prog->attach_btf_id;
6674         load_attr.kern_version = kern_version;
6675         load_attr.prog_ifindex = prog->prog_ifindex;
6676
6677         /* specify func_info/line_info only if kernel supports them */
6678         btf_fd = bpf_object__btf_fd(obj);
6679         if (btf_fd >= 0 && kernel_supports(obj, FEAT_BTF_FUNC)) {
6680                 load_attr.prog_btf_fd = btf_fd;
6681                 load_attr.func_info = prog->func_info;
6682                 load_attr.func_info_rec_size = prog->func_info_rec_size;
6683                 load_attr.func_info_cnt = prog->func_info_cnt;
6684                 load_attr.line_info = prog->line_info;
6685                 load_attr.line_info_rec_size = prog->line_info_rec_size;
6686                 load_attr.line_info_cnt = prog->line_info_cnt;
6687         }
6688         load_attr.log_level = log_level;
6689         load_attr.prog_flags = prog->prog_flags;
6690         load_attr.fd_array = obj->fd_array;
6691
6692         /* adjust load_attr if sec_def provides custom preload callback */
6693         if (prog->sec_def && prog->sec_def->prog_prepare_load_fn) {
6694                 err = prog->sec_def->prog_prepare_load_fn(prog, &load_attr, prog->sec_def->cookie);
6695                 if (err < 0) {
6696                         pr_warn("prog '%s': failed to prepare load attributes: %d\n",
6697                                 prog->name, err);
6698                         return err;
6699                 }
6700                 insns = prog->insns;
6701                 insns_cnt = prog->insns_cnt;
6702         }
6703
6704         if (obj->gen_loader) {
6705                 bpf_gen__prog_load(obj->gen_loader, prog->type, prog->name,
6706                                    license, insns, insns_cnt, &load_attr,
6707                                    prog - obj->programs);
6708                 *prog_fd = -1;
6709                 return 0;
6710         }
6711
6712 retry_load:
6713         /* if log_level is zero, we don't request logs initially even if
6714          * custom log_buf is specified; if the program load fails, then we'll
6715          * bump log_level to 1 and use either custom log_buf or we'll allocate
6716          * our own and retry the load to get details on what failed
6717          */
6718         if (log_level) {
6719                 if (prog->log_buf) {
6720                         log_buf = prog->log_buf;
6721                         log_buf_size = prog->log_size;
6722                         own_log_buf = false;
6723                 } else if (obj->log_buf) {
6724                         log_buf = obj->log_buf;
6725                         log_buf_size = obj->log_size;
6726                         own_log_buf = false;
6727                 } else {
6728                         log_buf_size = max((size_t)BPF_LOG_BUF_SIZE, log_buf_size * 2);
6729                         tmp = realloc(log_buf, log_buf_size);
6730                         if (!tmp) {
6731                                 ret = -ENOMEM;
6732                                 goto out;
6733                         }
6734                         log_buf = tmp;
6735                         log_buf[0] = '\0';
6736                         own_log_buf = true;
6737                 }
6738         }
6739
6740         load_attr.log_buf = log_buf;
6741         load_attr.log_size = log_buf_size;
6742         load_attr.log_level = log_level;
6743
6744         ret = bpf_prog_load(prog->type, prog_name, license, insns, insns_cnt, &load_attr);
6745         if (ret >= 0) {
6746                 if (log_level && own_log_buf) {
6747                         pr_debug("prog '%s': -- BEGIN PROG LOAD LOG --\n%s-- END PROG LOAD LOG --\n",
6748                                  prog->name, log_buf);
6749                 }
6750
6751                 if (obj->has_rodata && kernel_supports(obj, FEAT_PROG_BIND_MAP)) {
6752                         struct bpf_map *map;
6753                         int i;
6754
6755                         for (i = 0; i < obj->nr_maps; i++) {
6756                                 map = &prog->obj->maps[i];
6757                                 if (map->libbpf_type != LIBBPF_MAP_RODATA)
6758                                         continue;
6759
6760                                 if (bpf_prog_bind_map(ret, bpf_map__fd(map), NULL)) {
6761                                         cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
6762                                         pr_warn("prog '%s': failed to bind map '%s': %s\n",
6763                                                 prog->name, map->real_name, cp);
6764                                         /* Don't fail hard if can't bind rodata. */
6765                                 }
6766                         }
6767                 }
6768
6769                 *prog_fd = ret;
6770                 ret = 0;
6771                 goto out;
6772         }
6773
6774         if (log_level == 0) {
6775                 log_level = 1;
6776                 goto retry_load;
6777         }
6778         /* On ENOSPC, increase log buffer size and retry, unless custom
6779          * log_buf is specified.
6780          * Be careful to not overflow u32, though. Kernel's log buf size limit
6781          * isn't part of UAPI so it can always be bumped to full 4GB. So don't
6782          * multiply by 2 unless we are sure we'll fit within 32 bits.
6783          * Currently, we'll get -EINVAL when we reach (UINT_MAX >> 2).
6784          */
6785         if (own_log_buf && errno == ENOSPC && log_buf_size <= UINT_MAX / 2)
6786                 goto retry_load;
6787
6788         ret = -errno;
6789
6790         /* post-process verifier log to improve error descriptions */
6791         fixup_verifier_log(prog, log_buf, log_buf_size);
6792
6793         cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
6794         pr_warn("prog '%s': BPF program load failed: %s\n", prog->name, cp);
6795         pr_perm_msg(ret);
6796
6797         if (own_log_buf && log_buf && log_buf[0] != '\0') {
6798                 pr_warn("prog '%s': -- BEGIN PROG LOAD LOG --\n%s-- END PROG LOAD LOG --\n",
6799                         prog->name, log_buf);
6800         }
6801
6802 out:
6803         if (own_log_buf)
6804                 free(log_buf);
6805         return ret;
6806 }
6807
/* Find the start of the line that precedes position @cur within @buf.
 *
 * @cur is treated as the start of the "next" line (or the position of the
 * terminating NUL), so the character right before it is skipped and the scan
 * walks backwards until it reaches either the beginning of @buf or the
 * character following an earlier '\n'.
 *
 * Returns a pointer to the first character of the preceding line, or NULL if
 * @cur is already at the beginning of @buf (there is no preceding line).
 */
static char *find_prev_line(char *buf, char *cur)
{
        char *p;

        if (cur == buf) /* beginning of a log buf, nothing precedes it */
                return NULL;

        p = cur - 1;
        /* Compare against buf directly instead of computing (p - 1) first,
         * so that we never form a pointer before the start of the buffer.
         */
        while (p > buf && *(p - 1) != '\n')
                p--;

        return p;
}
6821
/* Replace @orig_sz bytes of log text starting at @orig with the string @patch.
 *
 * @buf:     start of the log buffer
 * @buf_sz:  total capacity of @buf in bytes
 * @log_sz:  number of bytes currently used by the log, including the
 *           terminating NUL
 * @orig:    start of the to-be-replaced part (must point inside @buf)
 * @orig_sz: length of the to-be-replaced part
 * @patch:   NUL-terminated replacement text (the NUL itself is not copied)
 *
 * Log content that follows the replaced part is shifted left or right as
 * needed. If the patched log doesn't fit into @buf_sz bytes, the tail of the
 * log (or of the patch itself) is truncated and the result is explicitly
 * NUL-terminated at the last byte of the buffer.
 */
static void patch_log(char *buf, size_t buf_sz, size_t log_sz,
                      char *orig, size_t orig_sz, const char *patch)
{
        /* size of the remaining log content to the right from the to-be-replaced part */
        size_t rem_sz = (buf + log_sz) - (orig + orig_sz);
        size_t patch_sz = strlen(patch);
        /* offset of the to-be-replaced part from the start of the buffer */
        size_t orig_off = orig - buf;
        bool truncated = false;

        if (patch_sz != orig_sz) {
                /* If patch line(s) are longer than original piece of verifier log,
                 * shift log contents by (patch_sz - orig_sz) bytes to the right
                 * starting from after to-be-replaced part of the log.
                 *
                 * If patch line(s) are shorter than original piece of verifier log,
                 * shift log contents by (orig_sz - patch_sz) bytes to the left
                 * starting from after to-be-replaced part of the log
                 *
                 * We need to be careful about not overflowing available
                 * buf_sz capacity. If that's the case, we'll truncate the end
                 * of the original log, as necessary.
                 */
                if (patch_sz > orig_sz) {
                        if (orig_off + patch_sz >= buf_sz) {
                                /* patch is big enough to cover remaining space
                                 * completely; keep the very last byte of the
                                 * buffer reserved for the NUL terminator
                                 */
                                patch_sz = buf_sz - orig_off - 1;
                                rem_sz = 0;
                                truncated = true;
                        } else if (patch_sz - orig_sz > buf_sz - log_sz) {
                                /* patch causes part of remaining log to be truncated */
                                rem_sz -= (patch_sz - orig_sz) - (buf_sz - log_sz);
                                truncated = true;
                        }
                }
                /* shift remaining log to the right by calculated amount */
                memmove(orig + patch_sz, orig + orig_sz, rem_sz);
        }

        memcpy(orig, patch, patch_sz);

        /* Truncation drops the original NUL terminator (or never had room to
         * write one), so restore it at the last byte of the buffer.
         */
        if (truncated)
                buf[buf_sz - 1] = '\0';
}
6858
6859 static void fixup_log_failed_core_relo(struct bpf_program *prog,
6860                                        char *buf, size_t buf_sz, size_t log_sz,
6861                                        char *line1, char *line2, char *line3)
6862 {
6863         /* Expected log for failed and not properly guarded CO-RE relocation:
6864          * line1 -> 123: (85) call unknown#195896080
6865          * line2 -> invalid func unknown#195896080
6866          * line3 -> <anything else or end of buffer>
6867          *
6868          * "123" is the index of the instruction that was poisoned. We extract
6869          * instruction index to find corresponding CO-RE relocation and
6870          * replace this part of the log with more relevant information about
6871          * failed CO-RE relocation.
6872          */
6873         const struct bpf_core_relo *relo;
6874         struct bpf_core_spec spec;
6875         char patch[512], spec_buf[256];
6876         int insn_idx, err, spec_len;
6877
6878         if (sscanf(line1, "%d: (%*d) call unknown#195896080\n", &insn_idx) != 1)
6879                 return;
6880
6881         relo = find_relo_core(prog, insn_idx);
6882         if (!relo)
6883                 return;
6884
6885         err = bpf_core_parse_spec(prog->name, prog->obj->btf, relo, &spec);
6886         if (err)
6887                 return;
6888
6889         spec_len = bpf_core_format_spec(spec_buf, sizeof(spec_buf), &spec);
6890         snprintf(patch, sizeof(patch),
6891                  "%d: <invalid CO-RE relocation>\n"
6892                  "failed to resolve CO-RE relocation %s%s\n",
6893                  insn_idx, spec_buf, spec_len >= sizeof(spec_buf) ? "..." : "");
6894
6895         patch_log(buf, buf_sz, log_sz, line1, line3 - line1, patch);
6896 }
6897
6898 static void fixup_log_missing_map_load(struct bpf_program *prog,
6899                                        char *buf, size_t buf_sz, size_t log_sz,
6900                                        char *line1, char *line2, char *line3)
6901 {
6902         /* Expected log for failed and not properly guarded CO-RE relocation:
6903          * line1 -> 123: (85) call unknown#2001000345
6904          * line2 -> invalid func unknown#2001000345
6905          * line3 -> <anything else or end of buffer>
6906          *
6907          * "123" is the index of the instruction that was poisoned.
6908          * "345" in "2001000345" are map index in obj->maps to fetch map name.
6909          */
6910         struct bpf_object *obj = prog->obj;
6911         const struct bpf_map *map;
6912         int insn_idx, map_idx;
6913         char patch[128];
6914
6915         if (sscanf(line1, "%d: (%*d) call unknown#%d\n", &insn_idx, &map_idx) != 2)
6916                 return;
6917
6918         map_idx -= MAP_LDIMM64_POISON_BASE;
6919         if (map_idx < 0 || map_idx >= obj->nr_maps)
6920                 return;
6921         map = &obj->maps[map_idx];
6922
6923         snprintf(patch, sizeof(patch),
6924                  "%d: <invalid BPF map reference>\n"
6925                  "BPF map '%s' is referenced but wasn't created\n",
6926                  insn_idx, map->name);
6927
6928         patch_log(buf, buf_sz, log_sz, line1, line3 - line1, patch);
6929 }
6930
6931 static void fixup_verifier_log(struct bpf_program *prog, char *buf, size_t buf_sz)
6932 {
6933         /* look for familiar error patterns in last N lines of the log */
6934         const size_t max_last_line_cnt = 10;
6935         char *prev_line, *cur_line, *next_line;
6936         size_t log_sz;
6937         int i;
6938
6939         if (!buf)
6940                 return;
6941
6942         log_sz = strlen(buf) + 1;
6943         next_line = buf + log_sz - 1;
6944
6945         for (i = 0; i < max_last_line_cnt; i++, next_line = cur_line) {
6946                 cur_line = find_prev_line(buf, next_line);
6947                 if (!cur_line)
6948                         return;
6949
6950                 /* failed CO-RE relocation case */
6951                 if (str_has_pfx(cur_line, "invalid func unknown#195896080\n")) {
6952                         prev_line = find_prev_line(buf, cur_line);
6953                         if (!prev_line)
6954                                 continue;
6955
6956                         fixup_log_failed_core_relo(prog, buf, buf_sz, log_sz,
6957                                                    prev_line, cur_line, next_line);
6958                         return;
6959                 } else if (str_has_pfx(cur_line, "invalid func unknown#"MAP_LDIMM64_POISON_PFX)) {
6960                         prev_line = find_prev_line(buf, cur_line);
6961                         if (!prev_line)
6962                                 continue;
6963
6964                         fixup_log_missing_map_load(prog, buf, buf_sz, log_sz,
6965                                                    prev_line, cur_line, next_line);
6966                         return;
6967                 }
6968         }
6969 }
6970
6971 static int bpf_program_record_relos(struct bpf_program *prog)
6972 {
6973         struct bpf_object *obj = prog->obj;
6974         int i;
6975
6976         for (i = 0; i < prog->nr_reloc; i++) {
6977                 struct reloc_desc *relo = &prog->reloc_desc[i];
6978                 struct extern_desc *ext = &obj->externs[relo->sym_off];
6979
6980                 switch (relo->type) {
6981                 case RELO_EXTERN_VAR:
6982                         if (ext->type != EXT_KSYM)
6983                                 continue;
6984                         bpf_gen__record_extern(obj->gen_loader, ext->name,
6985                                                ext->is_weak, !ext->ksym.type_id,
6986                                                BTF_KIND_VAR, relo->insn_idx);
6987                         break;
6988                 case RELO_EXTERN_FUNC:
6989                         bpf_gen__record_extern(obj->gen_loader, ext->name,
6990                                                ext->is_weak, false, BTF_KIND_FUNC,
6991                                                relo->insn_idx);
6992                         break;
6993                 case RELO_CORE: {
6994                         struct bpf_core_relo cr = {
6995                                 .insn_off = relo->insn_idx * 8,
6996                                 .type_id = relo->core_relo->type_id,
6997                                 .access_str_off = relo->core_relo->access_str_off,
6998                                 .kind = relo->core_relo->kind,
6999                         };
7000
7001                         bpf_gen__record_relo_core(obj->gen_loader, &cr);
7002                         break;
7003                 }
7004                 default:
7005                         continue;
7006                 }
7007         }
7008         return 0;
7009 }
7010
7011 static int
7012 bpf_object__load_progs(struct bpf_object *obj, int log_level)
7013 {
7014         struct bpf_program *prog;
7015         size_t i;
7016         int err;
7017
7018         for (i = 0; i < obj->nr_programs; i++) {
7019                 prog = &obj->programs[i];
7020                 err = bpf_object__sanitize_prog(obj, prog);
7021                 if (err)
7022                         return err;
7023         }
7024
7025         for (i = 0; i < obj->nr_programs; i++) {
7026                 prog = &obj->programs[i];
7027                 if (prog_is_subprog(obj, prog))
7028                         continue;
7029                 if (!prog->autoload) {
7030                         pr_debug("prog '%s': skipped loading\n", prog->name);
7031                         continue;
7032                 }
7033                 prog->log_level |= log_level;
7034
7035                 if (obj->gen_loader)
7036                         bpf_program_record_relos(prog);
7037
7038                 err = bpf_object_load_prog(obj, prog, prog->insns, prog->insns_cnt,
7039                                            obj->license, obj->kern_version, &prog->fd);
7040                 if (err) {
7041                         pr_warn("prog '%s': failed to load: %d\n", prog->name, err);
7042                         return err;
7043                 }
7044         }
7045
7046         bpf_object__free_relocs(obj);
7047         return 0;
7048 }
7049
7050 static const struct bpf_sec_def *find_sec_def(const char *sec_name);
7051
7052 static int bpf_object_init_progs(struct bpf_object *obj, const struct bpf_object_open_opts *opts)
7053 {
7054         struct bpf_program *prog;
7055         int err;
7056
7057         bpf_object__for_each_program(prog, obj) {
7058                 prog->sec_def = find_sec_def(prog->sec_name);
7059                 if (!prog->sec_def) {
7060                         /* couldn't guess, but user might manually specify */
7061                         pr_debug("prog '%s': unrecognized ELF section name '%s'\n",
7062                                 prog->name, prog->sec_name);
7063                         continue;
7064                 }
7065
7066                 prog->type = prog->sec_def->prog_type;
7067                 prog->expected_attach_type = prog->sec_def->expected_attach_type;
7068
7069                 /* sec_def can have custom callback which should be called
7070                  * after bpf_program is initialized to adjust its properties
7071                  */
7072                 if (prog->sec_def->prog_setup_fn) {
7073                         err = prog->sec_def->prog_setup_fn(prog, prog->sec_def->cookie);
7074                         if (err < 0) {
7075                                 pr_warn("prog '%s': failed to initialize: %d\n",
7076                                         prog->name, err);
7077                                 return err;
7078                         }
7079                 }
7080         }
7081
7082         return 0;
7083 }
7084
7085 static struct bpf_object *bpf_object_open(const char *path, const void *obj_buf, size_t obj_buf_sz,
7086                                           const struct bpf_object_open_opts *opts)
7087 {
7088         const char *obj_name, *kconfig, *btf_tmp_path;
7089         struct bpf_object *obj;
7090         char tmp_name[64];
7091         int err;
7092         char *log_buf;
7093         size_t log_size;
7094         __u32 log_level;
7095
7096         if (elf_version(EV_CURRENT) == EV_NONE) {
7097                 pr_warn("failed to init libelf for %s\n",
7098                         path ? : "(mem buf)");
7099                 return ERR_PTR(-LIBBPF_ERRNO__LIBELF);
7100         }
7101
7102         if (!OPTS_VALID(opts, bpf_object_open_opts))
7103                 return ERR_PTR(-EINVAL);
7104
7105         obj_name = OPTS_GET(opts, object_name, NULL);
7106         if (obj_buf) {
7107                 if (!obj_name) {
7108                         snprintf(tmp_name, sizeof(tmp_name), "%lx-%lx",
7109                                  (unsigned long)obj_buf,
7110                                  (unsigned long)obj_buf_sz);
7111                         obj_name = tmp_name;
7112                 }
7113                 path = obj_name;
7114                 pr_debug("loading object '%s' from buffer\n", obj_name);
7115         }
7116
7117         log_buf = OPTS_GET(opts, kernel_log_buf, NULL);
7118         log_size = OPTS_GET(opts, kernel_log_size, 0);
7119         log_level = OPTS_GET(opts, kernel_log_level, 0);
7120         if (log_size > UINT_MAX)
7121                 return ERR_PTR(-EINVAL);
7122         if (log_size && !log_buf)
7123                 return ERR_PTR(-EINVAL);
7124
7125         obj = bpf_object__new(path, obj_buf, obj_buf_sz, obj_name);
7126         if (IS_ERR(obj))
7127                 return obj;
7128
7129         obj->log_buf = log_buf;
7130         obj->log_size = log_size;
7131         obj->log_level = log_level;
7132
7133         btf_tmp_path = OPTS_GET(opts, btf_custom_path, NULL);
7134         if (btf_tmp_path) {
7135                 if (strlen(btf_tmp_path) >= PATH_MAX) {
7136                         err = -ENAMETOOLONG;
7137                         goto out;
7138                 }
7139                 obj->btf_custom_path = strdup(btf_tmp_path);
7140                 if (!obj->btf_custom_path) {
7141                         err = -ENOMEM;
7142                         goto out;
7143                 }
7144         }
7145
7146         kconfig = OPTS_GET(opts, kconfig, NULL);
7147         if (kconfig) {
7148                 obj->kconfig = strdup(kconfig);
7149                 if (!obj->kconfig) {
7150                         err = -ENOMEM;
7151                         goto out;
7152                 }
7153         }
7154
7155         err = bpf_object__elf_init(obj);
7156         err = err ? : bpf_object__check_endianness(obj);
7157         err = err ? : bpf_object__elf_collect(obj);
7158         err = err ? : bpf_object__collect_externs(obj);
7159         err = err ? : bpf_object__finalize_btf(obj);
7160         err = err ? : bpf_object__init_maps(obj, opts);
7161         err = err ? : bpf_object_init_progs(obj, opts);
7162         err = err ? : bpf_object__collect_relos(obj);
7163         if (err)
7164                 goto out;
7165
7166         bpf_object__elf_finish(obj);
7167
7168         return obj;
7169 out:
7170         bpf_object__close(obj);
7171         return ERR_PTR(err);
7172 }
7173
/* Open a BPF object from the ELF file at @path using optional @opts.
 *
 * A NULL @path is rejected with -EINVAL. The result of the internal open is
 * passed through libbpf_ptr() before being returned.
 */
struct bpf_object *
bpf_object__open_file(const char *path, const struct bpf_object_open_opts *opts)
{
        struct bpf_object *obj;

        if (!path)
                return libbpf_err_ptr(-EINVAL);

        pr_debug("loading %s\n", path);

        obj = bpf_object_open(path, NULL, 0, opts);
        return libbpf_ptr(obj);
}
7184
/* Open a BPF object from the ELF file at @path without any extra options;
 * equivalent to bpf_object__open_file() with NULL opts.
 */
struct bpf_object *bpf_object__open(const char *path)
{
        const struct bpf_object_open_opts *no_opts = NULL;

        return bpf_object__open_file(path, no_opts);
}
7189
/* Open a BPF object from an ELF image held in memory at @obj_buf
 * (@obj_buf_sz bytes) using optional @opts.
 *
 * A NULL buffer or a zero size is rejected with -EINVAL. The result of the
 * internal open is passed through libbpf_ptr() before being returned.
 */
struct bpf_object *
bpf_object__open_mem(const void *obj_buf, size_t obj_buf_sz,
                     const struct bpf_object_open_opts *opts)
{
        struct bpf_object *obj;

        if (!(obj_buf && obj_buf_sz != 0))
                return libbpf_err_ptr(-EINVAL);

        obj = bpf_object_open(NULL, obj_buf, obj_buf_sz, opts);
        return libbpf_ptr(obj);
}
7199
7200 static int bpf_object_unload(struct bpf_object *obj)
7201 {
7202         size_t i;
7203
7204         if (!obj)
7205                 return libbpf_err(-EINVAL);
7206
7207         for (i = 0; i < obj->nr_maps; i++) {
7208                 zclose(obj->maps[i].fd);
7209                 if (obj->maps[i].st_ops)
7210                         zfree(&obj->maps[i].st_ops->kern_vdata);
7211         }
7212
7213         for (i = 0; i < obj->nr_programs; i++)
7214                 bpf_program__unload(&obj->programs[i]);
7215
7216         return 0;
7217 }
7218
7219 int bpf_object__unload(struct bpf_object *obj) __attribute__((alias("bpf_object_unload")));
7220
7221 static int bpf_object__sanitize_maps(struct bpf_object *obj)
7222 {
7223         struct bpf_map *m;
7224
7225         bpf_object__for_each_map(m, obj) {
7226                 if (!bpf_map__is_internal(m))
7227                         continue;
7228                 if (!kernel_supports(obj, FEAT_ARRAY_MMAP))
7229                         m->def.map_flags ^= BPF_F_MMAPABLE;
7230         }
7231
7232         return 0;
7233 }
7234
7235 int libbpf_kallsyms_parse(kallsyms_cb_t cb, void *ctx)
7236 {
7237         char sym_type, sym_name[500];
7238         unsigned long long sym_addr;
7239         int ret, err = 0;
7240         FILE *f;
7241
7242         f = fopen("/proc/kallsyms", "r");
7243         if (!f) {
7244                 err = -errno;
7245                 pr_warn("failed to open /proc/kallsyms: %d\n", err);
7246                 return err;
7247         }
7248
7249         while (true) {
7250                 ret = fscanf(f, "%llx %c %499s%*[^\n]\n",
7251                              &sym_addr, &sym_type, sym_name);
7252                 if (ret == EOF && feof(f))
7253                         break;
7254                 if (ret != 3) {
7255                         pr_warn("failed to read kallsyms entry: %d\n", ret);
7256                         err = -EINVAL;
7257                         break;
7258                 }
7259
7260                 err = cb(sym_addr, sym_type, sym_name, ctx);
7261                 if (err)
7262                         break;
7263         }
7264
7265         fclose(f);
7266         return err;
7267 }
7268
7269 static int kallsyms_cb(unsigned long long sym_addr, char sym_type,
7270                        const char *sym_name, void *ctx)
7271 {
7272         struct bpf_object *obj = ctx;
7273         const struct btf_type *t;
7274         struct extern_desc *ext;
7275
7276         ext = find_extern_by_name(obj, sym_name);
7277         if (!ext || ext->type != EXT_KSYM)
7278                 return 0;
7279
7280         t = btf__type_by_id(obj->btf, ext->btf_id);
7281         if (!btf_is_var(t))
7282                 return 0;
7283
7284         if (ext->is_set && ext->ksym.addr != sym_addr) {
7285                 pr_warn("extern (ksym) '%s' resolution is ambiguous: 0x%llx or 0x%llx\n",
7286                         sym_name, ext->ksym.addr, sym_addr);
7287                 return -EINVAL;
7288         }
7289         if (!ext->is_set) {
7290                 ext->is_set = true;
7291                 ext->ksym.addr = sym_addr;
7292                 pr_debug("extern (ksym) %s=0x%llx\n", sym_name, sym_addr);
7293         }
7294         return 0;
7295 }
7296
7297 static int bpf_object__read_kallsyms_file(struct bpf_object *obj)
7298 {
7299         return libbpf_kallsyms_parse(kallsyms_cb, obj);
7300 }
7301
7302 static int find_ksym_btf_id(struct bpf_object *obj, const char *ksym_name,
7303                             __u16 kind, struct btf **res_btf,
7304                             struct module_btf **res_mod_btf)
7305 {
7306         struct module_btf *mod_btf;
7307         struct btf *btf;
7308         int i, id, err;
7309
7310         btf = obj->btf_vmlinux;
7311         mod_btf = NULL;
7312         id = btf__find_by_name_kind(btf, ksym_name, kind);
7313
7314         if (id == -ENOENT) {
7315                 err = load_module_btfs(obj);
7316                 if (err)
7317                         return err;
7318
7319                 for (i = 0; i < obj->btf_module_cnt; i++) {
7320                         /* we assume module_btf's BTF FD is always >0 */
7321                         mod_btf = &obj->btf_modules[i];
7322                         btf = mod_btf->btf;
7323                         id = btf__find_by_name_kind_own(btf, ksym_name, kind);
7324                         if (id != -ENOENT)
7325                                 break;
7326                 }
7327         }
7328         if (id <= 0)
7329                 return -ESRCH;
7330
7331         *res_btf = btf;
7332         *res_mod_btf = mod_btf;
7333         return id;
7334 }
7335
7336 static int bpf_object__resolve_ksym_var_btf_id(struct bpf_object *obj,
7337                                                struct extern_desc *ext)
7338 {
7339         const struct btf_type *targ_var, *targ_type;
7340         __u32 targ_type_id, local_type_id;
7341         struct module_btf *mod_btf = NULL;
7342         const char *targ_var_name;
7343         struct btf *btf = NULL;
7344         int id, err;
7345
7346         id = find_ksym_btf_id(obj, ext->name, BTF_KIND_VAR, &btf, &mod_btf);
7347         if (id < 0) {
7348                 if (id == -ESRCH && ext->is_weak)
7349                         return 0;
7350                 pr_warn("extern (var ksym) '%s': not found in kernel BTF\n",
7351                         ext->name);
7352                 return id;
7353         }
7354
7355         /* find local type_id */
7356         local_type_id = ext->ksym.type_id;
7357
7358         /* find target type_id */
7359         targ_var = btf__type_by_id(btf, id);
7360         targ_var_name = btf__name_by_offset(btf, targ_var->name_off);
7361         targ_type = skip_mods_and_typedefs(btf, targ_var->type, &targ_type_id);
7362
7363         err = bpf_core_types_are_compat(obj->btf, local_type_id,
7364                                         btf, targ_type_id);
7365         if (err <= 0) {
7366                 const struct btf_type *local_type;
7367                 const char *targ_name, *local_name;
7368
7369                 local_type = btf__type_by_id(obj->btf, local_type_id);
7370                 local_name = btf__name_by_offset(obj->btf, local_type->name_off);
7371                 targ_name = btf__name_by_offset(btf, targ_type->name_off);
7372
7373                 pr_warn("extern (var ksym) '%s': incompatible types, expected [%d] %s %s, but kernel has [%d] %s %s\n",
7374                         ext->name, local_type_id,
7375                         btf_kind_str(local_type), local_name, targ_type_id,
7376                         btf_kind_str(targ_type), targ_name);
7377                 return -EINVAL;
7378         }
7379
7380         ext->is_set = true;
7381         ext->ksym.kernel_btf_obj_fd = mod_btf ? mod_btf->fd : 0;
7382         ext->ksym.kernel_btf_id = id;
7383         pr_debug("extern (var ksym) '%s': resolved to [%d] %s %s\n",
7384                  ext->name, id, btf_kind_str(targ_var), targ_var_name);
7385
7386         return 0;
7387 }
7388
7389 static int bpf_object__resolve_ksym_func_btf_id(struct bpf_object *obj,
7390                                                 struct extern_desc *ext)
7391 {
7392         int local_func_proto_id, kfunc_proto_id, kfunc_id;
7393         struct module_btf *mod_btf = NULL;
7394         const struct btf_type *kern_func;
7395         struct btf *kern_btf = NULL;
7396         int ret;
7397
7398         local_func_proto_id = ext->ksym.type_id;
7399
7400         kfunc_id = find_ksym_btf_id(obj, ext->name, BTF_KIND_FUNC, &kern_btf, &mod_btf);
7401         if (kfunc_id < 0) {
7402                 if (kfunc_id == -ESRCH && ext->is_weak)
7403                         return 0;
7404                 pr_warn("extern (func ksym) '%s': not found in kernel or module BTFs\n",
7405                         ext->name);
7406                 return kfunc_id;
7407         }
7408
7409         kern_func = btf__type_by_id(kern_btf, kfunc_id);
7410         kfunc_proto_id = kern_func->type;
7411
7412         ret = bpf_core_types_are_compat(obj->btf, local_func_proto_id,
7413                                         kern_btf, kfunc_proto_id);
7414         if (ret <= 0) {
7415                 pr_warn("extern (func ksym) '%s': func_proto [%d] incompatible with kernel [%d]\n",
7416                         ext->name, local_func_proto_id, kfunc_proto_id);
7417                 return -EINVAL;
7418         }
7419
7420         /* set index for module BTF fd in fd_array, if unset */
7421         if (mod_btf && !mod_btf->fd_array_idx) {
7422                 /* insn->off is s16 */
7423                 if (obj->fd_array_cnt == INT16_MAX) {
7424                         pr_warn("extern (func ksym) '%s': module BTF fd index %d too big to fit in bpf_insn offset\n",
7425                                 ext->name, mod_btf->fd_array_idx);
7426                         return -E2BIG;
7427                 }
7428                 /* Cannot use index 0 for module BTF fd */
7429                 if (!obj->fd_array_cnt)
7430                         obj->fd_array_cnt = 1;
7431
7432                 ret = libbpf_ensure_mem((void **)&obj->fd_array, &obj->fd_array_cap, sizeof(int),
7433                                         obj->fd_array_cnt + 1);
7434                 if (ret)
7435                         return ret;
7436                 mod_btf->fd_array_idx = obj->fd_array_cnt;
7437                 /* we assume module BTF FD is always >0 */
7438                 obj->fd_array[obj->fd_array_cnt++] = mod_btf->fd;
7439         }
7440
7441         ext->is_set = true;
7442         ext->ksym.kernel_btf_id = kfunc_id;
7443         ext->ksym.btf_fd_idx = mod_btf ? mod_btf->fd_array_idx : 0;
7444         pr_debug("extern (func ksym) '%s': resolved to kernel [%d]\n",
7445                  ext->name, kfunc_id);
7446
7447         return 0;
7448 }
7449
7450 static int bpf_object__resolve_ksyms_btf_id(struct bpf_object *obj)
7451 {
7452         const struct btf_type *t;
7453         struct extern_desc *ext;
7454         int i, err;
7455
7456         for (i = 0; i < obj->nr_extern; i++) {
7457                 ext = &obj->externs[i];
7458                 if (ext->type != EXT_KSYM || !ext->ksym.type_id)
7459                         continue;
7460
7461                 if (obj->gen_loader) {
7462                         ext->is_set = true;
7463                         ext->ksym.kernel_btf_obj_fd = 0;
7464                         ext->ksym.kernel_btf_id = 0;
7465                         continue;
7466                 }
7467                 t = btf__type_by_id(obj->btf, ext->btf_id);
7468                 if (btf_is_var(t))
7469                         err = bpf_object__resolve_ksym_var_btf_id(obj, ext);
7470                 else
7471                         err = bpf_object__resolve_ksym_func_btf_id(obj, ext);
7472                 if (err)
7473                         return err;
7474         }
7475         return 0;
7476 }
7477
7478 static int bpf_object__resolve_externs(struct bpf_object *obj,
7479                                        const char *extra_kconfig)
7480 {
7481         bool need_config = false, need_kallsyms = false;
7482         bool need_vmlinux_btf = false;
7483         struct extern_desc *ext;
7484         void *kcfg_data = NULL;
7485         int err, i;
7486
7487         if (obj->nr_extern == 0)
7488                 return 0;
7489
7490         if (obj->kconfig_map_idx >= 0)
7491                 kcfg_data = obj->maps[obj->kconfig_map_idx].mmaped;
7492
7493         for (i = 0; i < obj->nr_extern; i++) {
7494                 ext = &obj->externs[i];
7495
7496                 if (ext->type == EXT_KCFG &&
7497                     strcmp(ext->name, "LINUX_KERNEL_VERSION") == 0) {
7498                         void *ext_val = kcfg_data + ext->kcfg.data_off;
7499                         __u32 kver = get_kernel_version();
7500
7501                         if (!kver) {
7502                                 pr_warn("failed to get kernel version\n");
7503                                 return -EINVAL;
7504                         }
7505                         err = set_kcfg_value_num(ext, ext_val, kver);
7506                         if (err)
7507                                 return err;
7508                         pr_debug("extern (kcfg) %s=0x%x\n", ext->name, kver);
7509                 } else if (ext->type == EXT_KCFG && str_has_pfx(ext->name, "CONFIG_")) {
7510                         need_config = true;
7511                 } else if (ext->type == EXT_KSYM) {
7512                         if (ext->ksym.type_id)
7513                                 need_vmlinux_btf = true;
7514                         else
7515                                 need_kallsyms = true;
7516                 } else {
7517                         pr_warn("unrecognized extern '%s'\n", ext->name);
7518                         return -EINVAL;
7519                 }
7520         }
7521         if (need_config && extra_kconfig) {
7522                 err = bpf_object__read_kconfig_mem(obj, extra_kconfig, kcfg_data);
7523                 if (err)
7524                         return -EINVAL;
7525                 need_config = false;
7526                 for (i = 0; i < obj->nr_extern; i++) {
7527                         ext = &obj->externs[i];
7528                         if (ext->type == EXT_KCFG && !ext->is_set) {
7529                                 need_config = true;
7530                                 break;
7531                         }
7532                 }
7533         }
7534         if (need_config) {
7535                 err = bpf_object__read_kconfig_file(obj, kcfg_data);
7536                 if (err)
7537                         return -EINVAL;
7538         }
7539         if (need_kallsyms) {
7540                 err = bpf_object__read_kallsyms_file(obj);
7541                 if (err)
7542                         return -EINVAL;
7543         }
7544         if (need_vmlinux_btf) {
7545                 err = bpf_object__resolve_ksyms_btf_id(obj);
7546                 if (err)
7547                         return -EINVAL;
7548         }
7549         for (i = 0; i < obj->nr_extern; i++) {
7550                 ext = &obj->externs[i];
7551
7552                 if (!ext->is_set && !ext->is_weak) {
7553                         pr_warn("extern %s (strong) not resolved\n", ext->name);
7554                         return -ESRCH;
7555                 } else if (!ext->is_set) {
7556                         pr_debug("extern %s (weak) not resolved, defaulting to zero\n",
7557                                  ext->name);
7558                 }
7559         }
7560
7561         return 0;
7562 }
7563
7564 static int bpf_object_load(struct bpf_object *obj, int extra_log_level, const char *target_btf_path)
7565 {
7566         int err, i;
7567
7568         if (!obj)
7569                 return libbpf_err(-EINVAL);
7570
7571         if (obj->loaded) {
7572                 pr_warn("object '%s': load can't be attempted twice\n", obj->name);
7573                 return libbpf_err(-EINVAL);
7574         }
7575
7576         if (obj->gen_loader)
7577                 bpf_gen__init(obj->gen_loader, extra_log_level, obj->nr_programs, obj->nr_maps);
7578
7579         err = bpf_object__probe_loading(obj);
7580         err = err ? : bpf_object__load_vmlinux_btf(obj, false);
7581         err = err ? : bpf_object__resolve_externs(obj, obj->kconfig);
7582         err = err ? : bpf_object__sanitize_and_load_btf(obj);
7583         err = err ? : bpf_object__sanitize_maps(obj);
7584         err = err ? : bpf_object__init_kern_struct_ops_maps(obj);
7585         err = err ? : bpf_object__create_maps(obj);
7586         err = err ? : bpf_object__relocate(obj, obj->btf_custom_path ? : target_btf_path);
7587         err = err ? : bpf_object__load_progs(obj, extra_log_level);
7588         err = err ? : bpf_object_init_prog_arrays(obj);
7589
7590         if (obj->gen_loader) {
7591                 /* reset FDs */
7592                 if (obj->btf)
7593                         btf__set_fd(obj->btf, -1);
7594                 for (i = 0; i < obj->nr_maps; i++)
7595                         obj->maps[i].fd = -1;
7596                 if (!err)
7597                         err = bpf_gen__finish(obj->gen_loader, obj->nr_programs, obj->nr_maps);
7598         }
7599
7600         /* clean up fd_array */
7601         zfree(&obj->fd_array);
7602
7603         /* clean up module BTFs */
7604         for (i = 0; i < obj->btf_module_cnt; i++) {
7605                 close(obj->btf_modules[i].fd);
7606                 btf__free(obj->btf_modules[i].btf);
7607                 free(obj->btf_modules[i].name);
7608         }
7609         free(obj->btf_modules);
7610
7611         /* clean up vmlinux BTF */
7612         btf__free(obj->btf_vmlinux);
7613         obj->btf_vmlinux = NULL;
7614
7615         obj->loaded = true; /* doesn't matter if successfully or not */
7616
7617         if (err)
7618                 goto out;
7619
7620         return 0;
7621 out:
7622         /* unpin any maps that were auto-pinned during load */
7623         for (i = 0; i < obj->nr_maps; i++)
7624                 if (obj->maps[i].pinned && !obj->maps[i].reused)
7625                         bpf_map__unpin(&obj->maps[i], NULL);
7626
7627         bpf_object_unload(obj);
7628         pr_warn("failed to load object '%s'\n", obj->path);
7629         return libbpf_err(err);
7630 }
7631
/*
 * Public load entry point: loads @obj via bpf_object_load() with an extra log
 * level of 0 and no target BTF path.
 */
int bpf_object__load(struct bpf_object *obj)
{
        return bpf_object_load(obj, 0, NULL);
}
7636
7637 static int make_parent_dir(const char *path)
7638 {
7639         char *cp, errmsg[STRERR_BUFSIZE];
7640         char *dname, *dir;
7641         int err = 0;
7642
7643         dname = strdup(path);
7644         if (dname == NULL)
7645                 return -ENOMEM;
7646
7647         dir = dirname(dname);
7648         if (mkdir(dir, 0700) && errno != EEXIST)
7649                 err = -errno;
7650
7651         free(dname);
7652         if (err) {
7653                 cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg));
7654                 pr_warn("failed to mkdir %s: %s\n", path, cp);
7655         }
7656         return err;
7657 }
7658
7659 static int check_path(const char *path)
7660 {
7661         char *cp, errmsg[STRERR_BUFSIZE];
7662         struct statfs st_fs;
7663         char *dname, *dir;
7664         int err = 0;
7665
7666         if (path == NULL)
7667                 return -EINVAL;
7668
7669         dname = strdup(path);
7670         if (dname == NULL)
7671                 return -ENOMEM;
7672
7673         dir = dirname(dname);
7674         if (statfs(dir, &st_fs)) {
7675                 cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
7676                 pr_warn("failed to statfs %s: %s\n", dir, cp);
7677                 err = -errno;
7678         }
7679         free(dname);
7680
7681         if (!err && st_fs.f_type != BPF_FS_MAGIC) {
7682                 pr_warn("specified path %s is not on BPF FS\n", path);
7683                 err = -EINVAL;
7684         }
7685
7686         return err;
7687 }
7688
7689 int bpf_program__pin(struct bpf_program *prog, const char *path)
7690 {
7691         char *cp, errmsg[STRERR_BUFSIZE];
7692         int err;
7693
7694         if (prog->fd < 0) {
7695                 pr_warn("prog '%s': can't pin program that wasn't loaded\n", prog->name);
7696                 return libbpf_err(-EINVAL);
7697         }
7698
7699         err = make_parent_dir(path);
7700         if (err)
7701                 return libbpf_err(err);
7702
7703         err = check_path(path);
7704         if (err)
7705                 return libbpf_err(err);
7706
7707         if (bpf_obj_pin(prog->fd, path)) {
7708                 err = -errno;
7709                 cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
7710                 pr_warn("prog '%s': failed to pin at '%s': %s\n", prog->name, path, cp);
7711                 return libbpf_err(err);
7712         }
7713
7714         pr_debug("prog '%s': pinned at '%s'\n", prog->name, path);
7715         return 0;
7716 }
7717
7718 int bpf_program__unpin(struct bpf_program *prog, const char *path)
7719 {
7720         int err;
7721
7722         if (prog->fd < 0) {
7723                 pr_warn("prog '%s': can't unpin program that wasn't loaded\n", prog->name);
7724                 return libbpf_err(-EINVAL);
7725         }
7726
7727         err = check_path(path);
7728         if (err)
7729                 return libbpf_err(err);
7730
7731         err = unlink(path);
7732         if (err)
7733                 return libbpf_err(-errno);
7734
7735         pr_debug("prog '%s': unpinned from '%s'\n", prog->name, path);
7736         return 0;
7737 }
7738
7739 int bpf_map__pin(struct bpf_map *map, const char *path)
7740 {
7741         char *cp, errmsg[STRERR_BUFSIZE];
7742         int err;
7743
7744         if (map == NULL) {
7745                 pr_warn("invalid map pointer\n");
7746                 return libbpf_err(-EINVAL);
7747         }
7748
7749         if (map->pin_path) {
7750                 if (path && strcmp(path, map->pin_path)) {
7751                         pr_warn("map '%s' already has pin path '%s' different from '%s'\n",
7752                                 bpf_map__name(map), map->pin_path, path);
7753                         return libbpf_err(-EINVAL);
7754                 } else if (map->pinned) {
7755                         pr_debug("map '%s' already pinned at '%s'; not re-pinning\n",
7756                                  bpf_map__name(map), map->pin_path);
7757                         return 0;
7758                 }
7759         } else {
7760                 if (!path) {
7761                         pr_warn("missing a path to pin map '%s' at\n",
7762                                 bpf_map__name(map));
7763                         return libbpf_err(-EINVAL);
7764                 } else if (map->pinned) {
7765                         pr_warn("map '%s' already pinned\n", bpf_map__name(map));
7766                         return libbpf_err(-EEXIST);
7767                 }
7768
7769                 map->pin_path = strdup(path);
7770                 if (!map->pin_path) {
7771                         err = -errno;
7772                         goto out_err;
7773                 }
7774         }
7775
7776         err = make_parent_dir(map->pin_path);
7777         if (err)
7778                 return libbpf_err(err);
7779
7780         err = check_path(map->pin_path);
7781         if (err)
7782                 return libbpf_err(err);
7783
7784         if (bpf_obj_pin(map->fd, map->pin_path)) {
7785                 err = -errno;
7786                 goto out_err;
7787         }
7788
7789         map->pinned = true;
7790         pr_debug("pinned map '%s'\n", map->pin_path);
7791
7792         return 0;
7793
7794 out_err:
7795         cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg));
7796         pr_warn("failed to pin map: %s\n", cp);
7797         return libbpf_err(err);
7798 }
7799
7800 int bpf_map__unpin(struct bpf_map *map, const char *path)
7801 {
7802         int err;
7803
7804         if (map == NULL) {
7805                 pr_warn("invalid map pointer\n");
7806                 return libbpf_err(-EINVAL);
7807         }
7808
7809         if (map->pin_path) {
7810                 if (path && strcmp(path, map->pin_path)) {
7811                         pr_warn("map '%s' already has pin path '%s' different from '%s'\n",
7812                                 bpf_map__name(map), map->pin_path, path);
7813                         return libbpf_err(-EINVAL);
7814                 }
7815                 path = map->pin_path;
7816         } else if (!path) {
7817                 pr_warn("no path to unpin map '%s' from\n",
7818                         bpf_map__name(map));
7819                 return libbpf_err(-EINVAL);
7820         }
7821
7822         err = check_path(path);
7823         if (err)
7824                 return libbpf_err(err);
7825
7826         err = unlink(path);
7827         if (err != 0)
7828                 return libbpf_err(-errno);
7829
7830         map->pinned = false;
7831         pr_debug("unpinned map '%s' from '%s'\n", bpf_map__name(map), path);
7832
7833         return 0;
7834 }
7835
7836 int bpf_map__set_pin_path(struct bpf_map *map, const char *path)
7837 {
7838         char *new = NULL;
7839
7840         if (path) {
7841                 new = strdup(path);
7842                 if (!new)
7843                         return libbpf_err(-errno);
7844         }
7845
7846         free(map->pin_path);
7847         map->pin_path = new;
7848         return 0;
7849 }
7850
/*
 * bpf_map__get_pin_path() is declared through __alias() on
 * bpf_map__pin_path(); presumably this keeps the older getter name available
 * as the same function (see the __alias definition to confirm).
 */
__alias(bpf_map__pin_path)
const char *bpf_map__get_pin_path(const struct bpf_map *map);

/* Return the pin path recorded for @map; @map must not be NULL. */
const char *bpf_map__pin_path(const struct bpf_map *map)
{
        return map->pin_path;
}
7858
/* Return the pinned flag of @map; @map must not be NULL. */
bool bpf_map__is_pinned(const struct bpf_map *map)
{
        return map->pinned;
}
7863
/*
 * Rewrite the NUL-terminated string @s in place, turning every '.' into '_',
 * because bpffs disallows periods in path names.
 */
static void sanitize_pin_path(char *s)
{
        char *ch;

        for (ch = s; *ch != '\0'; ch++) {
                if (*ch == '.')
                        *ch = '_';
        }
}
7873
7874 int bpf_object__pin_maps(struct bpf_object *obj, const char *path)
7875 {
7876         struct bpf_map *map;
7877         int err;
7878
7879         if (!obj)
7880                 return libbpf_err(-ENOENT);
7881
7882         if (!obj->loaded) {
7883                 pr_warn("object not yet loaded; load it first\n");
7884                 return libbpf_err(-ENOENT);
7885         }
7886
7887         bpf_object__for_each_map(map, obj) {
7888                 char *pin_path = NULL;
7889                 char buf[PATH_MAX];
7890
7891                 if (!map->autocreate)
7892                         continue;
7893
7894                 if (path) {
7895                         int len;
7896
7897                         len = snprintf(buf, PATH_MAX, "%s/%s", path,
7898                                        bpf_map__name(map));
7899                         if (len < 0) {
7900                                 err = -EINVAL;
7901                                 goto err_unpin_maps;
7902                         } else if (len >= PATH_MAX) {
7903                                 err = -ENAMETOOLONG;
7904                                 goto err_unpin_maps;
7905                         }
7906                         sanitize_pin_path(buf);
7907                         pin_path = buf;
7908                 } else if (!map->pin_path) {
7909                         continue;
7910                 }
7911
7912                 err = bpf_map__pin(map, pin_path);
7913                 if (err)
7914                         goto err_unpin_maps;
7915         }
7916
7917         return 0;
7918
7919 err_unpin_maps:
7920         while ((map = bpf_object__prev_map(obj, map))) {
7921                 if (!map->pin_path)
7922                         continue;
7923
7924                 bpf_map__unpin(map, NULL);
7925         }
7926
7927         return libbpf_err(err);
7928 }
7929
7930 int bpf_object__unpin_maps(struct bpf_object *obj, const char *path)
7931 {
7932         struct bpf_map *map;
7933         int err;
7934
7935         if (!obj)
7936                 return libbpf_err(-ENOENT);
7937
7938         bpf_object__for_each_map(map, obj) {
7939                 char *pin_path = NULL;
7940                 char buf[PATH_MAX];
7941
7942                 if (path) {
7943                         int len;
7944
7945                         len = snprintf(buf, PATH_MAX, "%s/%s", path,
7946                                        bpf_map__name(map));
7947                         if (len < 0)
7948                                 return libbpf_err(-EINVAL);
7949                         else if (len >= PATH_MAX)
7950                                 return libbpf_err(-ENAMETOOLONG);
7951                         sanitize_pin_path(buf);
7952                         pin_path = buf;
7953                 } else if (!map->pin_path) {
7954                         continue;
7955                 }
7956
7957                 err = bpf_map__unpin(map, pin_path);
7958                 if (err)
7959                         return libbpf_err(err);
7960         }
7961
7962         return 0;
7963 }
7964
7965 int bpf_object__pin_programs(struct bpf_object *obj, const char *path)
7966 {
7967         struct bpf_program *prog;
7968         int err;
7969
7970         if (!obj)
7971                 return libbpf_err(-ENOENT);
7972
7973         if (!obj->loaded) {
7974                 pr_warn("object not yet loaded; load it first\n");
7975                 return libbpf_err(-ENOENT);
7976         }
7977
7978         bpf_object__for_each_program(prog, obj) {
7979                 char buf[PATH_MAX];
7980                 int len;
7981
7982                 len = snprintf(buf, PATH_MAX, "%s/%s", path, prog->name);
7983                 if (len < 0) {
7984                         err = -EINVAL;
7985                         goto err_unpin_programs;
7986                 } else if (len >= PATH_MAX) {
7987                         err = -ENAMETOOLONG;
7988                         goto err_unpin_programs;
7989                 }
7990
7991                 err = bpf_program__pin(prog, buf);
7992                 if (err)
7993                         goto err_unpin_programs;
7994         }
7995
7996         return 0;
7997
7998 err_unpin_programs:
7999         while ((prog = bpf_object__prev_program(obj, prog))) {
8000                 char buf[PATH_MAX];
8001                 int len;
8002
8003                 len = snprintf(buf, PATH_MAX, "%s/%s", path, prog->name);
8004                 if (len < 0)
8005                         continue;
8006                 else if (len >= PATH_MAX)
8007                         continue;
8008
8009                 bpf_program__unpin(prog, buf);
8010         }
8011
8012         return libbpf_err(err);
8013 }
8014
8015 int bpf_object__unpin_programs(struct bpf_object *obj, const char *path)
8016 {
8017         struct bpf_program *prog;
8018         int err;
8019
8020         if (!obj)
8021                 return libbpf_err(-ENOENT);
8022
8023         bpf_object__for_each_program(prog, obj) {
8024                 char buf[PATH_MAX];
8025                 int len;
8026
8027                 len = snprintf(buf, PATH_MAX, "%s/%s", path, prog->name);
8028                 if (len < 0)
8029                         return libbpf_err(-EINVAL);
8030                 else if (len >= PATH_MAX)
8031                         return libbpf_err(-ENAMETOOLONG);
8032
8033                 err = bpf_program__unpin(prog, buf);
8034                 if (err)
8035                         return libbpf_err(err);
8036         }
8037
8038         return 0;
8039 }
8040
/*
 * Pin all maps and then all programs of @obj under @path. If pinning the
 * programs fails, the maps are unpinned again.
 *
 * Returns 0 on success or a negative error passed through libbpf_err().
 */
int bpf_object__pin(struct bpf_object *obj, const char *path)
{
        int err;

        err = bpf_object__pin_maps(obj, path);
        if (err)
                return libbpf_err(err);

        err = bpf_object__pin_programs(obj, path);
        if (!err)
                return 0;

        /* roll back the map pins made above */
        bpf_object__unpin_maps(obj, path);
        return libbpf_err(err);
}
8057
8058 static void bpf_map__destroy(struct bpf_map *map)
8059 {
8060         if (map->inner_map) {
8061                 bpf_map__destroy(map->inner_map);
8062                 zfree(&map->inner_map);
8063         }
8064
8065         zfree(&map->init_slots);
8066         map->init_slots_sz = 0;
8067
8068         if (map->mmaped) {
8069                 munmap(map->mmaped, bpf_map_mmap_sz(map));
8070                 map->mmaped = NULL;
8071         }
8072
8073         if (map->st_ops) {
8074                 zfree(&map->st_ops->data);
8075                 zfree(&map->st_ops->progs);
8076                 zfree(&map->st_ops->kern_func_off);
8077                 zfree(&map->st_ops);
8078         }
8079
8080         zfree(&map->name);
8081         zfree(&map->real_name);
8082         zfree(&map->pin_path);
8083
8084         if (map->fd >= 0)
8085                 zclose(map->fd);
8086 }
8087
8088 void bpf_object__close(struct bpf_object *obj)
8089 {
8090         size_t i;
8091
8092         if (IS_ERR_OR_NULL(obj))
8093                 return;
8094
8095         usdt_manager_free(obj->usdt_man);
8096         obj->usdt_man = NULL;
8097
8098         bpf_gen__free(obj->gen_loader);
8099         bpf_object__elf_finish(obj);
8100         bpf_object_unload(obj);
8101         btf__free(obj->btf);
8102         btf_ext__free(obj->btf_ext);
8103
8104         for (i = 0; i < obj->nr_maps; i++)
8105                 bpf_map__destroy(&obj->maps[i]);
8106
8107         zfree(&obj->btf_custom_path);
8108         zfree(&obj->kconfig);
8109         zfree(&obj->externs);
8110         obj->nr_extern = 0;
8111
8112         zfree(&obj->maps);
8113         obj->nr_maps = 0;
8114
8115         if (obj->programs && obj->nr_programs) {
8116                 for (i = 0; i < obj->nr_programs; i++)
8117                         bpf_program__exit(&obj->programs[i]);
8118         }
8119         zfree(&obj->programs);
8120
8121         free(obj);
8122 }
8123
8124 const char *bpf_object__name(const struct bpf_object *obj)
8125 {
8126         return obj ? obj->name : libbpf_err_ptr(-EINVAL);
8127 }
8128
8129 unsigned int bpf_object__kversion(const struct bpf_object *obj)
8130 {
8131         return obj ? obj->kern_version : 0;
8132 }
8133
8134 struct btf *bpf_object__btf(const struct bpf_object *obj)
8135 {
8136         return obj ? obj->btf : NULL;
8137 }
8138
8139 int bpf_object__btf_fd(const struct bpf_object *obj)
8140 {
8141         return obj->btf ? btf__fd(obj->btf) : -1;
8142 }
8143
/*
 * Set the kernel version recorded in @obj. Only allowed before the object has
 * been marked as loaded; afterwards -EINVAL is returned via libbpf_err().
 * Returns 0 on success.
 */
int bpf_object__set_kversion(struct bpf_object *obj, __u32 kern_version)
{
        if (obj->loaded)
                return libbpf_err(-EINVAL);

        obj->kern_version = kern_version;

        return 0;
}
8153
8154 int bpf_object__gen_loader(struct bpf_object *obj, struct gen_loader_opts *opts)
8155 {
8156         struct bpf_gen *gen;
8157
8158         if (!opts)
8159                 return -EFAULT;
8160         if (!OPTS_VALID(opts, gen_loader_opts))
8161                 return -EINVAL;
8162         gen = calloc(sizeof(*gen), 1);
8163         if (!gen)
8164                 return -ENOMEM;
8165         gen->opts = opts;
8166         obj->gen_loader = gen;
8167         return 0;
8168 }
8169
8170 static struct bpf_program *
8171 __bpf_program__iter(const struct bpf_program *p, const struct bpf_object *obj,
8172                     bool forward)
8173 {
8174         size_t nr_programs = obj->nr_programs;
8175         ssize_t idx;
8176
8177         if (!nr_programs)
8178                 return NULL;
8179
8180         if (!p)
8181                 /* Iter from the beginning */
8182                 return forward ? &obj->programs[0] :
8183                         &obj->programs[nr_programs - 1];
8184
8185         if (p->obj != obj) {
8186                 pr_warn("error: program handler doesn't match object\n");
8187                 return errno = EINVAL, NULL;
8188         }
8189
8190         idx = (p - obj->programs) + (forward ? 1 : -1);
8191         if (idx >= obj->nr_programs || idx < 0)
8192                 return NULL;
8193         return &obj->programs[idx];
8194 }
8195
/*
 * Return the program following @prev in @obj, skipping every program for
 * which prog_is_subprog() is true. A NULL @prev starts from the first
 * program. Returns NULL when there are no more programs.
 */
struct bpf_program *
bpf_object__next_program(const struct bpf_object *obj, struct bpf_program *prev)
{
        struct bpf_program *prog = __bpf_program__iter(prev, obj, true);

        while (prog && prog_is_subprog(obj, prog))
                prog = __bpf_program__iter(prog, obj, true);

        return prog;
}
8207
/*
 * Return the program preceding @next in @obj, skipping every program for
 * which prog_is_subprog() is true. A NULL @next starts from the last program.
 * Returns NULL when there are no more programs.
 */
struct bpf_program *
bpf_object__prev_program(const struct bpf_object *obj, struct bpf_program *next)
{
        struct bpf_program *prog = __bpf_program__iter(next, obj, false);

        while (prog && prog_is_subprog(obj, prog))
                prog = __bpf_program__iter(prog, obj, false);

        return prog;
}
8219
/*
 * Store @ifindex in @prog's prog_ifindex field. No check against the object's
 * loaded state is made here.
 */
void bpf_program__set_ifindex(struct bpf_program *prog, __u32 ifindex)
{
        prog->prog_ifindex = ifindex;
}
8224
/* Return the name of @prog; @prog must not be NULL. */
const char *bpf_program__name(const struct bpf_program *prog)
{
        return prog->name;
}
8229
/* Return the section name (sec_name) of @prog; @prog must not be NULL. */
const char *bpf_program__section_name(const struct bpf_program *prog)
{
        return prog->sec_name;
}
8234
/* Return the autoload flag of @prog; @prog must not be NULL. */
bool bpf_program__autoload(const struct bpf_program *prog)
{
        return prog->autoload;
}
8239
/*
 * Set the autoload flag of @prog. Only allowed before the owning object has
 * been marked as loaded; afterwards -EINVAL is returned via libbpf_err().
 * Returns 0 on success.
 */
int bpf_program__set_autoload(struct bpf_program *prog, bool autoload)
{
        if (prog->obj->loaded)
                return libbpf_err(-EINVAL);

        prog->autoload = autoload;
        return 0;
}
8248
/*
 * Return @prog's instruction array. The pointer refers to memory owned by
 * @prog (bpf_program__set_insns() may reallocate it).
 */
const struct bpf_insn *bpf_program__insns(const struct bpf_program *prog)
{
        return prog->insns;
}
8253
/* Return the number of instructions (insns_cnt) recorded for @prog. */
size_t bpf_program__insn_cnt(const struct bpf_program *prog)
{
        return prog->insns_cnt;
}
8258
8259 int bpf_program__set_insns(struct bpf_program *prog,
8260                            struct bpf_insn *new_insns, size_t new_insn_cnt)
8261 {
8262         struct bpf_insn *insns;
8263
8264         if (prog->obj->loaded)
8265                 return -EBUSY;
8266
8267         insns = libbpf_reallocarray(prog->insns, new_insn_cnt, sizeof(*insns));
8268         if (!insns) {
8269                 pr_warn("prog '%s': failed to realloc prog code\n", prog->name);
8270                 return -ENOMEM;
8271         }
8272         memcpy(insns, new_insns, new_insn_cnt * sizeof(*insns));
8273
8274         prog->insns = insns;
8275         prog->insns_cnt = new_insn_cnt;
8276         return 0;
8277 }
8278
8279 int bpf_program__fd(const struct bpf_program *prog)
8280 {
8281         if (!prog)
8282                 return libbpf_err(-EINVAL);
8283
8284         if (prog->fd < 0)
8285                 return libbpf_err(-ENOENT);
8286
8287         return prog->fd;
8288 }
8289
/*
 * bpf_program__get_type() is declared through __alias() on
 * bpf_program__type(); presumably this keeps the older getter name available
 * as the same function (see the __alias definition to confirm).
 */
__alias(bpf_program__type)
enum bpf_prog_type bpf_program__get_type(const struct bpf_program *prog);

/* Return the BPF program type recorded for @prog. */
enum bpf_prog_type bpf_program__type(const struct bpf_program *prog)
{
        return prog->type;
}
8297
/*
 * Set the BPF program type of @prog. Only allowed before the owning object
 * has been marked as loaded; afterwards -EBUSY is returned via libbpf_err().
 * Returns 0 on success.
 */
int bpf_program__set_type(struct bpf_program *prog, enum bpf_prog_type type)
{
        if (prog->obj->loaded)
                return libbpf_err(-EBUSY);

        prog->type = type;
        return 0;
}
8306
/*
 * bpf_program__get_expected_attach_type() is declared through __alias() on
 * bpf_program__expected_attach_type(); presumably this keeps the older getter
 * name available as the same function (see the __alias definition to
 * confirm).
 */
__alias(bpf_program__expected_attach_type)
enum bpf_attach_type bpf_program__get_expected_attach_type(const struct bpf_program *prog);

/* Return the expected attach type recorded for @prog. */
enum bpf_attach_type bpf_program__expected_attach_type(const struct bpf_program *prog)
{
        return prog->expected_attach_type;
}
8314
/*
 * Set the expected attach type of @prog. Only allowed before the owning
 * object has been marked as loaded; afterwards -EBUSY is returned via
 * libbpf_err(). Returns 0 on success.
 */
int bpf_program__set_expected_attach_type(struct bpf_program *prog,
                                           enum bpf_attach_type type)
{
        if (prog->obj->loaded)
                return libbpf_err(-EBUSY);

        prog->expected_attach_type = type;
        return 0;
}
8324
/* Return the prog_flags value recorded for @prog. */
__u32 bpf_program__flags(const struct bpf_program *prog)
{
        return prog->prog_flags;
}
8329
/*
 * Set the prog_flags value of @prog. Only allowed before the owning object
 * has been marked as loaded; afterwards -EBUSY is returned via libbpf_err().
 * Returns 0 on success.
 */
int bpf_program__set_flags(struct bpf_program *prog, __u32 flags)
{
        if (prog->obj->loaded)
                return libbpf_err(-EBUSY);

        prog->prog_flags = flags;
        return 0;
}
8338
/* Return the log level recorded for @prog. */
__u32 bpf_program__log_level(const struct bpf_program *prog)
{
        return prog->log_level;
}
8343
/*
 * Set the log level of @prog. Only allowed before the owning object has been
 * marked as loaded; afterwards -EBUSY is returned via libbpf_err().
 * Returns 0 on success.
 */
int bpf_program__set_log_level(struct bpf_program *prog, __u32 log_level)
{
        if (prog->obj->loaded)
                return libbpf_err(-EBUSY);

        prog->log_level = log_level;
        return 0;
}
8352
/*
 * Return the log buffer recorded for @prog and store its size in *@log_size.
 * @log_size is dereferenced unconditionally, so it must not be NULL.
 */
const char *bpf_program__log_buf(const struct bpf_program *prog, size_t *log_size)
{
        *log_size = prog->log_size;
        return prog->log_buf;
}
8358
8359 int bpf_program__set_log_buf(struct bpf_program *prog, char *log_buf, size_t log_size)
8360 {
8361         if (log_size && !log_buf)
8362                 return -EINVAL;
8363         if (prog->log_size > UINT_MAX)
8364                 return -EINVAL;
8365         if (prog->obj->loaded)
8366                 return -EBUSY;
8367
8368         prog->log_buf = log_buf;
8369         prog->log_size = log_size;
8370         return 0;
8371 }
8372
/*
 * Build one struct initializer for a section definition table entry:
 *   sec_pfx - section name string, stored in .sec
 *   ptype   - program type suffix, pasted onto BPF_PROG_TYPE_
 *   atype   - value for .expected_attach_type
 *   flags   - value stored (as long) in .cookie
 * .prog_prepare_load_fn is always libbpf_prepare_prog_load. Any additional
 * arguments are appended verbatim as further initializers after it.
 */
#define SEC_DEF(sec_pfx, ptype, atype, flags, ...) {                        \
        .sec = (char *)(sec_pfx),                                           \
        .prog_type = BPF_PROG_TYPE_##ptype,                                 \
        .expected_attach_type = (atype),                                    \
        .cookie = (long)(flags),                                            \
        .prog_prepare_load_fn = libbpf_prepare_prog_load,                   \
        __VA_ARGS__                                                         \
}
8381
/*
 * Forward declarations of the per-section attach callbacks. They are passed
 * as trailing SEC_DEF() arguments in the section_defs[] table below, so they
 * must be declared before that table.
 */
static int attach_kprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link);
static int attach_uprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link);
static int attach_usdt(const struct bpf_program *prog, long cookie, struct bpf_link **link);
static int attach_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link);
static int attach_raw_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link);
static int attach_trace(const struct bpf_program *prog, long cookie, struct bpf_link **link);
static int attach_kprobe_multi(const struct bpf_program *prog, long cookie, struct bpf_link **link);
static int attach_lsm(const struct bpf_program *prog, long cookie, struct bpf_link **link);
static int attach_iter(const struct bpf_program *prog, long cookie, struct bpf_link **link);
8391
8392 static const struct bpf_sec_def section_defs[] = {
             /* Built-in ELF section name patterns. find_sec_def() consults
              * this table only after the custom handlers, scanning it in
              * array order and returning the first entry accepted by
              * sec_def_matches(): a trailing '+' accepts the bare name or the
              * name followed by "/extras", a trailing '/' accepts any name
              * with that prefix, and anything else must match exactly.
              * NOTE(review): SEC_DEF() is defined earlier in the file; its
              * arguments look like (pattern, program type, expected attach
              * type, SEC_* flags, optional attach callback) -- confirm there.
              */
8393         SEC_DEF("socket",               SOCKET_FILTER, 0, SEC_NONE),
8394         SEC_DEF("sk_reuseport/migrate", SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT_OR_MIGRATE, SEC_ATTACHABLE),
8395         SEC_DEF("sk_reuseport",         SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT, SEC_ATTACHABLE),
8396         SEC_DEF("kprobe+",              KPROBE, 0, SEC_NONE, attach_kprobe),
8397         SEC_DEF("uprobe+",              KPROBE, 0, SEC_NONE, attach_uprobe),
8398         SEC_DEF("uprobe.s+",            KPROBE, 0, SEC_SLEEPABLE, attach_uprobe),
8399         SEC_DEF("kretprobe+",           KPROBE, 0, SEC_NONE, attach_kprobe),
8400         SEC_DEF("uretprobe+",           KPROBE, 0, SEC_NONE, attach_uprobe),
8401         SEC_DEF("uretprobe.s+",         KPROBE, 0, SEC_SLEEPABLE, attach_uprobe),
8402         SEC_DEF("kprobe.multi+",        KPROBE, BPF_TRACE_KPROBE_MULTI, SEC_NONE, attach_kprobe_multi),
8403         SEC_DEF("kretprobe.multi+",     KPROBE, BPF_TRACE_KPROBE_MULTI, SEC_NONE, attach_kprobe_multi),
8404         SEC_DEF("usdt+",                KPROBE, 0, SEC_NONE, attach_usdt),
8405         SEC_DEF("tc",                   SCHED_CLS, 0, SEC_NONE),
8406         SEC_DEF("classifier",           SCHED_CLS, 0, SEC_NONE),
8407         SEC_DEF("action",               SCHED_ACT, 0, SEC_NONE),
8408         SEC_DEF("tracepoint+",          TRACEPOINT, 0, SEC_NONE, attach_tp),
8409         SEC_DEF("tp+",                  TRACEPOINT, 0, SEC_NONE, attach_tp),
8410         SEC_DEF("raw_tracepoint+",      RAW_TRACEPOINT, 0, SEC_NONE, attach_raw_tp),
8411         SEC_DEF("raw_tp+",              RAW_TRACEPOINT, 0, SEC_NONE, attach_raw_tp),
8412         SEC_DEF("raw_tracepoint.w+",    RAW_TRACEPOINT_WRITABLE, 0, SEC_NONE, attach_raw_tp),
8413         SEC_DEF("raw_tp.w+",            RAW_TRACEPOINT_WRITABLE, 0, SEC_NONE, attach_raw_tp),
8414         SEC_DEF("tp_btf+",              TRACING, BPF_TRACE_RAW_TP, SEC_ATTACH_BTF, attach_trace),
8415         SEC_DEF("fentry+",              TRACING, BPF_TRACE_FENTRY, SEC_ATTACH_BTF, attach_trace),
8416         SEC_DEF("fmod_ret+",            TRACING, BPF_MODIFY_RETURN, SEC_ATTACH_BTF, attach_trace),
8417         SEC_DEF("fexit+",               TRACING, BPF_TRACE_FEXIT, SEC_ATTACH_BTF, attach_trace),
8418         SEC_DEF("fentry.s+",            TRACING, BPF_TRACE_FENTRY, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace),
8419         SEC_DEF("fmod_ret.s+",          TRACING, BPF_MODIFY_RETURN, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace),
8420         SEC_DEF("fexit.s+",             TRACING, BPF_TRACE_FEXIT, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace),
8421         SEC_DEF("freplace+",            EXT, 0, SEC_ATTACH_BTF, attach_trace),
8422         SEC_DEF("lsm+",                 LSM, BPF_LSM_MAC, SEC_ATTACH_BTF, attach_lsm),
8423         SEC_DEF("lsm.s+",               LSM, BPF_LSM_MAC, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_lsm),
8424         SEC_DEF("lsm_cgroup+",          LSM, BPF_LSM_CGROUP, SEC_ATTACH_BTF),
8425         SEC_DEF("iter+",                TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF, attach_iter),
8426         SEC_DEF("iter.s+",              TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_iter),
8427         SEC_DEF("syscall",              SYSCALL, 0, SEC_SLEEPABLE),
8428         SEC_DEF("xdp.frags/devmap",     XDP, BPF_XDP_DEVMAP, SEC_XDP_FRAGS),
8429         SEC_DEF("xdp/devmap",           XDP, BPF_XDP_DEVMAP, SEC_ATTACHABLE),
8430         SEC_DEF("xdp.frags/cpumap",     XDP, BPF_XDP_CPUMAP, SEC_XDP_FRAGS),
8431         SEC_DEF("xdp/cpumap",           XDP, BPF_XDP_CPUMAP, SEC_ATTACHABLE),
8432         SEC_DEF("xdp.frags",            XDP, BPF_XDP, SEC_XDP_FRAGS),
8433         SEC_DEF("xdp",                  XDP, BPF_XDP, SEC_ATTACHABLE_OPT),
8434         SEC_DEF("perf_event",           PERF_EVENT, 0, SEC_NONE),
8435         SEC_DEF("lwt_in",               LWT_IN, 0, SEC_NONE),
8436         SEC_DEF("lwt_out",              LWT_OUT, 0, SEC_NONE),
8437         SEC_DEF("lwt_xmit",             LWT_XMIT, 0, SEC_NONE),
8438         SEC_DEF("lwt_seg6local",        LWT_SEG6LOCAL, 0, SEC_NONE),
8439         SEC_DEF("sockops",              SOCK_OPS, BPF_CGROUP_SOCK_OPS, SEC_ATTACHABLE_OPT),
8440         SEC_DEF("sk_skb/stream_parser", SK_SKB, BPF_SK_SKB_STREAM_PARSER, SEC_ATTACHABLE_OPT),
8441         SEC_DEF("sk_skb/stream_verdict",SK_SKB, BPF_SK_SKB_STREAM_VERDICT, SEC_ATTACHABLE_OPT),
8442         SEC_DEF("sk_skb",               SK_SKB, 0, SEC_NONE),
8443         SEC_DEF("sk_msg",               SK_MSG, BPF_SK_MSG_VERDICT, SEC_ATTACHABLE_OPT),
8444         SEC_DEF("lirc_mode2",           LIRC_MODE2, BPF_LIRC_MODE2, SEC_ATTACHABLE_OPT),
8445         SEC_DEF("flow_dissector",       FLOW_DISSECTOR, BPF_FLOW_DISSECTOR, SEC_ATTACHABLE_OPT),
8446         SEC_DEF("cgroup_skb/ingress",   CGROUP_SKB, BPF_CGROUP_INET_INGRESS, SEC_ATTACHABLE_OPT),
8447         SEC_DEF("cgroup_skb/egress",    CGROUP_SKB, BPF_CGROUP_INET_EGRESS, SEC_ATTACHABLE_OPT),
8448         SEC_DEF("cgroup/skb",           CGROUP_SKB, 0, SEC_NONE),
8449         SEC_DEF("cgroup/sock_create",   CGROUP_SOCK, BPF_CGROUP_INET_SOCK_CREATE, SEC_ATTACHABLE),
8450         SEC_DEF("cgroup/sock_release",  CGROUP_SOCK, BPF_CGROUP_INET_SOCK_RELEASE, SEC_ATTACHABLE),
8451         SEC_DEF("cgroup/sock",          CGROUP_SOCK, BPF_CGROUP_INET_SOCK_CREATE, SEC_ATTACHABLE_OPT),
8452         SEC_DEF("cgroup/post_bind4",    CGROUP_SOCK, BPF_CGROUP_INET4_POST_BIND, SEC_ATTACHABLE),
8453         SEC_DEF("cgroup/post_bind6",    CGROUP_SOCK, BPF_CGROUP_INET6_POST_BIND, SEC_ATTACHABLE),
8454         SEC_DEF("cgroup/bind4",         CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_BIND, SEC_ATTACHABLE),
8455         SEC_DEF("cgroup/bind6",         CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_BIND, SEC_ATTACHABLE),
8456         SEC_DEF("cgroup/connect4",      CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_CONNECT, SEC_ATTACHABLE),
8457         SEC_DEF("cgroup/connect6",      CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_CONNECT, SEC_ATTACHABLE),
8458         SEC_DEF("cgroup/sendmsg4",      CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_SENDMSG, SEC_ATTACHABLE),
8459         SEC_DEF("cgroup/sendmsg6",      CGROUP_SOCK_ADDR, BPF_CGROUP_UDP6_SENDMSG, SEC_ATTACHABLE),
8460         SEC_DEF("cgroup/recvmsg4",      CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_RECVMSG, SEC_ATTACHABLE),
8461         SEC_DEF("cgroup/recvmsg6",      CGROUP_SOCK_ADDR, BPF_CGROUP_UDP6_RECVMSG, SEC_ATTACHABLE),
8462         SEC_DEF("cgroup/getpeername4",  CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_GETPEERNAME, SEC_ATTACHABLE),
8463         SEC_DEF("cgroup/getpeername6",  CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_GETPEERNAME, SEC_ATTACHABLE),
8464         SEC_DEF("cgroup/getsockname4",  CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_GETSOCKNAME, SEC_ATTACHABLE),
8465         SEC_DEF("cgroup/getsockname6",  CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_GETSOCKNAME, SEC_ATTACHABLE),
8466         SEC_DEF("cgroup/sysctl",        CGROUP_SYSCTL, BPF_CGROUP_SYSCTL, SEC_ATTACHABLE),
8467         SEC_DEF("cgroup/getsockopt",    CGROUP_SOCKOPT, BPF_CGROUP_GETSOCKOPT, SEC_ATTACHABLE),
8468         SEC_DEF("cgroup/setsockopt",    CGROUP_SOCKOPT, BPF_CGROUP_SETSOCKOPT, SEC_ATTACHABLE),
8469         SEC_DEF("cgroup/dev",           CGROUP_DEVICE, BPF_CGROUP_DEVICE, SEC_ATTACHABLE_OPT),
8470         SEC_DEF("struct_ops+",          STRUCT_OPS, 0, SEC_NONE),
8471         SEC_DEF("sk_lookup",            SK_LOOKUP, BPF_SK_LOOKUP, SEC_ATTACHABLE),
8472 };
8473
8474 static size_t custom_sec_def_cnt; /* number of entries stored in custom_sec_defs */
8475 static struct bpf_sec_def *custom_sec_defs; /* heap array of handlers registered with a non-NULL pattern */
8476 static struct bpf_sec_def custom_fallback_def; /* handler registered with a NULL pattern; find_sec_def() returns it when nothing else matches */
8477 static bool has_custom_fallback_def; /* true while custom_fallback_def holds a registered handler */
8478
8479 static int last_custom_sec_def_handler_id; /* most recently issued handler ID; pre-incremented on every successful registration */
8480
8481 int libbpf_register_prog_handler(const char *sec,
8482                                  enum bpf_prog_type prog_type,
8483                                  enum bpf_attach_type exp_attach_type,
8484                                  const struct libbpf_prog_handler_opts *opts)
8485 {
8486         struct bpf_sec_def *sec_def;
8487
8488         if (!OPTS_VALID(opts, libbpf_prog_handler_opts))
8489                 return libbpf_err(-EINVAL);
8490
8491         if (last_custom_sec_def_handler_id == INT_MAX) /* prevent overflow */
8492                 return libbpf_err(-E2BIG);
8493
8494         if (sec) {
8495                 sec_def = libbpf_reallocarray(custom_sec_defs, custom_sec_def_cnt + 1,
8496                                               sizeof(*sec_def));
8497                 if (!sec_def)
8498                         return libbpf_err(-ENOMEM);
8499
8500                 custom_sec_defs = sec_def;
8501                 sec_def = &custom_sec_defs[custom_sec_def_cnt];
8502         } else {
8503                 if (has_custom_fallback_def)
8504                         return libbpf_err(-EBUSY);
8505
8506                 sec_def = &custom_fallback_def;
8507         }
8508
8509         sec_def->sec = sec ? strdup(sec) : NULL;
8510         if (sec && !sec_def->sec)
8511                 return libbpf_err(-ENOMEM);
8512
8513         sec_def->prog_type = prog_type;
8514         sec_def->expected_attach_type = exp_attach_type;
8515         sec_def->cookie = OPTS_GET(opts, cookie, 0);
8516
8517         sec_def->prog_setup_fn = OPTS_GET(opts, prog_setup_fn, NULL);
8518         sec_def->prog_prepare_load_fn = OPTS_GET(opts, prog_prepare_load_fn, NULL);
8519         sec_def->prog_attach_fn = OPTS_GET(opts, prog_attach_fn, NULL);
8520
8521         sec_def->handler_id = ++last_custom_sec_def_handler_id;
8522
8523         if (sec)
8524                 custom_sec_def_cnt++;
8525         else
8526                 has_custom_fallback_def = true;
8527
8528         return sec_def->handler_id;
8529 }
8530
8531 int libbpf_unregister_prog_handler(int handler_id)
8532 {
8533         struct bpf_sec_def *sec_defs;
8534         int i;
8535
8536         if (handler_id <= 0)
8537                 return libbpf_err(-EINVAL);
8538
8539         if (has_custom_fallback_def && custom_fallback_def.handler_id == handler_id) {
8540                 memset(&custom_fallback_def, 0, sizeof(custom_fallback_def));
8541                 has_custom_fallback_def = false;
8542                 return 0;
8543         }
8544
8545         for (i = 0; i < custom_sec_def_cnt; i++) {
8546                 if (custom_sec_defs[i].handler_id == handler_id)
8547                         break;
8548         }
8549
8550         if (i == custom_sec_def_cnt)
8551                 return libbpf_err(-ENOENT);
8552
8553         free(custom_sec_defs[i].sec);
8554         for (i = i + 1; i < custom_sec_def_cnt; i++)
8555                 custom_sec_defs[i - 1] = custom_sec_defs[i];
8556         custom_sec_def_cnt--;
8557
8558         /* try to shrink the array, but it's ok if we couldn't */
8559         sec_defs = libbpf_reallocarray(custom_sec_defs, custom_sec_def_cnt, sizeof(*sec_defs));
8560         if (sec_defs)
8561                 custom_sec_defs = sec_defs;
8562
8563         return 0;
8564 }
8565
8566 static bool sec_def_matches(const struct bpf_sec_def *sec_def, const char *sec_name)
8567 {
8568         size_t len = strlen(sec_def->sec);
8569
8570         /* "type/" always has to have proper SEC("type/extras") form */
8571         if (sec_def->sec[len - 1] == '/') {
8572                 if (str_has_pfx(sec_name, sec_def->sec))
8573                         return true;
8574                 return false;
8575         }
8576
8577         /* "type+" means it can be either exact SEC("type") or
8578          * well-formed SEC("type/extras") with proper '/' separator
8579          */
8580         if (sec_def->sec[len - 1] == '+') {
8581                 len--;
8582                 /* not even a prefix */
8583                 if (strncmp(sec_name, sec_def->sec, len) != 0)
8584                         return false;
8585                 /* exact match or has '/' separator */
8586                 if (sec_name[len] == '\0' || sec_name[len] == '/')
8587                         return true;
8588                 return false;
8589         }
8590
8591         return strcmp(sec_name, sec_def->sec) == 0;
8592 }
8593
8594 static const struct bpf_sec_def *find_sec_def(const char *sec_name)
8595 {
8596         const struct bpf_sec_def *sec_def;
8597         int i, n;
8598
8599         n = custom_sec_def_cnt;
8600         for (i = 0; i < n; i++) {
8601                 sec_def = &custom_sec_defs[i];
8602                 if (sec_def_matches(sec_def, sec_name))
8603                         return sec_def;
8604         }
8605
8606         n = ARRAY_SIZE(section_defs);
8607         for (i = 0; i < n; i++) {
8608                 sec_def = &section_defs[i];
8609                 if (sec_def_matches(sec_def, sec_name))
8610                         return sec_def;
8611         }
8612
8613         if (has_custom_fallback_def)
8614                 return &custom_fallback_def;
8615
8616         return NULL;
8617 }
8618
8619 #define MAX_TYPE_NAME_SIZE 32
8620
8621 static char *libbpf_get_type_names(bool attach_type)
8622 {
8623         int i, len = ARRAY_SIZE(section_defs) * MAX_TYPE_NAME_SIZE;
8624         char *buf;
8625
8626         buf = malloc(len);
8627         if (!buf)
8628                 return NULL;
8629
8630         buf[0] = '\0';
8631         /* Forge string buf with all available names */
8632         for (i = 0; i < ARRAY_SIZE(section_defs); i++) {
8633                 const struct bpf_sec_def *sec_def = &section_defs[i];
8634
8635                 if (attach_type) {
8636                         if (sec_def->prog_prepare_load_fn != libbpf_prepare_prog_load)
8637                                 continue;
8638
8639                         if (!(sec_def->cookie & SEC_ATTACHABLE))
8640                                 continue;
8641                 }
8642
8643                 if (strlen(buf) + strlen(section_defs[i].sec) + 2 > len) {
8644                         free(buf);
8645                         return NULL;
8646                 }
8647                 strcat(buf, " ");
8648                 strcat(buf, section_defs[i].sec);
8649         }
8650
8651         return buf;
8652 }
8653
8654 int libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type,
8655                              enum bpf_attach_type *expected_attach_type)
8656 {
8657         const struct bpf_sec_def *sec_def;
8658         char *type_names;
8659
8660         if (!name)
8661                 return libbpf_err(-EINVAL);
8662
8663         sec_def = find_sec_def(name);
8664         if (sec_def) {
8665                 *prog_type = sec_def->prog_type;
8666                 *expected_attach_type = sec_def->expected_attach_type;
8667                 return 0;
8668         }
8669
8670         pr_debug("failed to guess program type from ELF section '%s'\n", name);
8671         type_names = libbpf_get_type_names(false);
8672         if (type_names != NULL) {
8673                 pr_debug("supported section(type) names are:%s\n", type_names);
8674                 free(type_names);
8675         }
8676
8677         return libbpf_err(-ESRCH);
8678 }
8679
8680 const char *libbpf_bpf_attach_type_str(enum bpf_attach_type t)
8681 {
8682         if (t < 0 || t >= ARRAY_SIZE(attach_type_name))
8683                 return NULL;
8684
8685         return attach_type_name[t];
8686 }
8687
8688 const char *libbpf_bpf_link_type_str(enum bpf_link_type t)
8689 {
8690         if (t < 0 || t >= ARRAY_SIZE(link_type_name))
8691                 return NULL;
8692
8693         return link_type_name[t];
8694 }
8695
8696 const char *libbpf_bpf_map_type_str(enum bpf_map_type t)
8697 {
8698         if (t < 0 || t >= ARRAY_SIZE(map_type_name))
8699                 return NULL;
8700
8701         return map_type_name[t];
8702 }
8703
8704 const char *libbpf_bpf_prog_type_str(enum bpf_prog_type t)
8705 {
8706         if (t < 0 || t >= ARRAY_SIZE(prog_type_name))
8707                 return NULL;
8708
8709         return prog_type_name[t];
8710 }
8711
8712 static struct bpf_map *find_struct_ops_map_by_offset(struct bpf_object *obj,
8713                                                      size_t offset)
8714 {
8715         struct bpf_map *map;
8716         size_t i;
8717
8718         for (i = 0; i < obj->nr_maps; i++) {
8719                 map = &obj->maps[i];
8720                 if (!bpf_map__is_struct_ops(map))
8721                         continue;
8722                 if (map->sec_offset <= offset &&
8723                     offset - map->sec_offset < map->def.value_size)
8724                         return map;
8725         }
8726
8727         return NULL;
8728 }
8729
8730 /* Collect the reloc from ELF and populate the st_ops->progs[] */
     /* For every relocation entry in @data this function:
      *   1. finds the struct_ops map whose value region contains r_offset;
      *   2. finds the struct member located at that offset inside the map;
      *   3. finds the BPF program at the symbol's section / instruction index;
      *   4. stores the program in st_ops->progs[] at the member's index.
      *
      * Returns 0 on success. On failure returns a negative error code:
      * -LIBBPF_ERRNO__FORMAT, -LIBBPF_ERRNO__RELOC or -EINVAL, depending on
      * the check that failed (each failure is also logged with pr_warn()).
      */
8731 static int bpf_object__collect_st_ops_relos(struct bpf_object *obj,
8732                                             Elf64_Shdr *shdr, Elf_Data *data)
8733 {
8734         const struct btf_member *member;
8735         struct bpf_struct_ops *st_ops;
8736         struct bpf_program *prog;
8737         unsigned int shdr_idx;
8738         const struct btf *btf;
8739         struct bpf_map *map;
8740         unsigned int moff, insn_idx;
8741         const char *name;
8742         __u32 member_idx;
8743         Elf64_Sym *sym;
8744         Elf64_Rel *rel;
8745         int i, nrels;
8746
8747         btf = obj->btf;
             /* number of relocation entries: section size over entry size */
8748         nrels = shdr->sh_size / shdr->sh_entsize;
8749         for (i = 0; i < nrels; i++) {
8750                 rel = elf_rel_by_idx(data, i);
8751                 if (!rel) {
8752                         pr_warn("struct_ops reloc: failed to get %d reloc\n", i);
8753                         return -LIBBPF_ERRNO__FORMAT;
8754                 }
8755
8756                 sym = elf_sym_by_idx(obj, ELF64_R_SYM(rel->r_info));
8757                 if (!sym) {
8758                         pr_warn("struct_ops reloc: symbol %zx not found\n",
8759                                 (size_t)ELF64_R_SYM(rel->r_info));
8760                         return -LIBBPF_ERRNO__FORMAT;
8761                 }
8762
                     /* symbol name is used for logging only; falls back to "<?>" */
8763                 name = elf_sym_str(obj, sym->st_name) ?: "<?>";
8764                 map = find_struct_ops_map_by_offset(obj, rel->r_offset);
8765                 if (!map) {
8766                         pr_warn("struct_ops reloc: cannot find map at rel->r_offset %zu\n",
8767                                 (size_t)rel->r_offset);
8768                         return -EINVAL;
8769                 }
8770
                     /* offset of the relocated field relative to the start of the map's value */
8771                 moff = rel->r_offset - map->sec_offset;
8772                 shdr_idx = sym->st_shndx;
8773                 st_ops = map->st_ops;
8774                 pr_debug("struct_ops reloc %s: for %lld value %lld shdr_idx %u rel->r_offset %zu map->sec_offset %zu name %d (\'%s\')\n",
8775                          map->name,
8776                          (long long)(rel->r_info >> 32),
8777                          (long long)sym->st_value,
8778                          shdr_idx, (size_t)rel->r_offset,
8779                          map->sec_offset, sym->st_name, name);
8780
                     /* section indexes at or above SHN_LORESERVE are rejected */
8781                 if (shdr_idx >= SHN_LORESERVE) {
8782                         pr_warn("struct_ops reloc %s: rel->r_offset %zu shdr_idx %u unsupported non-static function\n",
8783                                 map->name, (size_t)rel->r_offset, shdr_idx);
8784                         return -LIBBPF_ERRNO__RELOC;
8785                 }
                     /* symbol value must be a whole number of instructions */
8786                 if (sym->st_value % BPF_INSN_SZ) {
8787                         pr_warn("struct_ops reloc %s: invalid target program offset %llu\n",
8788                                 map->name, (unsigned long long)sym->st_value);
8789                         return -LIBBPF_ERRNO__FORMAT;
8790                 }
8791                 insn_idx = sym->st_value / BPF_INSN_SZ;
8792
                     /* NOTE(review): moff * 8 presumably converts the byte offset
                      * into a bit offset for the member lookup -- confirm against
                      * find_member_by_offset()
                      */
8793                 member = find_member_by_offset(st_ops->type, moff * 8);
8794                 if (!member) {
8795                         pr_warn("struct_ops reloc %s: cannot find member at moff %u\n",
8796                                 map->name, moff);
8797                         return -EINVAL;
8798                 }
8799                 member_idx = member - btf_members(st_ops->type);
                     /* from here on 'name' is the struct member name, not the symbol name */
8800                 name = btf__name_by_offset(btf, member->name_off);
8801
8802                 if (!resolve_func_ptr(btf, member->type, NULL)) {
8803                         pr_warn("struct_ops reloc %s: cannot relocate non func ptr %s\n",
8804                                 map->name, name);
8805                         return -EINVAL;
8806                 }
8807
8808                 prog = find_prog_by_sec_insn(obj, shdr_idx, insn_idx);
8809                 if (!prog) {
8810                         pr_warn("struct_ops reloc %s: cannot find prog at shdr_idx %u to relocate func ptr %s\n",
8811                                 map->name, shdr_idx, name);
8812                         return -EINVAL;
8813                 }
8814
8815                 /* prevent the use of BPF prog with invalid type */
8816                 if (prog->type != BPF_PROG_TYPE_STRUCT_OPS) {
8817                         pr_warn("struct_ops reloc %s: prog %s is not struct_ops BPF program\n",
8818                                 map->name, prog->name);
8819                         return -EINVAL;
8820                 }
8821
8822                 /* if we haven't yet processed this BPF program, record proper
8823                  * attach_btf_id and member_idx
8824                  */
8825                 if (!prog->attach_btf_id) {
8826                         prog->attach_btf_id = st_ops->type_id;
8827                         prog->expected_attach_type = member_idx;
8828                 }
8829
8830                 /* struct_ops BPF prog can be re-used between multiple
8831                  * .struct_ops as long as it's the same struct_ops struct
8832                  * definition and the same function pointer field
8833                  */
8834                 if (prog->attach_btf_id != st_ops->type_id ||
8835                     prog->expected_attach_type != member_idx) {
8836                         pr_warn("struct_ops reloc %s: cannot use prog %s in sec %s with type %u attach_btf_id %u expected_attach_type %u for func ptr %s\n",
8837                                 map->name, prog->name, prog->sec_name, prog->type,
8838                                 prog->attach_btf_id, prog->expected_attach_type, name);
8839                         return -EINVAL;
8840                 }
8841
8842                 st_ops->progs[member_idx] = prog;
8843         }
8844
8845         return 0;
8846 }
8847
8848 #define BTF_TRACE_PREFIX "btf_trace_" /* name prefix selected for BPF_TRACE_RAW_TP */
8849 #define BTF_LSM_PREFIX "bpf_lsm_" /* name prefix selected for BPF_LSM_MAC and BPF_LSM_CGROUP */
8850 #define BTF_ITER_PREFIX "bpf_iter_" /* name prefix selected for BPF_TRACE_ITER */
8851 #define BTF_MAX_NAME_SIZE 128 /* size of the prefix+name buffer in find_btf_by_prefix_kind() */
8852
8853 void btf_get_kernel_prefix_kind(enum bpf_attach_type attach_type,
8854                                 const char **prefix, int *kind)
8855 {
8856         switch (attach_type) {
8857         case BPF_TRACE_RAW_TP:
8858                 *prefix = BTF_TRACE_PREFIX;
8859                 *kind = BTF_KIND_TYPEDEF;
8860                 break;
8861         case BPF_LSM_MAC:
8862         case BPF_LSM_CGROUP:
8863                 *prefix = BTF_LSM_PREFIX;
8864                 *kind = BTF_KIND_FUNC;
8865                 break;
8866         case BPF_TRACE_ITER:
8867                 *prefix = BTF_ITER_PREFIX;
8868                 *kind = BTF_KIND_FUNC;
8869                 break;
8870         default:
8871                 *prefix = "";
8872                 *kind = BTF_KIND_FUNC;
8873         }
8874 }
8875
8876 static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix,
8877                                    const char *name, __u32 kind)
8878 {
8879         char btf_type_name[BTF_MAX_NAME_SIZE];
8880         int ret;
8881
8882         ret = snprintf(btf_type_name, sizeof(btf_type_name),
8883                        "%s%s", prefix, name);
8884         /* snprintf returns the number of characters written excluding the
8885          * terminating null. So, if >= BTF_MAX_NAME_SIZE are written, it
8886          * indicates truncation.
8887          */
8888         if (ret < 0 || ret >= sizeof(btf_type_name))
8889                 return -ENAMETOOLONG;
8890         return btf__find_by_name_kind(btf, btf_type_name, kind);
8891 }
8892
8893 static inline int find_attach_btf_id(struct btf *btf, const char *name,
8894                                      enum bpf_attach_type attach_type)
8895 {
8896         const char *prefix;
8897         int kind;
8898
8899         btf_get_kernel_prefix_kind(attach_type, &prefix, &kind);
8900         return find_btf_by_prefix_kind(btf, prefix, name, kind);
8901 }
8902
8903 int libbpf_find_vmlinux_btf_id(const char *name,
8904                                enum bpf_attach_type attach_type)
8905 {
8906         struct btf *btf;
8907         int err;
8908
8909         btf = btf__load_vmlinux_btf();
8910         err = libbpf_get_error(btf);
8911         if (err) {
8912                 pr_warn("vmlinux BTF is not found\n");
8913                 return libbpf_err(err);
8914         }
8915
8916         err = find_attach_btf_id(btf, name, attach_type);
8917         if (err <= 0)
8918                 pr_warn("%s is not found in vmlinux BTF\n", name);
8919
8920         btf__free(btf);
8921         return libbpf_err(err);
8922 }
8923
8924 static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd)
8925 {
8926         struct bpf_prog_info info = {};
8927         __u32 info_len = sizeof(info);
8928         struct btf *btf;
8929         int err;
8930
8931         err = bpf_obj_get_info_by_fd(attach_prog_fd, &info, &info_len);
8932         if (err) {
8933                 pr_warn("failed bpf_obj_get_info_by_fd for FD %d: %d\n",
8934                         attach_prog_fd, err);
8935                 return err;
8936         }
8937
8938         err = -EINVAL;
8939         if (!info.btf_id) {
8940                 pr_warn("The target program doesn't have BTF\n");
8941                 goto out;
8942         }
8943         btf = btf__load_from_kernel_by_id(info.btf_id);
8944         err = libbpf_get_error(btf);
8945         if (err) {
8946                 pr_warn("Failed to get BTF %d of the program: %d\n", info.btf_id, err);
8947                 goto out;
8948         }
8949         err = btf__find_by_name_kind(btf, name, BTF_KIND_FUNC);
8950         btf__free(btf);
8951         if (err <= 0) {
8952                 pr_warn("%s is not found in prog's BTF\n", name);
8953                 goto out;
8954         }
8955 out:
8956         return err;
8957 }
8958
8959 static int find_kernel_btf_id(struct bpf_object *obj, const char *attach_name,
8960                               enum bpf_attach_type attach_type,
8961                               int *btf_obj_fd, int *btf_type_id)
8962 {
8963         int ret, i;
8964
8965         ret = find_attach_btf_id(obj->btf_vmlinux, attach_name, attach_type);
8966         if (ret > 0) {
8967                 *btf_obj_fd = 0; /* vmlinux BTF */
8968                 *btf_type_id = ret;
8969                 return 0;
8970         }
8971         if (ret != -ENOENT)
8972                 return ret;
8973
8974         ret = load_module_btfs(obj);
8975         if (ret)
8976                 return ret;
8977
8978         for (i = 0; i < obj->btf_module_cnt; i++) {
8979                 const struct module_btf *mod = &obj->btf_modules[i];
8980
8981                 ret = find_attach_btf_id(mod->btf, attach_name, attach_type);
8982                 if (ret > 0) {
8983                         *btf_obj_fd = mod->fd;
8984                         *btf_type_id = ret;
8985                         return 0;
8986                 }
8987                 if (ret == -ENOENT)
8988                         continue;
8989
8990                 return ret;
8991         }
8992
8993         return -ESRCH;
8994 }
8995
8996 static int libbpf_find_attach_btf_id(struct bpf_program *prog, const char *attach_name,
8997                                      int *btf_obj_fd, int *btf_type_id)
8998 {
8999         enum bpf_attach_type attach_type = prog->expected_attach_type;
9000         __u32 attach_prog_fd = prog->attach_prog_fd;
9001         int err = 0;
9002
9003         /* BPF program's BTF ID */
9004         if (attach_prog_fd) {
9005                 err = libbpf_find_prog_btf_id(attach_name, attach_prog_fd);
9006                 if (err < 0) {
9007                         pr_warn("failed to find BPF program (FD %d) BTF ID for '%s': %d\n",
9008                                  attach_prog_fd, attach_name, err);
9009                         return err;
9010                 }
9011                 *btf_obj_fd = 0;
9012                 *btf_type_id = err;
9013                 return 0;
9014         }
9015
9016         /* kernel/module BTF ID */
9017         if (prog->obj->gen_loader) {
9018                 bpf_gen__record_attach_target(prog->obj->gen_loader, attach_name, attach_type);
9019                 *btf_obj_fd = 0;
9020                 *btf_type_id = 1;
9021         } else {
9022                 err = find_kernel_btf_id(prog->obj, attach_name, attach_type, btf_obj_fd, btf_type_id);
9023         }
9024         if (err) {
9025                 pr_warn("failed to find kernel BTF type ID of '%s': %d\n", attach_name, err);
9026                 return err;
9027         }
9028         return 0;
9029 }
9030
9031 int libbpf_attach_type_by_name(const char *name,
9032                                enum bpf_attach_type *attach_type)
9033 {
9034         char *type_names;
9035         const struct bpf_sec_def *sec_def;
9036
9037         if (!name)
9038                 return libbpf_err(-EINVAL);
9039
9040         sec_def = find_sec_def(name);
9041         if (!sec_def) {
9042                 pr_debug("failed to guess attach type based on ELF section name '%s'\n", name);
9043                 type_names = libbpf_get_type_names(true);
9044                 if (type_names != NULL) {
9045                         pr_debug("attachable section(type) names are:%s\n", type_names);
9046                         free(type_names);
9047                 }
9048
9049                 return libbpf_err(-EINVAL);
9050         }
9051
9052         if (sec_def->prog_prepare_load_fn != libbpf_prepare_prog_load)
9053                 return libbpf_err(-EINVAL);
9054         if (!(sec_def->cookie & SEC_ATTACHABLE))
9055                 return libbpf_err(-EINVAL);
9056
9057         *attach_type = sec_def->expected_attach_type;
9058         return 0;
9059 }
9060
9061 int bpf_map__fd(const struct bpf_map *map)
9062 {
9063         return map ? map->fd : libbpf_err(-EINVAL);
9064 }
9065
9066 static bool map_uses_real_name(const struct bpf_map *map)
9067 {
9068         /* Since libbpf started to support custom .data.* and .rodata.* maps,
9069          * their user-visible name differs from kernel-visible name. Users see
9070          * such map's corresponding ELF section name as a map name.
9071          * This check distinguishes .data/.rodata from .data.* and .rodata.*
9072          * maps to know which name has to be returned to the user.
9073          */
9074         if (map->libbpf_type == LIBBPF_MAP_DATA && strcmp(map->real_name, DATA_SEC) != 0)
9075                 return true;
9076         if (map->libbpf_type == LIBBPF_MAP_RODATA && strcmp(map->real_name, RODATA_SEC) != 0)
9077                 return true;
9078         return false;
9079 }
9080
9081 const char *bpf_map__name(const struct bpf_map *map)
9082 {
9083         if (!map)
9084                 return NULL;
9085
9086         if (map_uses_real_name(map))
9087                 return map->real_name;
9088
9089         return map->name;
9090 }
9091
9092 enum bpf_map_type bpf_map__type(const struct bpf_map *map)
9093 {
9094         return map->def.type;
9095 }
9096
9097 int bpf_map__set_type(struct bpf_map *map, enum bpf_map_type type)
9098 {
9099         if (map->fd >= 0)
9100                 return libbpf_err(-EBUSY);
9101         map->def.type = type;
9102         return 0;
9103 }
9104
9105 __u32 bpf_map__map_flags(const struct bpf_map *map)
9106 {
9107         return map->def.map_flags;
9108 }
9109
9110 int bpf_map__set_map_flags(struct bpf_map *map, __u32 flags)
9111 {
9112         if (map->fd >= 0)
9113                 return libbpf_err(-EBUSY);
9114         map->def.map_flags = flags;
9115         return 0;
9116 }
9117
9118 __u64 bpf_map__map_extra(const struct bpf_map *map)
9119 {
9120         return map->map_extra;
9121 }
9122
9123 int bpf_map__set_map_extra(struct bpf_map *map, __u64 map_extra)
9124 {
9125         if (map->fd >= 0)
9126                 return libbpf_err(-EBUSY);
9127         map->map_extra = map_extra;
9128         return 0;
9129 }
9130
9131 __u32 bpf_map__numa_node(const struct bpf_map *map)
9132 {
9133         return map->numa_node;
9134 }
9135
9136 int bpf_map__set_numa_node(struct bpf_map *map, __u32 numa_node)
9137 {
9138         if (map->fd >= 0)
9139                 return libbpf_err(-EBUSY);
9140         map->numa_node = numa_node;
9141         return 0;
9142 }
9143
9144 __u32 bpf_map__key_size(const struct bpf_map *map)
9145 {
9146         return map->def.key_size;
9147 }
9148
9149 int bpf_map__set_key_size(struct bpf_map *map, __u32 size)
9150 {
9151         if (map->fd >= 0)
9152                 return libbpf_err(-EBUSY);
9153         map->def.key_size = size;
9154         return 0;
9155 }
9156
9157 __u32 bpf_map__value_size(const struct bpf_map *map)
9158 {
9159         return map->def.value_size;
9160 }
9161
9162 int bpf_map__set_value_size(struct bpf_map *map, __u32 size)
9163 {
9164         if (map->fd >= 0)
9165                 return libbpf_err(-EBUSY);
9166         map->def.value_size = size;
9167         return 0;
9168 }
9169
9170 __u32 bpf_map__btf_key_type_id(const struct bpf_map *map)
9171 {
9172         return map ? map->btf_key_type_id : 0;
9173 }
9174
9175 __u32 bpf_map__btf_value_type_id(const struct bpf_map *map)
9176 {
9177         return map ? map->btf_value_type_id : 0;
9178 }
9179
9180 int bpf_map__set_initial_value(struct bpf_map *map,
9181                                const void *data, size_t size)
9182 {
9183         if (!map->mmaped || map->libbpf_type == LIBBPF_MAP_KCONFIG ||
9184             size != map->def.value_size || map->fd >= 0)
9185                 return libbpf_err(-EINVAL);
9186
9187         memcpy(map->mmaped, data, size);
9188         return 0;
9189 }
9190
9191 const void *bpf_map__initial_value(struct bpf_map *map, size_t *psize)
9192 {
9193         if (!map->mmaped)
9194                 return NULL;
9195         *psize = map->def.value_size;
9196         return map->mmaped;
9197 }
9198
9199 bool bpf_map__is_internal(const struct bpf_map *map)
9200 {
9201         return map->libbpf_type != LIBBPF_MAP_UNSPEC;
9202 }
9203
9204 __u32 bpf_map__ifindex(const struct bpf_map *map)
9205 {
9206         return map->map_ifindex;
9207 }
9208
9209 int bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex)
9210 {
9211         if (map->fd >= 0)
9212                 return libbpf_err(-EBUSY);
9213         map->map_ifindex = ifindex;
9214         return 0;
9215 }
9216
9217 int bpf_map__set_inner_map_fd(struct bpf_map *map, int fd)
9218 {
9219         if (!bpf_map_type__is_map_in_map(map->def.type)) {
9220                 pr_warn("error: unsupported map type\n");
9221                 return libbpf_err(-EINVAL);
9222         }
9223         if (map->inner_map_fd != -1) {
9224                 pr_warn("error: inner_map_fd already specified\n");
9225                 return libbpf_err(-EINVAL);
9226         }
9227         if (map->inner_map) {
9228                 bpf_map__destroy(map->inner_map);
9229                 zfree(&map->inner_map);
9230         }
9231         map->inner_map_fd = fd;
9232         return 0;
9233 }
9234
9235 static struct bpf_map *
9236 __bpf_map__iter(const struct bpf_map *m, const struct bpf_object *obj, int i)
9237 {
9238         ssize_t idx;
9239         struct bpf_map *s, *e;
9240
9241         if (!obj || !obj->maps)
9242                 return errno = EINVAL, NULL;
9243
9244         s = obj->maps;
9245         e = obj->maps + obj->nr_maps;
9246
9247         if ((m < s) || (m >= e)) {
9248                 pr_warn("error in %s: map handler doesn't belong to object\n",
9249                          __func__);
9250                 return errno = EINVAL, NULL;
9251         }
9252
9253         idx = (m - obj->maps) + i;
9254         if (idx >= obj->nr_maps || idx < 0)
9255                 return NULL;
9256         return &obj->maps[idx];
9257 }
9258
9259 struct bpf_map *
9260 bpf_object__next_map(const struct bpf_object *obj, const struct bpf_map *prev)
9261 {
9262         if (prev == NULL)
9263                 return obj->maps;
9264
9265         return __bpf_map__iter(prev, obj, 1);
9266 }
9267
9268 struct bpf_map *
9269 bpf_object__prev_map(const struct bpf_object *obj, const struct bpf_map *next)
9270 {
9271         if (next == NULL) {
9272                 if (!obj->nr_maps)
9273                         return NULL;
9274                 return obj->maps + obj->nr_maps - 1;
9275         }
9276
9277         return __bpf_map__iter(next, obj, -1);
9278 }
9279
9280 struct bpf_map *
9281 bpf_object__find_map_by_name(const struct bpf_object *obj, const char *name)
9282 {
9283         struct bpf_map *pos;
9284
9285         bpf_object__for_each_map(pos, obj) {
9286                 /* if it's a special internal map name (which always starts
9287                  * with dot) then check if that special name matches the
9288                  * real map name (ELF section name)
9289                  */
9290                 if (name[0] == '.') {
9291                         if (pos->real_name && strcmp(pos->real_name, name) == 0)
9292                                 return pos;
9293                         continue;
9294                 }
9295                 /* otherwise map name has to be an exact match */
9296                 if (map_uses_real_name(pos)) {
9297                         if (strcmp(pos->real_name, name) == 0)
9298                                 return pos;
9299                         continue;
9300                 }
9301                 if (strcmp(pos->name, name) == 0)
9302                         return pos;
9303         }
9304         return errno = ENOENT, NULL;
9305 }
9306
/*
 * Look up a map by name and hand the lookup result straight to
 * bpf_map__fd(), whose return value is returned unchanged.
 */
int
bpf_object__find_map_fd_by_name(const struct bpf_object *obj, const char *name)
{
	const struct bpf_map *found = bpf_object__find_map_by_name(obj, name);

	return bpf_map__fd(found);
}
9312
9313 static int validate_map_op(const struct bpf_map *map, size_t key_sz,
9314                            size_t value_sz, bool check_value_sz)
9315 {
9316         if (map->fd <= 0)
9317                 return -ENOENT;
9318
9319         if (map->def.key_size != key_sz) {
9320                 pr_warn("map '%s': unexpected key size %zu provided, expected %u\n",
9321                         map->name, key_sz, map->def.key_size);
9322                 return -EINVAL;
9323         }
9324
9325         if (!check_value_sz)
9326                 return 0;
9327
9328         switch (map->def.type) {
9329         case BPF_MAP_TYPE_PERCPU_ARRAY:
9330         case BPF_MAP_TYPE_PERCPU_HASH:
9331         case BPF_MAP_TYPE_LRU_PERCPU_HASH:
9332         case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE: {
9333                 int num_cpu = libbpf_num_possible_cpus();
9334                 size_t elem_sz = roundup(map->def.value_size, 8);
9335
9336                 if (value_sz != num_cpu * elem_sz) {
9337                         pr_warn("map '%s': unexpected value size %zu provided for per-CPU map, expected %d * %zu = %zd\n",
9338                                 map->name, value_sz, num_cpu, elem_sz, num_cpu * elem_sz);
9339                         return -EINVAL;
9340                 }
9341                 break;
9342         }
9343         default:
9344                 if (map->def.value_size != value_sz) {
9345                         pr_warn("map '%s': unexpected value size %zu provided, expected %u\n",
9346                                 map->name, value_sz, map->def.value_size);
9347                         return -EINVAL;
9348                 }
9349                 break;
9350         }
9351         return 0;
9352 }
9353
9354 int bpf_map__lookup_elem(const struct bpf_map *map,
9355                          const void *key, size_t key_sz,
9356                          void *value, size_t value_sz, __u64 flags)
9357 {
9358         int err;
9359
9360         err = validate_map_op(map, key_sz, value_sz, true);
9361         if (err)
9362                 return libbpf_err(err);
9363
9364         return bpf_map_lookup_elem_flags(map->fd, key, value, flags);
9365 }
9366
9367 int bpf_map__update_elem(const struct bpf_map *map,
9368                          const void *key, size_t key_sz,
9369                          const void *value, size_t value_sz, __u64 flags)
9370 {
9371         int err;
9372
9373         err = validate_map_op(map, key_sz, value_sz, true);
9374         if (err)
9375                 return libbpf_err(err);
9376
9377         return bpf_map_update_elem(map->fd, key, value, flags);
9378 }
9379
9380 int bpf_map__delete_elem(const struct bpf_map *map,
9381                          const void *key, size_t key_sz, __u64 flags)
9382 {
9383         int err;
9384
9385         err = validate_map_op(map, key_sz, 0, false /* check_value_sz */);
9386         if (err)
9387                 return libbpf_err(err);
9388
9389         return bpf_map_delete_elem_flags(map->fd, key, flags);
9390 }
9391
9392 int bpf_map__lookup_and_delete_elem(const struct bpf_map *map,
9393                                     const void *key, size_t key_sz,
9394                                     void *value, size_t value_sz, __u64 flags)
9395 {
9396         int err;
9397
9398         err = validate_map_op(map, key_sz, value_sz, true);
9399         if (err)
9400                 return libbpf_err(err);
9401
9402         return bpf_map_lookup_and_delete_elem_flags(map->fd, key, value, flags);
9403 }
9404
9405 int bpf_map__get_next_key(const struct bpf_map *map,
9406                           const void *cur_key, void *next_key, size_t key_sz)
9407 {
9408         int err;
9409
9410         err = validate_map_op(map, key_sz, 0, false /* check_value_sz */);
9411         if (err)
9412                 return libbpf_err(err);
9413
9414         return bpf_map_get_next_key(map->fd, cur_key, next_key);
9415 }
9416
/*
 * Extract an error code from a pointer returned by a libbpf API.
 *
 * Returns 0 for a valid (non-NULL, non-error) pointer. For an error-encoded
 * pointer, errno is set from the encoded value and -errno is returned.
 * For NULL, -errno is returned as is: libbpf never returns NULL on success
 * and always sets errno on error, so errno is expected to be already set by
 * the failing API.
 */
long libbpf_get_error(const void *ptr)
{
	if (IS_ERR(ptr))
		errno = -PTR_ERR(ptr);
	else if (ptr)
		return 0;

	return -errno;
}
9432
/*
 * Swap the BPF program behind @link for @prog by calling bpf_link_update()
 * on the two FDs; the result is passed through libbpf_err_errno().
 */
int bpf_link__update_program(struct bpf_link *link, struct bpf_program *prog)
{
	return libbpf_err_errno(bpf_link_update(bpf_link__fd(link),
						bpf_program__fd(prog), NULL));
}
9441
9442 /* Release "ownership" of underlying BPF resource (typically, BPF program
9443  * attached to some BPF hook, e.g., tracepoint, kprobe, etc). Disconnected
9444  * link, when destructed through bpf_link__destroy() call won't attempt to
9445  * detach/unregisted that BPF resource. This is useful in situations where,
9446  * say, attached BPF program has to outlive userspace program that attached it
9447  * in the system. Depending on type of BPF program, though, there might be
9448  * additional steps (like pinning BPF program in BPF FS) necessary to ensure
9449  * exit of userspace program doesn't trigger automatic detachment and clean up
9450  * inside the kernel.
9451  */
9452 void bpf_link__disconnect(struct bpf_link *link)
9453 {
9454         link->disconnected = true;
9455 }
9456
9457 int bpf_link__destroy(struct bpf_link *link)
9458 {
9459         int err = 0;
9460
9461         if (IS_ERR_OR_NULL(link))
9462                 return 0;
9463
9464         if (!link->disconnected && link->detach)
9465                 err = link->detach(link);
9466         if (link->pin_path)
9467                 free(link->pin_path);
9468         if (link->dealloc)
9469                 link->dealloc(link);
9470         else
9471                 free(link);
9472
9473         return libbpf_err(err);
9474 }
9475
9476 int bpf_link__fd(const struct bpf_link *link)
9477 {
9478         return link->fd;
9479 }
9480
9481 const char *bpf_link__pin_path(const struct bpf_link *link)
9482 {
9483         return link->pin_path;
9484 }
9485
9486 static int bpf_link__detach_fd(struct bpf_link *link)
9487 {
9488         return libbpf_err_errno(close(link->fd));
9489 }
9490
9491 struct bpf_link *bpf_link__open(const char *path)
9492 {
9493         struct bpf_link *link;
9494         int fd;
9495
9496         fd = bpf_obj_get(path);
9497         if (fd < 0) {
9498                 fd = -errno;
9499                 pr_warn("failed to open link at %s: %d\n", path, fd);
9500                 return libbpf_err_ptr(fd);
9501         }
9502
9503         link = calloc(1, sizeof(*link));
9504         if (!link) {
9505                 close(fd);
9506                 return libbpf_err_ptr(-ENOMEM);
9507         }
9508         link->detach = &bpf_link__detach_fd;
9509         link->fd = fd;
9510
9511         link->pin_path = strdup(path);
9512         if (!link->pin_path) {
9513                 bpf_link__destroy(link);
9514                 return libbpf_err_ptr(-ENOMEM);
9515         }
9516
9517         return link;
9518 }
9519
9520 int bpf_link__detach(struct bpf_link *link)
9521 {
9522         return bpf_link_detach(link->fd) ? -errno : 0;
9523 }
9524
9525 int bpf_link__pin(struct bpf_link *link, const char *path)
9526 {
9527         int err;
9528
9529         if (link->pin_path)
9530                 return libbpf_err(-EBUSY);
9531         err = make_parent_dir(path);
9532         if (err)
9533                 return libbpf_err(err);
9534         err = check_path(path);
9535         if (err)
9536                 return libbpf_err(err);
9537
9538         link->pin_path = strdup(path);
9539         if (!link->pin_path)
9540                 return libbpf_err(-ENOMEM);
9541
9542         if (bpf_obj_pin(link->fd, link->pin_path)) {
9543                 err = -errno;
9544                 zfree(&link->pin_path);
9545                 return libbpf_err(err);
9546         }
9547
9548         pr_debug("link fd=%d: pinned at %s\n", link->fd, link->pin_path);
9549         return 0;
9550 }
9551
9552 int bpf_link__unpin(struct bpf_link *link)
9553 {
9554         int err;
9555
9556         if (!link->pin_path)
9557                 return libbpf_err(-EINVAL);
9558
9559         err = unlink(link->pin_path);
9560         if (err != 0)
9561                 return -errno;
9562
9563         pr_debug("link fd=%d: unpinned from %s\n", link->fd, link->pin_path);
9564         zfree(&link->pin_path);
9565         return 0;
9566 }
9567
9568 struct bpf_link_perf {
9569         struct bpf_link link;
9570         int perf_event_fd;
9571         /* legacy kprobe support: keep track of probe identifier and type */
9572         char *legacy_probe_name;
9573         bool legacy_is_kprobe;
9574         bool legacy_is_retprobe;
9575 };
9576
9577 static int remove_kprobe_event_legacy(const char *probe_name, bool retprobe);
9578 static int remove_uprobe_event_legacy(const char *probe_name, bool retprobe);
9579
9580 static int bpf_link_perf_detach(struct bpf_link *link)
9581 {
9582         struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link);
9583         int err = 0;
9584
9585         if (ioctl(perf_link->perf_event_fd, PERF_EVENT_IOC_DISABLE, 0) < 0)
9586                 err = -errno;
9587
9588         if (perf_link->perf_event_fd != link->fd)
9589                 close(perf_link->perf_event_fd);
9590         close(link->fd);
9591
9592         /* legacy uprobe/kprobe needs to be removed after perf event fd closure */
9593         if (perf_link->legacy_probe_name) {
9594                 if (perf_link->legacy_is_kprobe) {
9595                         err = remove_kprobe_event_legacy(perf_link->legacy_probe_name,
9596                                                          perf_link->legacy_is_retprobe);
9597                 } else {
9598                         err = remove_uprobe_event_legacy(perf_link->legacy_probe_name,
9599                                                          perf_link->legacy_is_retprobe);
9600                 }
9601         }
9602
9603         return err;
9604 }
9605
9606 static void bpf_link_perf_dealloc(struct bpf_link *link)
9607 {
9608         struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link);
9609
9610         free(perf_link->legacy_probe_name);
9611         free(perf_link);
9612 }
9613
9614 struct bpf_link *bpf_program__attach_perf_event_opts(const struct bpf_program *prog, int pfd,
9615                                                      const struct bpf_perf_event_opts *opts)
9616 {
9617         char errmsg[STRERR_BUFSIZE];
9618         struct bpf_link_perf *link;
9619         int prog_fd, link_fd = -1, err;
9620
9621         if (!OPTS_VALID(opts, bpf_perf_event_opts))
9622                 return libbpf_err_ptr(-EINVAL);
9623
9624         if (pfd < 0) {
9625                 pr_warn("prog '%s': invalid perf event FD %d\n",
9626                         prog->name, pfd);
9627                 return libbpf_err_ptr(-EINVAL);
9628         }
9629         prog_fd = bpf_program__fd(prog);
9630         if (prog_fd < 0) {
9631                 pr_warn("prog '%s': can't attach BPF program w/o FD (did you load it?)\n",
9632                         prog->name);
9633                 return libbpf_err_ptr(-EINVAL);
9634         }
9635
9636         link = calloc(1, sizeof(*link));
9637         if (!link)
9638                 return libbpf_err_ptr(-ENOMEM);
9639         link->link.detach = &bpf_link_perf_detach;
9640         link->link.dealloc = &bpf_link_perf_dealloc;
9641         link->perf_event_fd = pfd;
9642
9643         if (kernel_supports(prog->obj, FEAT_PERF_LINK)) {
9644                 DECLARE_LIBBPF_OPTS(bpf_link_create_opts, link_opts,
9645                         .perf_event.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0));
9646
9647                 link_fd = bpf_link_create(prog_fd, pfd, BPF_PERF_EVENT, &link_opts);
9648                 if (link_fd < 0) {
9649                         err = -errno;
9650                         pr_warn("prog '%s': failed to create BPF link for perf_event FD %d: %d (%s)\n",
9651                                 prog->name, pfd,
9652                                 err, libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
9653                         goto err_out;
9654                 }
9655                 link->link.fd = link_fd;
9656         } else {
9657                 if (OPTS_GET(opts, bpf_cookie, 0)) {
9658                         pr_warn("prog '%s': user context value is not supported\n", prog->name);
9659                         err = -EOPNOTSUPP;
9660                         goto err_out;
9661                 }
9662
9663                 if (ioctl(pfd, PERF_EVENT_IOC_SET_BPF, prog_fd) < 0) {
9664                         err = -errno;
9665                         pr_warn("prog '%s': failed to attach to perf_event FD %d: %s\n",
9666                                 prog->name, pfd, libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
9667                         if (err == -EPROTO)
9668                                 pr_warn("prog '%s': try add PERF_SAMPLE_CALLCHAIN to or remove exclude_callchain_[kernel|user] from pfd %d\n",
9669                                         prog->name, pfd);
9670                         goto err_out;
9671                 }
9672                 link->link.fd = pfd;
9673         }
9674         if (ioctl(pfd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
9675                 err = -errno;
9676                 pr_warn("prog '%s': failed to enable perf_event FD %d: %s\n",
9677                         prog->name, pfd, libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
9678                 goto err_out;
9679         }
9680
9681         return &link->link;
9682 err_out:
9683         if (link_fd >= 0)
9684                 close(link_fd);
9685         free(link);
9686         return libbpf_err_ptr(err);
9687 }
9688
/* Same as bpf_program__attach_perf_event_opts(), with no options given. */
struct bpf_link *bpf_program__attach_perf_event(const struct bpf_program *prog, int pfd)
{
	const struct bpf_perf_event_opts *no_opts = NULL;

	return bpf_program__attach_perf_event_opts(prog, pfd, no_opts);
}
9693
9694 /*
9695  * this function is expected to parse integer in the range of [0, 2^31-1] from
9696  * given file using scanf format string fmt. If actual parsed value is
9697  * negative, the result might be indistinguishable from error
9698  */
9699 static int parse_uint_from_file(const char *file, const char *fmt)
9700 {
9701         char buf[STRERR_BUFSIZE];
9702         int err, ret;
9703         FILE *f;
9704
9705         f = fopen(file, "r");
9706         if (!f) {
9707                 err = -errno;
9708                 pr_debug("failed to open '%s': %s\n", file,
9709                          libbpf_strerror_r(err, buf, sizeof(buf)));
9710                 return err;
9711         }
9712         err = fscanf(f, fmt, &ret);
9713         if (err != 1) {
9714                 err = err == EOF ? -EIO : -errno;
9715                 pr_debug("failed to parse '%s': %s\n", file,
9716                         libbpf_strerror_r(err, buf, sizeof(buf)));
9717                 fclose(f);
9718                 return err;
9719         }
9720         fclose(f);
9721         return ret;
9722 }
9723
/* Read the perf event type of the kprobe event source from sysfs. */
static int determine_kprobe_perf_type(void)
{
	return parse_uint_from_file("/sys/bus/event_source/devices/kprobe/type",
				    "%d\n");
}
9730
/* Read the perf event type of the uprobe event source from sysfs. */
static int determine_uprobe_perf_type(void)
{
	return parse_uint_from_file("/sys/bus/event_source/devices/uprobe/type",
				    "%d\n");
}
9737
/* Read the config bit that marks a kprobe as a return probe from sysfs. */
static int determine_kprobe_retprobe_bit(void)
{
	return parse_uint_from_file("/sys/bus/event_source/devices/kprobe/format/retprobe",
				    "config:%d\n");
}
9744
/* Read the config bit that marks a uprobe as a return probe from sysfs. */
static int determine_uprobe_retprobe_bit(void)
{
	return parse_uint_from_file("/sys/bus/event_source/devices/uprobe/format/retprobe",
				    "config:%d\n");
}
9751
9752 #define PERF_UPROBE_REF_CTR_OFFSET_BITS 32
9753 #define PERF_UPROBE_REF_CTR_OFFSET_SHIFT 32
9754
9755 static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name,
9756                                  uint64_t offset, int pid, size_t ref_ctr_off)
9757 {
9758         struct perf_event_attr attr = {};
9759         char errmsg[STRERR_BUFSIZE];
9760         int type, pfd, err;
9761
9762         if (ref_ctr_off >= (1ULL << PERF_UPROBE_REF_CTR_OFFSET_BITS))
9763                 return -EINVAL;
9764
9765         type = uprobe ? determine_uprobe_perf_type()
9766                       : determine_kprobe_perf_type();
9767         if (type < 0) {
9768                 pr_warn("failed to determine %s perf type: %s\n",
9769                         uprobe ? "uprobe" : "kprobe",
9770                         libbpf_strerror_r(type, errmsg, sizeof(errmsg)));
9771                 return type;
9772         }
9773         if (retprobe) {
9774                 int bit = uprobe ? determine_uprobe_retprobe_bit()
9775                                  : determine_kprobe_retprobe_bit();
9776
9777                 if (bit < 0) {
9778                         pr_warn("failed to determine %s retprobe bit: %s\n",
9779                                 uprobe ? "uprobe" : "kprobe",
9780                                 libbpf_strerror_r(bit, errmsg, sizeof(errmsg)));
9781                         return bit;
9782                 }
9783                 attr.config |= 1 << bit;
9784         }
9785         attr.size = sizeof(attr);
9786         attr.type = type;
9787         attr.config |= (__u64)ref_ctr_off << PERF_UPROBE_REF_CTR_OFFSET_SHIFT;
9788         attr.config1 = ptr_to_u64(name); /* kprobe_func or uprobe_path */
9789         attr.config2 = offset;           /* kprobe_addr or probe_offset */
9790
9791         /* pid filter is meaningful only for uprobes */
9792         pfd = syscall(__NR_perf_event_open, &attr,
9793                       pid < 0 ? -1 : pid /* pid */,
9794                       pid == -1 ? 0 : -1 /* cpu */,
9795                       -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC);
9796         if (pfd < 0) {
9797                 err = -errno;
9798                 pr_warn("%s perf_event_open() failed: %s\n",
9799                         uprobe ? "uprobe" : "kprobe",
9800                         libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
9801                 return err;
9802         }
9803         return pfd;
9804 }
9805
9806 static int append_to_file(const char *file, const char *fmt, ...)
9807 {
9808         int fd, n, err = 0;
9809         va_list ap;
9810
9811         fd = open(file, O_WRONLY | O_APPEND | O_CLOEXEC, 0);
9812         if (fd < 0)
9813                 return -errno;
9814
9815         va_start(ap, fmt);
9816         n = vdprintf(fd, fmt, ap);
9817         va_end(ap);
9818
9819         if (n < 0)
9820                 err = -errno;
9821
9822         close(fd);
9823         return err;
9824 }
9825
/*
 * Compose a name for a legacy kprobe event from the process ID, the kernel
 * function name, the offset and a counter that is atomically bumped on
 * every call. The result is written into @buf, bounded by @buf_sz.
 */
static void gen_kprobe_legacy_event_name(char *buf, size_t buf_sz,
					 const char *kfunc_name, size_t offset)
{
	static int next_id = 0;
	const int id = __sync_fetch_and_add(&next_id, 1);

	snprintf(buf, buf_sz, "libbpf_%u_%s_0x%zx_%d",
		 getpid(), kfunc_name, offset, id);
}
9834
/*
 * Register a legacy kprobe or kretprobe by appending its definition to the
 * tracing kprobe_events file. Returns what append_to_file() returns.
 */
static int add_kprobe_event_legacy(const char *probe_name, bool retprobe,
				   const char *kfunc_name, size_t offset)
{
	const char *group = retprobe ? "kretprobes" : "kprobes";
	const char kind = retprobe ? 'r' : 'p';

	return append_to_file("/sys/kernel/debug/tracing/kprobe_events",
			      "%c:%s/%s %s+0x%zx",
			      kind, group, probe_name, kfunc_name, offset);
}
9845
/*
 * Unregister a legacy kprobe or kretprobe by appending a removal entry to
 * the tracing kprobe_events file. Returns what append_to_file() returns.
 */
static int remove_kprobe_event_legacy(const char *probe_name, bool retprobe)
{
	const char *group = retprobe ? "kretprobes" : "kprobes";

	return append_to_file("/sys/kernel/debug/tracing/kprobe_events",
			      "-:%s/%s", group, probe_name);
}
9852
/*
 * Read the event id of a legacy kprobe/kretprobe from its "id" file under
 * the tracing events directory. Returns what parse_uint_from_file() returns.
 */
static int determine_kprobe_perf_type_legacy(const char *probe_name, bool retprobe)
{
	const char *group = retprobe ? "kretprobes" : "kprobes";
	char id_path[256];

	snprintf(id_path, sizeof(id_path),
		 "/sys/kernel/debug/tracing/events/%s/%s/id",
		 group, probe_name);

	return parse_uint_from_file(id_path, "%d\n");
}
9863
9864 static int perf_event_kprobe_open_legacy(const char *probe_name, bool retprobe,
9865                                          const char *kfunc_name, size_t offset, int pid)
9866 {
9867         struct perf_event_attr attr = {};
9868         char errmsg[STRERR_BUFSIZE];
9869         int type, pfd, err;
9870
9871         err = add_kprobe_event_legacy(probe_name, retprobe, kfunc_name, offset);
9872         if (err < 0) {
9873                 pr_warn("failed to add legacy kprobe event for '%s+0x%zx': %s\n",
9874                         kfunc_name, offset,
9875                         libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
9876                 return err;
9877         }
9878         type = determine_kprobe_perf_type_legacy(probe_name, retprobe);
9879         if (type < 0) {
9880                 err = type;
9881                 pr_warn("failed to determine legacy kprobe event id for '%s+0x%zx': %s\n",
9882                         kfunc_name, offset,
9883                         libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
9884                 goto err_clean_legacy;
9885         }
9886         attr.size = sizeof(attr);
9887         attr.config = type;
9888         attr.type = PERF_TYPE_TRACEPOINT;
9889
9890         pfd = syscall(__NR_perf_event_open, &attr,
9891                       pid < 0 ? -1 : pid, /* pid */
9892                       pid == -1 ? 0 : -1, /* cpu */
9893                       -1 /* group_fd */,  PERF_FLAG_FD_CLOEXEC);
9894         if (pfd < 0) {
9895                 err = -errno;
9896                 pr_warn("legacy kprobe perf_event_open() failed: %s\n",
9897                         libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
9898                 goto err_clean_legacy;
9899         }
9900         return pfd;
9901
9902 err_clean_legacy:
9903         /* Clear the newly added legacy kprobe_event */
9904         remove_kprobe_event_legacy(probe_name, retprobe);
9905         return err;
9906 }
9907
9908 struct bpf_link *
9909 bpf_program__attach_kprobe_opts(const struct bpf_program *prog,
9910                                 const char *func_name,
9911                                 const struct bpf_kprobe_opts *opts)
9912 {
9913         DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts);
9914         char errmsg[STRERR_BUFSIZE];
9915         char *legacy_probe = NULL;
9916         struct bpf_link *link;
9917         size_t offset;
9918         bool retprobe, legacy;
9919         int pfd, err;
9920
9921         if (!OPTS_VALID(opts, bpf_kprobe_opts))
9922                 return libbpf_err_ptr(-EINVAL);
9923
9924         retprobe = OPTS_GET(opts, retprobe, false);
9925         offset = OPTS_GET(opts, offset, 0);
9926         pe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0);
9927
9928         legacy = determine_kprobe_perf_type() < 0;
9929         if (!legacy) {
9930                 pfd = perf_event_open_probe(false /* uprobe */, retprobe,
9931                                             func_name, offset,
9932                                             -1 /* pid */, 0 /* ref_ctr_off */);
9933         } else {
9934                 char probe_name[256];
9935
9936                 gen_kprobe_legacy_event_name(probe_name, sizeof(probe_name),
9937                                              func_name, offset);
9938
9939                 legacy_probe = strdup(probe_name);
9940                 if (!legacy_probe)
9941                         return libbpf_err_ptr(-ENOMEM);
9942
9943                 pfd = perf_event_kprobe_open_legacy(legacy_probe, retprobe, func_name,
9944                                                     offset, -1 /* pid */);
9945         }
9946         if (pfd < 0) {
9947                 err = -errno;
9948                 pr_warn("prog '%s': failed to create %s '%s+0x%zx' perf event: %s\n",
9949                         prog->name, retprobe ? "kretprobe" : "kprobe",
9950                         func_name, offset,
9951                         libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
9952                 goto err_out;
9953         }
9954         link = bpf_program__attach_perf_event_opts(prog, pfd, &pe_opts);
9955         err = libbpf_get_error(link);
9956         if (err) {
9957                 close(pfd);
9958                 pr_warn("prog '%s': failed to attach to %s '%s+0x%zx': %s\n",
9959                         prog->name, retprobe ? "kretprobe" : "kprobe",
9960                         func_name, offset,
9961                         libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
9962                 goto err_clean_legacy;
9963         }
9964         if (legacy) {
9965                 struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link);
9966
9967                 perf_link->legacy_probe_name = legacy_probe;
9968                 perf_link->legacy_is_kprobe = true;
9969                 perf_link->legacy_is_retprobe = retprobe;
9970         }
9971
9972         return link;
9973
9974 err_clean_legacy:
9975         if (legacy)
9976                 remove_kprobe_event_legacy(legacy_probe, retprobe);
9977 err_out:
9978         free(legacy_probe);
9979         return libbpf_err_ptr(err);
9980 }
9981
9982 struct bpf_link *bpf_program__attach_kprobe(const struct bpf_program *prog,
9983                                             bool retprobe,
9984                                             const char *func_name)
9985 {
9986         DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, opts,
9987                 .retprobe = retprobe,
9988         );
9989
9990         return bpf_program__attach_kprobe_opts(prog, func_name, &opts);
9991 }
9992
/* Adapted from perf/util/string.c
 *
 * Match @str against glob pattern @pat: '?' matches any single character,
 * '*' matches any (possibly empty) run of characters, everything else has
 * to match literally. The whole string must be covered by the pattern.
 */
static bool glob_match(const char *str, const char *pat)
{
	/* walk the part of the pattern that precedes the first '*' */
	for (; *str && *pat && *pat != '*'; str++, pat++) {
		if (*pat != '?' && *str != *pat)
			return false;
	}

	/* no wild card left: both sides have to be used up */
	if (*pat != '*')
		return *str == '\0' && *pat == '\0';

	/* a run of '*' is the same as a single one */
	do {
		pat++;
	} while (*pat == '*');

	/* Tail wild card matches all */
	if (*pat == '\0')
		return true;

	/* try to match the rest of the pattern at every remaining position */
	for (; *str; str++) {
		if (glob_match(str, pat))
			return true;
	}
	return false;
}
10019
/* Accumulator handed as ctx to resolve_kprobe_multi_cb(): collects addresses
 * of symbols whose names match a glob pattern.
 */
struct kprobe_multi_resolve {
	const char *pattern;	/* glob pattern symbol names are matched against */
	unsigned long *addrs;	/* addresses of the symbols matched so far */
	size_t cap;		/* capacity of addrs, maintained via libbpf_ensure_mem() */
	size_t cnt;		/* number of entries stored in addrs */
};
10026
10027 static int
10028 resolve_kprobe_multi_cb(unsigned long long sym_addr, char sym_type,
10029                         const char *sym_name, void *ctx)
10030 {
10031         struct kprobe_multi_resolve *res = ctx;
10032         int err;
10033
10034         if (!glob_match(sym_name, res->pattern))
10035                 return 0;
10036
10037         err = libbpf_ensure_mem((void **) &res->addrs, &res->cap, sizeof(unsigned long),
10038                                 res->cnt + 1);
10039         if (err)
10040                 return err;
10041
10042         res->addrs[res->cnt++] = (unsigned long) sym_addr;
10043         return 0;
10044 }
10045
10046 struct bpf_link *
10047 bpf_program__attach_kprobe_multi_opts(const struct bpf_program *prog,
10048                                       const char *pattern,
10049                                       const struct bpf_kprobe_multi_opts *opts)
10050 {
10051         LIBBPF_OPTS(bpf_link_create_opts, lopts);
10052         struct kprobe_multi_resolve res = {
10053                 .pattern = pattern,
10054         };
10055         struct bpf_link *link = NULL;
10056         char errmsg[STRERR_BUFSIZE];
10057         const unsigned long *addrs;
10058         int err, link_fd, prog_fd;
10059         const __u64 *cookies;
10060         const char **syms;
10061         bool retprobe;
10062         size_t cnt;
10063
10064         if (!OPTS_VALID(opts, bpf_kprobe_multi_opts))
10065                 return libbpf_err_ptr(-EINVAL);
10066
10067         syms    = OPTS_GET(opts, syms, false);
10068         addrs   = OPTS_GET(opts, addrs, false);
10069         cnt     = OPTS_GET(opts, cnt, false);
10070         cookies = OPTS_GET(opts, cookies, false);
10071
10072         if (!pattern && !addrs && !syms)
10073                 return libbpf_err_ptr(-EINVAL);
10074         if (pattern && (addrs || syms || cookies || cnt))
10075                 return libbpf_err_ptr(-EINVAL);
10076         if (!pattern && !cnt)
10077                 return libbpf_err_ptr(-EINVAL);
10078         if (addrs && syms)
10079                 return libbpf_err_ptr(-EINVAL);
10080
10081         if (pattern) {
10082                 err = libbpf_kallsyms_parse(resolve_kprobe_multi_cb, &res);
10083                 if (err)
10084                         goto error;
10085                 if (!res.cnt) {
10086                         err = -ENOENT;
10087                         goto error;
10088                 }
10089                 addrs = res.addrs;
10090                 cnt = res.cnt;
10091         }
10092
10093         retprobe = OPTS_GET(opts, retprobe, false);
10094
10095         lopts.kprobe_multi.syms = syms;
10096         lopts.kprobe_multi.addrs = addrs;
10097         lopts.kprobe_multi.cookies = cookies;
10098         lopts.kprobe_multi.cnt = cnt;
10099         lopts.kprobe_multi.flags = retprobe ? BPF_F_KPROBE_MULTI_RETURN : 0;
10100
10101         link = calloc(1, sizeof(*link));
10102         if (!link) {
10103                 err = -ENOMEM;
10104                 goto error;
10105         }
10106         link->detach = &bpf_link__detach_fd;
10107
10108         prog_fd = bpf_program__fd(prog);
10109         link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_KPROBE_MULTI, &lopts);
10110         if (link_fd < 0) {
10111                 err = -errno;
10112                 pr_warn("prog '%s': failed to attach: %s\n",
10113                         prog->name, libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
10114                 goto error;
10115         }
10116         link->fd = link_fd;
10117         free(res.addrs);
10118         return link;
10119
10120 error:
10121         free(link);
10122         free(res.addrs);
10123         return libbpf_err_ptr(err);
10124 }
10125
10126 static int attach_kprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link)
10127 {
10128         DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, opts);
10129         unsigned long offset = 0;
10130         const char *func_name;
10131         char *func;
10132         int n;
10133
10134         *link = NULL;
10135
10136         /* no auto-attach for SEC("kprobe") and SEC("kretprobe") */
10137         if (strcmp(prog->sec_name, "kprobe") == 0 || strcmp(prog->sec_name, "kretprobe") == 0)
10138                 return 0;
10139
10140         opts.retprobe = str_has_pfx(prog->sec_name, "kretprobe/");
10141         if (opts.retprobe)
10142                 func_name = prog->sec_name + sizeof("kretprobe/") - 1;
10143         else
10144                 func_name = prog->sec_name + sizeof("kprobe/") - 1;
10145
10146         n = sscanf(func_name, "%m[a-zA-Z0-9_.]+%li", &func, &offset);
10147         if (n < 1) {
10148                 pr_warn("kprobe name is invalid: %s\n", func_name);
10149                 return -EINVAL;
10150         }
10151         if (opts.retprobe && offset != 0) {
10152                 free(func);
10153                 pr_warn("kretprobes do not support offset specification\n");
10154                 return -EINVAL;
10155         }
10156
10157         opts.offset = offset;
10158         *link = bpf_program__attach_kprobe_opts(prog, func, &opts);
10159         free(func);
10160         return libbpf_get_error(*link);
10161 }
10162
10163 static int attach_kprobe_multi(const struct bpf_program *prog, long cookie, struct bpf_link **link)
10164 {
10165         LIBBPF_OPTS(bpf_kprobe_multi_opts, opts);
10166         const char *spec;
10167         char *pattern;
10168         int n;
10169
10170         *link = NULL;
10171
10172         /* no auto-attach for SEC("kprobe.multi") and SEC("kretprobe.multi") */
10173         if (strcmp(prog->sec_name, "kprobe.multi") == 0 ||
10174             strcmp(prog->sec_name, "kretprobe.multi") == 0)
10175                 return 0;
10176
10177         opts.retprobe = str_has_pfx(prog->sec_name, "kretprobe.multi/");
10178         if (opts.retprobe)
10179                 spec = prog->sec_name + sizeof("kretprobe.multi/") - 1;
10180         else
10181                 spec = prog->sec_name + sizeof("kprobe.multi/") - 1;
10182
10183         n = sscanf(spec, "%m[a-zA-Z0-9_.*?]", &pattern);
10184         if (n < 1) {
10185                 pr_warn("kprobe multi pattern is invalid: %s\n", pattern);
10186                 return -EINVAL;
10187         }
10188
10189         *link = bpf_program__attach_kprobe_multi_opts(prog, pattern, &opts);
10190         free(pattern);
10191         return libbpf_get_error(*link);
10192 }
10193
/* Build the name of a legacy uprobe event into @buf (at most @buf_sz bytes,
 * always NUL-terminated by snprintf when @buf_sz > 0).
 *
 * The name is "libbpf_<pid>_<binary_path>_0x<offset>", after which every
 * character that is not alphanumeric is replaced with '_'.
 */
static void gen_uprobe_legacy_event_name(char *buf, size_t buf_sz,
					 const char *binary_path, uint64_t offset)
{
	size_t i;

	snprintf(buf, buf_sz, "libbpf_%u_%s_0x%zx", getpid(), binary_path, (size_t)offset);

	/* sanitize binary_path in the probe name */
	for (i = 0; buf[i]; i++) {
		/* <ctype.h> functions are only defined for unsigned char values
		 * (and EOF); paths may contain bytes >= 0x80, which are negative
		 * when plain char is signed
		 */
		if (!isalnum((unsigned char)buf[i]))
			buf[i] = '_';
	}
}
10207
/* Register a legacy u[ret]probe named @probe_name at @binary_path:@offset by
 * appending a "p:"/"r:" definition line to the tracefs uprobe_events file.
 * Returns whatever append_to_file() returns.
 */
static inline int add_uprobe_event_legacy(const char *probe_name, bool retprobe,
					  const char *binary_path, size_t offset)
{
	const char *group = retprobe ? "uretprobes" : "uprobes";
	const char kind = retprobe ? 'r' : 'p';

	return append_to_file("/sys/kernel/debug/tracing/uprobe_events",
			      "%c:%s/%s %s:0x%zx",
			      kind, group, probe_name, binary_path, offset);
}
10218
/* Unregister the legacy u[ret]probe named @probe_name by appending a "-:"
 * removal line to the tracefs uprobe_events file. Returns whatever
 * append_to_file() returns.
 */
static inline int remove_uprobe_event_legacy(const char *probe_name, bool retprobe)
{
	const char *group = retprobe ? "uretprobes" : "uprobes";

	return append_to_file("/sys/kernel/debug/tracing/uprobe_events",
			      "-:%s/%s", group, probe_name);
}
10225
/* Read the event id of the legacy u[ret]probe @probe_name from its tracefs
 * "id" file. Returns the result of parse_uint_from_file() on that path.
 */
static int determine_uprobe_perf_type_legacy(const char *probe_name, bool retprobe)
{
	const char *group = retprobe ? "uretprobes" : "uprobes";
	char id_path[512];

	snprintf(id_path, sizeof(id_path),
		 "/sys/kernel/debug/tracing/events/%s/%s/id",
		 group, probe_name);

	return parse_uint_from_file(id_path, "%d\n");
}
10236
10237 static int perf_event_uprobe_open_legacy(const char *probe_name, bool retprobe,
10238                                          const char *binary_path, size_t offset, int pid)
10239 {
10240         struct perf_event_attr attr;
10241         int type, pfd, err;
10242
10243         err = add_uprobe_event_legacy(probe_name, retprobe, binary_path, offset);
10244         if (err < 0) {
10245                 pr_warn("failed to add legacy uprobe event for %s:0x%zx: %d\n",
10246                         binary_path, (size_t)offset, err);
10247                 return err;
10248         }
10249         type = determine_uprobe_perf_type_legacy(probe_name, retprobe);
10250         if (type < 0) {
10251                 err = type;
10252                 pr_warn("failed to determine legacy uprobe event id for %s:0x%zx: %d\n",
10253                         binary_path, offset, err);
10254                 goto err_clean_legacy;
10255         }
10256
10257         memset(&attr, 0, sizeof(attr));
10258         attr.size = sizeof(attr);
10259         attr.config = type;
10260         attr.type = PERF_TYPE_TRACEPOINT;
10261
10262         pfd = syscall(__NR_perf_event_open, &attr,
10263                       pid < 0 ? -1 : pid, /* pid */
10264                       pid == -1 ? 0 : -1, /* cpu */
10265                       -1 /* group_fd */,  PERF_FLAG_FD_CLOEXEC);
10266         if (pfd < 0) {
10267                 err = -errno;
10268                 pr_warn("legacy uprobe perf_event_open() failed: %d\n", err);
10269                 goto err_clean_legacy;
10270         }
10271         return pfd;
10272
10273 err_clean_legacy:
10274         /* Clear the newly added legacy uprobe_event */
10275         remove_uprobe_event_legacy(probe_name, retprobe);
10276         return err;
10277 }
10278
10279 /* Return next ELF section of sh_type after scn, or first of that type if scn is NULL. */
10280 static Elf_Scn *elf_find_next_scn_by_type(Elf *elf, int sh_type, Elf_Scn *scn)
10281 {
10282         while ((scn = elf_nextscn(elf, scn)) != NULL) {
10283                 GElf_Shdr sh;
10284
10285                 if (!gelf_getshdr(scn, &sh))
10286                         continue;
10287                 if (sh.sh_type == sh_type)
10288                         return scn;
10289         }
10290         return NULL;
10291 }
10292
10293 /* Find offset of function name in object specified by path.  "name" matches
10294  * symbol name or name@@LIB for library functions.
10295  */
10296 static long elf_find_func_offset(const char *binary_path, const char *name)
10297 {
10298         int fd, i, sh_types[2] = { SHT_DYNSYM, SHT_SYMTAB };
10299         bool is_shared_lib, is_name_qualified;
10300         char errmsg[STRERR_BUFSIZE];
10301         long ret = -ENOENT;
10302         size_t name_len;
10303         GElf_Ehdr ehdr;
10304         Elf *elf;
10305
10306         fd = open(binary_path, O_RDONLY | O_CLOEXEC);
10307         if (fd < 0) {
10308                 ret = -errno;
10309                 pr_warn("failed to open %s: %s\n", binary_path,
10310                         libbpf_strerror_r(ret, errmsg, sizeof(errmsg)));
10311                 return ret;
10312         }
10313         elf = elf_begin(fd, ELF_C_READ_MMAP, NULL);
10314         if (!elf) {
10315                 pr_warn("elf: could not read elf from %s: %s\n", binary_path, elf_errmsg(-1));
10316                 close(fd);
10317                 return -LIBBPF_ERRNO__FORMAT;
10318         }
10319         if (!gelf_getehdr(elf, &ehdr)) {
10320                 pr_warn("elf: failed to get ehdr from %s: %s\n", binary_path, elf_errmsg(-1));
10321                 ret = -LIBBPF_ERRNO__FORMAT;
10322                 goto out;
10323         }
10324         /* for shared lib case, we do not need to calculate relative offset */
10325         is_shared_lib = ehdr.e_type == ET_DYN;
10326
10327         name_len = strlen(name);
10328         /* Does name specify "@@LIB"? */
10329         is_name_qualified = strstr(name, "@@") != NULL;
10330
10331         /* Search SHT_DYNSYM, SHT_SYMTAB for symbol.  This search order is used because if
10332          * a binary is stripped, it may only have SHT_DYNSYM, and a fully-statically
10333          * linked binary may not have SHT_DYMSYM, so absence of a section should not be
10334          * reported as a warning/error.
10335          */
10336         for (i = 0; i < ARRAY_SIZE(sh_types); i++) {
10337                 size_t nr_syms, strtabidx, idx;
10338                 Elf_Data *symbols = NULL;
10339                 Elf_Scn *scn = NULL;
10340                 int last_bind = -1;
10341                 const char *sname;
10342                 GElf_Shdr sh;
10343
10344                 scn = elf_find_next_scn_by_type(elf, sh_types[i], NULL);
10345                 if (!scn) {
10346                         pr_debug("elf: failed to find symbol table ELF sections in '%s'\n",
10347                                  binary_path);
10348                         continue;
10349                 }
10350                 if (!gelf_getshdr(scn, &sh))
10351                         continue;
10352                 strtabidx = sh.sh_link;
10353                 symbols = elf_getdata(scn, 0);
10354                 if (!symbols) {
10355                         pr_warn("elf: failed to get symbols for symtab section in '%s': %s\n",
10356                                 binary_path, elf_errmsg(-1));
10357                         ret = -LIBBPF_ERRNO__FORMAT;
10358                         goto out;
10359                 }
10360                 nr_syms = symbols->d_size / sh.sh_entsize;
10361
10362                 for (idx = 0; idx < nr_syms; idx++) {
10363                         int curr_bind;
10364                         GElf_Sym sym;
10365                         Elf_Scn *sym_scn;
10366                         GElf_Shdr sym_sh;
10367
10368                         if (!gelf_getsym(symbols, idx, &sym))
10369                                 continue;
10370
10371                         if (GELF_ST_TYPE(sym.st_info) != STT_FUNC)
10372                                 continue;
10373
10374                         sname = elf_strptr(elf, strtabidx, sym.st_name);
10375                         if (!sname)
10376                                 continue;
10377
10378                         curr_bind = GELF_ST_BIND(sym.st_info);
10379
10380                         /* User can specify func, func@@LIB or func@@LIB_VERSION. */
10381                         if (strncmp(sname, name, name_len) != 0)
10382                                 continue;
10383                         /* ...but we don't want a search for "foo" to match 'foo2" also, so any
10384                          * additional characters in sname should be of the form "@@LIB".
10385                          */
10386                         if (!is_name_qualified && sname[name_len] != '\0' && sname[name_len] != '@')
10387                                 continue;
10388
10389                         if (ret >= 0) {
10390                                 /* handle multiple matches */
10391                                 if (last_bind != STB_WEAK && curr_bind != STB_WEAK) {
10392                                         /* Only accept one non-weak bind. */
10393                                         pr_warn("elf: ambiguous match for '%s', '%s' in '%s'\n",
10394                                                 sname, name, binary_path);
10395                                         ret = -LIBBPF_ERRNO__FORMAT;
10396                                         goto out;
10397                                 } else if (curr_bind == STB_WEAK) {
10398                                         /* already have a non-weak bind, and
10399                                          * this is a weak bind, so ignore.
10400                                          */
10401                                         continue;
10402                                 }
10403                         }
10404
10405                         /* Transform symbol's virtual address (absolute for
10406                          * binaries and relative for shared libs) into file
10407                          * offset, which is what kernel is expecting for
10408                          * uprobe/uretprobe attachment.
10409                          * See Documentation/trace/uprobetracer.rst for more
10410                          * details.
10411                          * This is done by looking up symbol's containing
10412                          * section's header and using it's virtual address
10413                          * (sh_addr) and corresponding file offset (sh_offset)
10414                          * to transform sym.st_value (virtual address) into
10415                          * desired final file offset.
10416                          */
10417                         sym_scn = elf_getscn(elf, sym.st_shndx);
10418                         if (!sym_scn)
10419                                 continue;
10420                         if (!gelf_getshdr(sym_scn, &sym_sh))
10421                                 continue;
10422
10423                         ret = sym.st_value - sym_sh.sh_addr + sym_sh.sh_offset;
10424                         last_bind = curr_bind;
10425                 }
10426                 if (ret > 0)
10427                         break;
10428         }
10429
10430         if (ret > 0) {
10431                 pr_debug("elf: symbol address match for '%s' in '%s': 0x%lx\n", name, binary_path,
10432                          ret);
10433         } else {
10434                 if (ret == 0) {
10435                         pr_warn("elf: '%s' is 0 in symtab for '%s': %s\n", name, binary_path,
10436                                 is_shared_lib ? "should not be 0 in a shared library" :
10437                                                 "try using shared library path instead");
10438                         ret = -ENOENT;
10439                 } else {
10440                         pr_warn("elf: failed to find symbol '%s' in '%s'\n", name, binary_path);
10441                 }
10442         }
10443 out:
10444         elf_end(elf);
10445         close(fd);
10446         return ret;
10447 }
10448
/* Return the architecture-specific library directory for the architecture
 * libbpf itself is compiled for, or NULL when that architecture is not in
 * the list below.
 */
static const char *arch_specific_lib_paths(void)
{
	const char *lib_dir;

	/*
	 * Based on https://packages.debian.org/sid/libc6.
	 *
	 * Assume that the traced program is built for the same architecture
	 * as libbpf, which should cover the vast majority of cases.
	 */
#if defined(__x86_64__)
	lib_dir = "/lib/x86_64-linux-gnu";
#elif defined(__i386__)
	lib_dir = "/lib/i386-linux-gnu";
#elif defined(__s390x__)
	lib_dir = "/lib/s390x-linux-gnu";
#elif defined(__s390__)
	lib_dir = "/lib/s390-linux-gnu";
#elif defined(__arm__) && defined(__SOFTFP__)
	lib_dir = "/lib/arm-linux-gnueabi";
#elif defined(__arm__)
	/* hard-float ARM: the soft-float case was handled just above */
	lib_dir = "/lib/arm-linux-gnueabihf";
#elif defined(__aarch64__)
	lib_dir = "/lib/aarch64-linux-gnu";
#elif defined(__mips__) && defined(__MIPSEL__) && _MIPS_SZLONG == 64
	lib_dir = "/lib/mips64el-linux-gnuabi64";
#elif defined(__mips__) && defined(__MIPSEL__) && _MIPS_SZLONG == 32
	lib_dir = "/lib/mipsel-linux-gnu";
#elif defined(__powerpc64__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
	lib_dir = "/lib/powerpc64le-linux-gnu";
#elif defined(__sparc__) && defined(__arch64__)
	lib_dir = "/lib/sparc64-linux-gnu";
#elif defined(__riscv) && __riscv_xlen == 64
	lib_dir = "/lib/riscv64-linux-gnu";
#else
	lib_dir = NULL;
#endif
	return lib_dir;
}
10485
/* Get full path to program/shared library.
 *
 * @file is treated as a shared library when it ends in ".so" or contains
 * ".so."; libraries are looked up in $LD_LIBRARY_PATH, /usr/lib64:/usr/lib
 * and the architecture-specific library directory, programs in $PATH and
 * /usr/bin:/usr/sbin. The first candidate "<dir>/<file>" that is readable and
 * executable is written to @result (of size @result_sz).
 *
 * Returns 0 on success, -ENOENT when no candidate qualifies.
 */
static int resolve_full_path(const char *file, char *result, size_t result_sz)
{
	const bool is_lib = str_has_sfx(file, ".so") || strstr(file, ".so.");
	const char *search_paths[3] = {};
	int i;

	search_paths[0] = getenv(is_lib ? "LD_LIBRARY_PATH" : "PATH");
	search_paths[1] = is_lib ? "/usr/lib64:/usr/lib" : "/usr/bin:/usr/sbin";
	if (is_lib)
		search_paths[2] = arch_specific_lib_paths();

	for (i = 0; i < ARRAY_SIZE(search_paths); i++) {
		const char *dir = search_paths[i];

		/* walk the ':'-separated directories of this list, if it is set */
		while (dir) {
			const char *sep;
			int dir_len;

			if (*dir == ':')
				dir++;
			sep = strchr(dir, ':');
			dir_len = sep ? sep - dir : strlen(dir);
			if (dir_len) {
				snprintf(result, result_sz, "%.*s/%s", dir_len, dir, file);
				/* ensure it is an executable file/link */
				if (access(result, R_OK | X_OK) >= 0) {
					pr_debug("resolved '%s' to '%s'\n", file, result);
					return 0;
				}
			}
			/* continue at the next separator; NULL ends this list */
			dir = sep;
		}
	}

	return -ENOENT;
}
10526
10527 LIBBPF_API struct bpf_link *
10528 bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid,
10529                                 const char *binary_path, size_t func_offset,
10530                                 const struct bpf_uprobe_opts *opts)
10531 {
10532         DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts);
10533         char errmsg[STRERR_BUFSIZE], *legacy_probe = NULL;
10534         char full_binary_path[PATH_MAX];
10535         struct bpf_link *link;
10536         size_t ref_ctr_off;
10537         int pfd, err;
10538         bool retprobe, legacy;
10539         const char *func_name;
10540
10541         if (!OPTS_VALID(opts, bpf_uprobe_opts))
10542                 return libbpf_err_ptr(-EINVAL);
10543
10544         retprobe = OPTS_GET(opts, retprobe, false);
10545         ref_ctr_off = OPTS_GET(opts, ref_ctr_offset, 0);
10546         pe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0);
10547
10548         if (binary_path && !strchr(binary_path, '/')) {
10549                 err = resolve_full_path(binary_path, full_binary_path,
10550                                         sizeof(full_binary_path));
10551                 if (err) {
10552                         pr_warn("prog '%s': failed to resolve full path for '%s': %d\n",
10553                                 prog->name, binary_path, err);
10554                         return libbpf_err_ptr(err);
10555                 }
10556                 binary_path = full_binary_path;
10557         }
10558         func_name = OPTS_GET(opts, func_name, NULL);
10559         if (func_name) {
10560                 long sym_off;
10561
10562                 if (!binary_path) {
10563                         pr_warn("prog '%s': name-based attach requires binary_path\n",
10564                                 prog->name);
10565                         return libbpf_err_ptr(-EINVAL);
10566                 }
10567                 sym_off = elf_find_func_offset(binary_path, func_name);
10568                 if (sym_off < 0)
10569                         return libbpf_err_ptr(sym_off);
10570                 func_offset += sym_off;
10571         }
10572
10573         legacy = determine_uprobe_perf_type() < 0;
10574         if (!legacy) {
10575                 pfd = perf_event_open_probe(true /* uprobe */, retprobe, binary_path,
10576                                             func_offset, pid, ref_ctr_off);
10577         } else {
10578                 char probe_name[PATH_MAX + 64];
10579
10580                 if (ref_ctr_off)
10581                         return libbpf_err_ptr(-EINVAL);
10582
10583                 gen_uprobe_legacy_event_name(probe_name, sizeof(probe_name),
10584                                              binary_path, func_offset);
10585
10586                 legacy_probe = strdup(probe_name);
10587                 if (!legacy_probe)
10588                         return libbpf_err_ptr(-ENOMEM);
10589
10590                 pfd = perf_event_uprobe_open_legacy(legacy_probe, retprobe,
10591                                                     binary_path, func_offset, pid);
10592         }
10593         if (pfd < 0) {
10594                 err = -errno;
10595                 pr_warn("prog '%s': failed to create %s '%s:0x%zx' perf event: %s\n",
10596                         prog->name, retprobe ? "uretprobe" : "uprobe",
10597                         binary_path, func_offset,
10598                         libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
10599                 goto err_out;
10600         }
10601
10602         link = bpf_program__attach_perf_event_opts(prog, pfd, &pe_opts);
10603         err = libbpf_get_error(link);
10604         if (err) {
10605                 close(pfd);
10606                 pr_warn("prog '%s': failed to attach to %s '%s:0x%zx': %s\n",
10607                         prog->name, retprobe ? "uretprobe" : "uprobe",
10608                         binary_path, func_offset,
10609                         libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
10610                 goto err_clean_legacy;
10611         }
10612         if (legacy) {
10613                 struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link);
10614
10615                 perf_link->legacy_probe_name = legacy_probe;
10616                 perf_link->legacy_is_kprobe = false;
10617                 perf_link->legacy_is_retprobe = retprobe;
10618         }
10619         return link;
10620
10621 err_clean_legacy:
10622         if (legacy)
10623                 remove_uprobe_event_legacy(legacy_probe, retprobe);
10624 err_out:
10625         free(legacy_probe);
10626         return libbpf_err_ptr(err);
10627 }
10628
10629 /* Format of u[ret]probe section definition supporting auto-attach:
10630  * u[ret]probe/binary:function[+offset]
10631  *
10632  * binary can be an absolute/relative path or a filename; the latter is resolved to a
10633  * full binary path via bpf_program__attach_uprobe_opts.
10634  *
10635  * Specifying uprobe+ ensures we carry out strict matching; either "uprobe" must be
10636  * specified (and auto-attach is not possible) or the above format is specified for
10637  * auto-attach.
10638  */
10639 static int attach_uprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link)
10640 {
10641         DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, opts);
10642         char *probe_type = NULL, *binary_path = NULL, *func_name = NULL;
10643         int n, ret = -EINVAL;
10644         long offset = 0;
10645
10646         *link = NULL;
10647
10648         n = sscanf(prog->sec_name, "%m[^/]/%m[^:]:%m[a-zA-Z0-9_.]+%li",
10649                    &probe_type, &binary_path, &func_name, &offset);
10650         switch (n) {
10651         case 1:
10652                 /* handle SEC("u[ret]probe") - format is valid, but auto-attach is impossible. */
10653                 ret = 0;
10654                 break;
10655         case 2:
10656                 pr_warn("prog '%s': section '%s' missing ':function[+offset]' specification\n",
10657                         prog->name, prog->sec_name);
10658                 break;
10659         case 3:
10660         case 4:
10661                 opts.retprobe = strcmp(probe_type, "uretprobe") == 0 ||
10662                                 strcmp(probe_type, "uretprobe.s") == 0;
10663                 if (opts.retprobe && offset != 0) {
10664                         pr_warn("prog '%s': uretprobes do not support offset specification\n",
10665                                 prog->name);
10666                         break;
10667                 }
10668                 opts.func_name = func_name;
10669                 *link = bpf_program__attach_uprobe_opts(prog, -1, binary_path, offset, &opts);
10670                 ret = libbpf_get_error(*link);
10671                 break;
10672         default:
10673                 pr_warn("prog '%s': invalid format of section definition '%s'\n", prog->name,
10674                         prog->sec_name);
10675                 break;
10676         }
10677         free(probe_type);
10678         free(binary_path);
10679         free(func_name);
10680
10681         return ret;
10682 }
10683
10684 struct bpf_link *bpf_program__attach_uprobe(const struct bpf_program *prog,
10685                                             bool retprobe, pid_t pid,
10686                                             const char *binary_path,
10687                                             size_t func_offset)
10688 {
10689         DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, opts, .retprobe = retprobe);
10690
10691         return bpf_program__attach_uprobe_opts(prog, pid, binary_path, func_offset, &opts);
10692 }
10693
10694 struct bpf_link *bpf_program__attach_usdt(const struct bpf_program *prog,
10695                                           pid_t pid, const char *binary_path,
10696                                           const char *usdt_provider, const char *usdt_name,
10697                                           const struct bpf_usdt_opts *opts)
10698 {
10699         char resolved_path[512];
10700         struct bpf_object *obj = prog->obj;
10701         struct bpf_link *link;
10702         __u64 usdt_cookie;
10703         int err;
10704
10705         if (!OPTS_VALID(opts, bpf_uprobe_opts))
10706                 return libbpf_err_ptr(-EINVAL);
10707
10708         if (bpf_program__fd(prog) < 0) {
10709                 pr_warn("prog '%s': can't attach BPF program w/o FD (did you load it?)\n",
10710                         prog->name);
10711                 return libbpf_err_ptr(-EINVAL);
10712         }
10713
10714         if (!strchr(binary_path, '/')) {
10715                 err = resolve_full_path(binary_path, resolved_path, sizeof(resolved_path));
10716                 if (err) {
10717                         pr_warn("prog '%s': failed to resolve full path for '%s': %d\n",
10718                                 prog->name, binary_path, err);
10719                         return libbpf_err_ptr(err);
10720                 }
10721                 binary_path = resolved_path;
10722         }
10723
10724         /* USDT manager is instantiated lazily on first USDT attach. It will
10725          * be destroyed together with BPF object in bpf_object__close().
10726          */
10727         if (IS_ERR(obj->usdt_man))
10728                 return libbpf_ptr(obj->usdt_man);
10729         if (!obj->usdt_man) {
10730                 obj->usdt_man = usdt_manager_new(obj);
10731                 if (IS_ERR(obj->usdt_man))
10732                         return libbpf_ptr(obj->usdt_man);
10733         }
10734
10735         usdt_cookie = OPTS_GET(opts, usdt_cookie, 0);
10736         link = usdt_manager_attach_usdt(obj->usdt_man, prog, pid, binary_path,
10737                                         usdt_provider, usdt_name, usdt_cookie);
10738         err = libbpf_get_error(link);
10739         if (err)
10740                 return libbpf_err_ptr(err);
10741         return link;
10742 }
10743
/* Auto-attach handler for SEC("usdt/<path>:<provider>:<name>").
 *
 * A bare SEC("usdt") carries no target, so it yields a NULL link and
 * success. Otherwise the three colon-separated parts are parsed out of the
 * section name and handed to bpf_program__attach_usdt() with pid -1.
 * Returns 0 on success or a negative error code.
 */
static int attach_usdt(const struct bpf_program *prog, long cookie, struct bpf_link **link)
{
        char *bin_path = NULL, *usdt_provider = NULL, *usdt_name = NULL;
        const char *sec = bpf_program__section_name(prog);
        int parsed, ret;

        if (!strcmp(sec, "usdt")) {
                /* no auto-attach for just SEC("usdt") */
                *link = NULL;
                return 0;
        }

        parsed = sscanf(sec, "usdt/%m[^:]:%m[^:]:%m[^:]",
                        &bin_path, &usdt_provider, &usdt_name);
        if (parsed == 3) {
                *link = bpf_program__attach_usdt(prog, -1 /* any process */, bin_path,
                                                 usdt_provider, usdt_name, NULL);
                ret = libbpf_get_error(*link);
        } else {
                pr_warn("invalid section '%s', expected SEC(\"usdt/<path>:<provider>:<name>\")\n",
                        sec);
                ret = -EINVAL;
        }

        /* %m conversions allocate; pointers that were never filled stay NULL */
        free(bin_path);
        free(usdt_provider);
        free(usdt_name);
        return ret;
}
10772
10773 static int determine_tracepoint_id(const char *tp_category,
10774                                    const char *tp_name)
10775 {
10776         char file[PATH_MAX];
10777         int ret;
10778
10779         ret = snprintf(file, sizeof(file),
10780                        "/sys/kernel/debug/tracing/events/%s/%s/id",
10781                        tp_category, tp_name);
10782         if (ret < 0)
10783                 return -errno;
10784         if (ret >= sizeof(file)) {
10785                 pr_debug("tracepoint %s/%s path is too long\n",
10786                          tp_category, tp_name);
10787                 return -E2BIG;
10788         }
10789         return parse_uint_from_file(file, "%d\n");
10790 }
10791
10792 static int perf_event_open_tracepoint(const char *tp_category,
10793                                       const char *tp_name)
10794 {
10795         struct perf_event_attr attr = {};
10796         char errmsg[STRERR_BUFSIZE];
10797         int tp_id, pfd, err;
10798
10799         tp_id = determine_tracepoint_id(tp_category, tp_name);
10800         if (tp_id < 0) {
10801                 pr_warn("failed to determine tracepoint '%s/%s' perf event ID: %s\n",
10802                         tp_category, tp_name,
10803                         libbpf_strerror_r(tp_id, errmsg, sizeof(errmsg)));
10804                 return tp_id;
10805         }
10806
10807         attr.type = PERF_TYPE_TRACEPOINT;
10808         attr.size = sizeof(attr);
10809         attr.config = tp_id;
10810
10811         pfd = syscall(__NR_perf_event_open, &attr, -1 /* pid */, 0 /* cpu */,
10812                       -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC);
10813         if (pfd < 0) {
10814                 err = -errno;
10815                 pr_warn("tracepoint '%s/%s' perf_event_open() failed: %s\n",
10816                         tp_category, tp_name,
10817                         libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
10818                 return err;
10819         }
10820         return pfd;
10821 }
10822
10823 struct bpf_link *bpf_program__attach_tracepoint_opts(const struct bpf_program *prog,
10824                                                      const char *tp_category,
10825                                                      const char *tp_name,
10826                                                      const struct bpf_tracepoint_opts *opts)
10827 {
10828         DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts);
10829         char errmsg[STRERR_BUFSIZE];
10830         struct bpf_link *link;
10831         int pfd, err;
10832
10833         if (!OPTS_VALID(opts, bpf_tracepoint_opts))
10834                 return libbpf_err_ptr(-EINVAL);
10835
10836         pe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0);
10837
10838         pfd = perf_event_open_tracepoint(tp_category, tp_name);
10839         if (pfd < 0) {
10840                 pr_warn("prog '%s': failed to create tracepoint '%s/%s' perf event: %s\n",
10841                         prog->name, tp_category, tp_name,
10842                         libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
10843                 return libbpf_err_ptr(pfd);
10844         }
10845         link = bpf_program__attach_perf_event_opts(prog, pfd, &pe_opts);
10846         err = libbpf_get_error(link);
10847         if (err) {
10848                 close(pfd);
10849                 pr_warn("prog '%s': failed to attach to tracepoint '%s/%s': %s\n",
10850                         prog->name, tp_category, tp_name,
10851                         libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
10852                 return libbpf_err_ptr(err);
10853         }
10854         return link;
10855 }
10856
/* Attach prog to tracepoint tp_category/tp_name without any extra options. */
struct bpf_link *bpf_program__attach_tracepoint(const struct bpf_program *prog,
                                                const char *tp_category,
                                                const char *tp_name)
{
        const struct bpf_tracepoint_opts *no_opts = NULL;

        return bpf_program__attach_tracepoint_opts(prog, tp_category, tp_name, no_opts);
}
10863
10864 static int attach_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link)
10865 {
10866         char *sec_name, *tp_cat, *tp_name;
10867
10868         *link = NULL;
10869
10870         /* no auto-attach for SEC("tp") or SEC("tracepoint") */
10871         if (strcmp(prog->sec_name, "tp") == 0 || strcmp(prog->sec_name, "tracepoint") == 0)
10872                 return 0;
10873
10874         sec_name = strdup(prog->sec_name);
10875         if (!sec_name)
10876                 return -ENOMEM;
10877
10878         /* extract "tp/<category>/<name>" or "tracepoint/<category>/<name>" */
10879         if (str_has_pfx(prog->sec_name, "tp/"))
10880                 tp_cat = sec_name + sizeof("tp/") - 1;
10881         else
10882                 tp_cat = sec_name + sizeof("tracepoint/") - 1;
10883         tp_name = strchr(tp_cat, '/');
10884         if (!tp_name) {
10885                 free(sec_name);
10886                 return -EINVAL;
10887         }
10888         *tp_name = '\0';
10889         tp_name++;
10890
10891         *link = bpf_program__attach_tracepoint(prog, tp_cat, tp_name);
10892         free(sec_name);
10893         return libbpf_get_error(*link);
10894 }
10895
10896 struct bpf_link *bpf_program__attach_raw_tracepoint(const struct bpf_program *prog,
10897                                                     const char *tp_name)
10898 {
10899         char errmsg[STRERR_BUFSIZE];
10900         struct bpf_link *link;
10901         int prog_fd, pfd;
10902
10903         prog_fd = bpf_program__fd(prog);
10904         if (prog_fd < 0) {
10905                 pr_warn("prog '%s': can't attach before loaded\n", prog->name);
10906                 return libbpf_err_ptr(-EINVAL);
10907         }
10908
10909         link = calloc(1, sizeof(*link));
10910         if (!link)
10911                 return libbpf_err_ptr(-ENOMEM);
10912         link->detach = &bpf_link__detach_fd;
10913
10914         pfd = bpf_raw_tracepoint_open(tp_name, prog_fd);
10915         if (pfd < 0) {
10916                 pfd = -errno;
10917                 free(link);
10918                 pr_warn("prog '%s': failed to attach to raw tracepoint '%s': %s\n",
10919                         prog->name, tp_name, libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
10920                 return libbpf_err_ptr(pfd);
10921         }
10922         link->fd = pfd;
10923         return link;
10924 }
10925
10926 static int attach_raw_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link)
10927 {
10928         static const char *const prefixes[] = {
10929                 "raw_tp",
10930                 "raw_tracepoint",
10931                 "raw_tp.w",
10932                 "raw_tracepoint.w",
10933         };
10934         size_t i;
10935         const char *tp_name = NULL;
10936
10937         *link = NULL;
10938
10939         for (i = 0; i < ARRAY_SIZE(prefixes); i++) {
10940                 size_t pfx_len;
10941
10942                 if (!str_has_pfx(prog->sec_name, prefixes[i]))
10943                         continue;
10944
10945                 pfx_len = strlen(prefixes[i]);
10946                 /* no auto-attach case of, e.g., SEC("raw_tp") */
10947                 if (prog->sec_name[pfx_len] == '\0')
10948                         return 0;
10949
10950                 if (prog->sec_name[pfx_len] != '/')
10951                         continue;
10952
10953                 tp_name = prog->sec_name + pfx_len + 1;
10954                 break;
10955         }
10956
10957         if (!tp_name) {
10958                 pr_warn("prog '%s': invalid section name '%s'\n",
10959                         prog->name, prog->sec_name);
10960                 return -EINVAL;
10961         }
10962
10963         *link = bpf_program__attach_raw_tracepoint(prog, tp_name);
10964         return libbpf_get_error(link);
10965 }
10966
10967 /* Common logic for all BPF program types that attach to a btf_id */
10968 static struct bpf_link *bpf_program__attach_btf_id(const struct bpf_program *prog,
10969                                                    const struct bpf_trace_opts *opts)
10970 {
10971         LIBBPF_OPTS(bpf_link_create_opts, link_opts);
10972         char errmsg[STRERR_BUFSIZE];
10973         struct bpf_link *link;
10974         int prog_fd, pfd;
10975
10976         if (!OPTS_VALID(opts, bpf_trace_opts))
10977                 return libbpf_err_ptr(-EINVAL);
10978
10979         prog_fd = bpf_program__fd(prog);
10980         if (prog_fd < 0) {
10981                 pr_warn("prog '%s': can't attach before loaded\n", prog->name);
10982                 return libbpf_err_ptr(-EINVAL);
10983         }
10984
10985         link = calloc(1, sizeof(*link));
10986         if (!link)
10987                 return libbpf_err_ptr(-ENOMEM);
10988         link->detach = &bpf_link__detach_fd;
10989
10990         /* libbpf is smart enough to redirect to BPF_RAW_TRACEPOINT_OPEN on old kernels */
10991         link_opts.tracing.cookie = OPTS_GET(opts, cookie, 0);
10992         pfd = bpf_link_create(prog_fd, 0, bpf_program__expected_attach_type(prog), &link_opts);
10993         if (pfd < 0) {
10994                 pfd = -errno;
10995                 free(link);
10996                 pr_warn("prog '%s': failed to attach: %s\n",
10997                         prog->name, libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
10998                 return libbpf_err_ptr(pfd);
10999         }
11000         link->fd = pfd;
11001         return link;
11002 }
11003
/* Attach a btf_id-based tracing program with default options (cookie 0). */
struct bpf_link *bpf_program__attach_trace(const struct bpf_program *prog)
{
        const struct bpf_trace_opts *defaults = NULL;

        return bpf_program__attach_btf_id(prog, defaults);
}
11008
/* Attach a btf_id-based tracing program, forwarding the caller's options
 * unchanged to the common btf_id attach path.
 */
struct bpf_link *bpf_program__attach_trace_opts(const struct bpf_program *prog,
                                                const struct bpf_trace_opts *opts)
{
        struct bpf_link *link = bpf_program__attach_btf_id(prog, opts);

        return link;
}
11014
/* Attach an LSM program; it shares the btf_id attach path and uses default
 * options.
 */
struct bpf_link *bpf_program__attach_lsm(const struct bpf_program *prog)
{
        const struct bpf_trace_opts *defaults = NULL;

        return bpf_program__attach_btf_id(prog, defaults);
}
11019
/* Auto-attach handler wrapping bpf_program__attach_trace(): stores the
 * resulting link in *link and returns its error code (0 on success).
 */
static int attach_trace(const struct bpf_program *prog, long cookie, struct bpf_link **link)
{
        struct bpf_link *res = bpf_program__attach_trace(prog);

        *link = res;
        return libbpf_get_error(res);
}
11025
/* Auto-attach handler wrapping bpf_program__attach_lsm(): stores the
 * resulting link in *link and returns its error code (0 on success).
 */
static int attach_lsm(const struct bpf_program *prog, long cookie, struct bpf_link **link)
{
        struct bpf_link *res = bpf_program__attach_lsm(prog);

        *link = res;
        return libbpf_get_error(res);
}
11031
11032 static struct bpf_link *
11033 bpf_program__attach_fd(const struct bpf_program *prog, int target_fd, int btf_id,
11034                        const char *target_name)
11035 {
11036         DECLARE_LIBBPF_OPTS(bpf_link_create_opts, opts,
11037                             .target_btf_id = btf_id);
11038         enum bpf_attach_type attach_type;
11039         char errmsg[STRERR_BUFSIZE];
11040         struct bpf_link *link;
11041         int prog_fd, link_fd;
11042
11043         prog_fd = bpf_program__fd(prog);
11044         if (prog_fd < 0) {
11045                 pr_warn("prog '%s': can't attach before loaded\n", prog->name);
11046                 return libbpf_err_ptr(-EINVAL);
11047         }
11048
11049         link = calloc(1, sizeof(*link));
11050         if (!link)
11051                 return libbpf_err_ptr(-ENOMEM);
11052         link->detach = &bpf_link__detach_fd;
11053
11054         attach_type = bpf_program__expected_attach_type(prog);
11055         link_fd = bpf_link_create(prog_fd, target_fd, attach_type, &opts);
11056         if (link_fd < 0) {
11057                 link_fd = -errno;
11058                 free(link);
11059                 pr_warn("prog '%s': failed to attach to %s: %s\n",
11060                         prog->name, target_name,
11061                         libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg)));
11062                 return libbpf_err_ptr(link_fd);
11063         }
11064         link->fd = link_fd;
11065         return link;
11066 }
11067
/* Link prog to the cgroup referred to by cgroup_fd (no BTF ID involved). */
struct bpf_link *
bpf_program__attach_cgroup(const struct bpf_program *prog, int cgroup_fd)
{
        const char *target_name = "cgroup";

        return bpf_program__attach_fd(prog, cgroup_fd, 0 /* btf_id */, target_name);
}
11073
/* Link prog to the network namespace referred to by netns_fd (no BTF ID
 * involved).
 */
struct bpf_link *
bpf_program__attach_netns(const struct bpf_program *prog, int netns_fd)
{
        const char *target_name = "netns";

        return bpf_program__attach_fd(prog, netns_fd, 0 /* btf_id */, target_name);
}
11079
/* Link an XDP program to the network interface with index ifindex. */
struct bpf_link *bpf_program__attach_xdp(const struct bpf_program *prog, int ifindex)
{
        const char *target_name = "xdp";

        /* target_fd/target_ifindex use the same field in LINK_CREATE */
        return bpf_program__attach_fd(prog, ifindex, 0 /* btf_id */, target_name);
}
11085
11086 struct bpf_link *bpf_program__attach_freplace(const struct bpf_program *prog,
11087                                               int target_fd,
11088                                               const char *attach_func_name)
11089 {
11090         int btf_id;
11091
11092         if (!!target_fd != !!attach_func_name) {
11093                 pr_warn("prog '%s': supply none or both of target_fd and attach_func_name\n",
11094                         prog->name);
11095                 return libbpf_err_ptr(-EINVAL);
11096         }
11097
11098         if (prog->type != BPF_PROG_TYPE_EXT) {
11099                 pr_warn("prog '%s': only BPF_PROG_TYPE_EXT can attach as freplace",
11100                         prog->name);
11101                 return libbpf_err_ptr(-EINVAL);
11102         }
11103
11104         if (target_fd) {
11105                 btf_id = libbpf_find_prog_btf_id(attach_func_name, target_fd);
11106                 if (btf_id < 0)
11107                         return libbpf_err_ptr(btf_id);
11108
11109                 return bpf_program__attach_fd(prog, target_fd, btf_id, "freplace");
11110         } else {
11111                 /* no target, so use raw_tracepoint_open for compatibility
11112                  * with old kernels
11113                  */
11114                 return bpf_program__attach_trace(prog);
11115         }
11116 }
11117
11118 struct bpf_link *
11119 bpf_program__attach_iter(const struct bpf_program *prog,
11120                          const struct bpf_iter_attach_opts *opts)
11121 {
11122         DECLARE_LIBBPF_OPTS(bpf_link_create_opts, link_create_opts);
11123         char errmsg[STRERR_BUFSIZE];
11124         struct bpf_link *link;
11125         int prog_fd, link_fd;
11126         __u32 target_fd = 0;
11127
11128         if (!OPTS_VALID(opts, bpf_iter_attach_opts))
11129                 return libbpf_err_ptr(-EINVAL);
11130
11131         link_create_opts.iter_info = OPTS_GET(opts, link_info, (void *)0);
11132         link_create_opts.iter_info_len = OPTS_GET(opts, link_info_len, 0);
11133
11134         prog_fd = bpf_program__fd(prog);
11135         if (prog_fd < 0) {
11136                 pr_warn("prog '%s': can't attach before loaded\n", prog->name);
11137                 return libbpf_err_ptr(-EINVAL);
11138         }
11139
11140         link = calloc(1, sizeof(*link));
11141         if (!link)
11142                 return libbpf_err_ptr(-ENOMEM);
11143         link->detach = &bpf_link__detach_fd;
11144
11145         link_fd = bpf_link_create(prog_fd, target_fd, BPF_TRACE_ITER,
11146                                   &link_create_opts);
11147         if (link_fd < 0) {
11148                 link_fd = -errno;
11149                 free(link);
11150                 pr_warn("prog '%s': failed to attach to iterator: %s\n",
11151                         prog->name, libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg)));
11152                 return libbpf_err_ptr(link_fd);
11153         }
11154         link->fd = link_fd;
11155         return link;
11156 }
11157
/* Auto-attach handler wrapping bpf_program__attach_iter() with no options:
 * stores the resulting link in *link and returns its error code.
 */
static int attach_iter(const struct bpf_program *prog, long cookie, struct bpf_link **link)
{
        struct bpf_link *res = bpf_program__attach_iter(prog, NULL);

        *link = res;
        return libbpf_get_error(res);
}
11163
11164 struct bpf_link *bpf_program__attach(const struct bpf_program *prog)
11165 {
11166         struct bpf_link *link = NULL;
11167         int err;
11168
11169         if (!prog->sec_def || !prog->sec_def->prog_attach_fn)
11170                 return libbpf_err_ptr(-EOPNOTSUPP);
11171
11172         err = prog->sec_def->prog_attach_fn(prog, prog->sec_def->cookie, &link);
11173         if (err)
11174                 return libbpf_err_ptr(err);
11175
11176         /* When calling bpf_program__attach() explicitly, auto-attach support
11177          * is expected to work, so NULL returned link is considered an error.
11178          * This is different for skeleton's attach, see comment in
11179          * bpf_object__attach_skeleton().
11180          */
11181         if (!link)
11182                 return libbpf_err_ptr(-EOPNOTSUPP);
11183
11184         return link;
11185 }
11186
11187 static int bpf_link__detach_struct_ops(struct bpf_link *link)
11188 {
11189         __u32 zero = 0;
11190
11191         if (bpf_map_delete_elem(link->fd, &zero))
11192                 return -errno;
11193
11194         return 0;
11195 }
11196
11197 struct bpf_link *bpf_map__attach_struct_ops(const struct bpf_map *map)
11198 {
11199         struct bpf_struct_ops *st_ops;
11200         struct bpf_link *link;
11201         __u32 i, zero = 0;
11202         int err;
11203
11204         if (!bpf_map__is_struct_ops(map) || map->fd == -1)
11205                 return libbpf_err_ptr(-EINVAL);
11206
11207         link = calloc(1, sizeof(*link));
11208         if (!link)
11209                 return libbpf_err_ptr(-EINVAL);
11210
11211         st_ops = map->st_ops;
11212         for (i = 0; i < btf_vlen(st_ops->type); i++) {
11213                 struct bpf_program *prog = st_ops->progs[i];
11214                 void *kern_data;
11215                 int prog_fd;
11216
11217                 if (!prog)
11218                         continue;
11219
11220                 prog_fd = bpf_program__fd(prog);
11221                 kern_data = st_ops->kern_vdata + st_ops->kern_func_off[i];
11222                 *(unsigned long *)kern_data = prog_fd;
11223         }
11224
11225         err = bpf_map_update_elem(map->fd, &zero, st_ops->kern_vdata, 0);
11226         if (err) {
11227                 err = -errno;
11228                 free(link);
11229                 return libbpf_err_ptr(err);
11230         }
11231
11232         link->detach = bpf_link__detach_struct_ops;
11233         link->fd = map->fd;
11234
11235         return link;
11236 }
11237
11238 typedef enum bpf_perf_event_ret (*bpf_perf_event_print_t)(struct perf_event_header *hdr,
11239                                                           void *private_data);
11240
11241 static enum bpf_perf_event_ret
11242 perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size,
11243                        void **copy_mem, size_t *copy_size,
11244                        bpf_perf_event_print_t fn, void *private_data)
11245 {
11246         struct perf_event_mmap_page *header = mmap_mem;
11247         __u64 data_head = ring_buffer_read_head(header);
11248         __u64 data_tail = header->data_tail;
11249         void *base = ((__u8 *)header) + page_size;
11250         int ret = LIBBPF_PERF_EVENT_CONT;
11251         struct perf_event_header *ehdr;
11252         size_t ehdr_size;
11253
11254         while (data_head != data_tail) {
11255                 ehdr = base + (data_tail & (mmap_size - 1));
11256                 ehdr_size = ehdr->size;
11257
11258                 if (((void *)ehdr) + ehdr_size > base + mmap_size) {
11259                         void *copy_start = ehdr;
11260                         size_t len_first = base + mmap_size - copy_start;
11261                         size_t len_secnd = ehdr_size - len_first;
11262
11263                         if (*copy_size < ehdr_size) {
11264                                 free(*copy_mem);
11265                                 *copy_mem = malloc(ehdr_size);
11266                                 if (!*copy_mem) {
11267                                         *copy_size = 0;
11268                                         ret = LIBBPF_PERF_EVENT_ERROR;
11269                                         break;
11270                                 }
11271                                 *copy_size = ehdr_size;
11272                         }
11273
11274                         memcpy(*copy_mem, copy_start, len_first);
11275                         memcpy(*copy_mem + len_first, base, len_secnd);
11276                         ehdr = *copy_mem;
11277                 }
11278
11279                 ret = fn(ehdr, private_data);
11280                 data_tail += ehdr_size;
11281                 if (ret != LIBBPF_PERF_EVENT_CONT)
11282                         break;
11283         }
11284
11285         ring_buffer_write_tail(header, data_tail);
11286         return libbpf_err(ret);
11287 }
11288
11289 struct perf_buffer;
11290
11291 struct perf_buffer_params {
11292         struct perf_event_attr *attr;
11293         /* if event_cb is specified, it takes precendence */
11294         perf_buffer_event_fn event_cb;
11295         /* sample_cb and lost_cb are higher-level common-case callbacks */
11296         perf_buffer_sample_fn sample_cb;
11297         perf_buffer_lost_fn lost_cb;
11298         void *ctx;
11299         int cpu_cnt;
11300         int *cpus;
11301         int *map_keys;
11302 };
11303
/* State of a single per-CPU ring buffer */
struct perf_cpu_buf {
        /* perf_buffer this CPU buffer was opened for */
        struct perf_buffer *pb;
        /* mmap()'ed memory */
        void *base;
        /* for reconstructing segmented data */
        void *buf;
        size_t buf_size;
        /* FD returned by perf_event_open() */
        int fd;
        int cpu;
        /* key deleted from the perf event array map on teardown */
        int map_key;
};
11313
11314 struct perf_buffer {
11315         perf_buffer_event_fn event_cb;
11316         perf_buffer_sample_fn sample_cb;
11317         perf_buffer_lost_fn lost_cb;
11318         void *ctx; /* passed into callbacks */
11319
11320         size_t page_size;
11321         size_t mmap_size;
11322         struct perf_cpu_buf **cpu_bufs;
11323         struct epoll_event *events;
11324         int cpu_cnt; /* number of allocated CPU buffers */
11325         int epoll_fd; /* perf event FD */
11326         int map_fd; /* BPF_MAP_TYPE_PERF_EVENT_ARRAY BPF map FD */
11327 };
11328
11329 static void perf_buffer__free_cpu_buf(struct perf_buffer *pb,
11330                                       struct perf_cpu_buf *cpu_buf)
11331 {
11332         if (!cpu_buf)
11333                 return;
11334         if (cpu_buf->base &&
11335             munmap(cpu_buf->base, pb->mmap_size + pb->page_size))
11336                 pr_warn("failed to munmap cpu_buf #%d\n", cpu_buf->cpu);
11337         if (cpu_buf->fd >= 0) {
11338                 ioctl(cpu_buf->fd, PERF_EVENT_IOC_DISABLE, 0);
11339                 close(cpu_buf->fd);
11340         }
11341         free(cpu_buf->buf);
11342         free(cpu_buf);
11343 }
11344
11345 void perf_buffer__free(struct perf_buffer *pb)
11346 {
11347         int i;
11348
11349         if (IS_ERR_OR_NULL(pb))
11350                 return;
11351         if (pb->cpu_bufs) {
11352                 for (i = 0; i < pb->cpu_cnt; i++) {
11353                         struct perf_cpu_buf *cpu_buf = pb->cpu_bufs[i];
11354
11355                         if (!cpu_buf)
11356                                 continue;
11357
11358                         bpf_map_delete_elem(pb->map_fd, &cpu_buf->map_key);
11359                         perf_buffer__free_cpu_buf(pb, cpu_buf);
11360                 }
11361                 free(pb->cpu_bufs);
11362         }
11363         if (pb->epoll_fd >= 0)
11364                 close(pb->epoll_fd);
11365         free(pb->events);
11366         free(pb);
11367 }
11368
11369 static struct perf_cpu_buf *
11370 perf_buffer__open_cpu_buf(struct perf_buffer *pb, struct perf_event_attr *attr,
11371                           int cpu, int map_key)
11372 {
11373         struct perf_cpu_buf *cpu_buf;
11374         char msg[STRERR_BUFSIZE];
11375         int err;
11376
11377         cpu_buf = calloc(1, sizeof(*cpu_buf));
11378         if (!cpu_buf)
11379                 return ERR_PTR(-ENOMEM);
11380
11381         cpu_buf->pb = pb;
11382         cpu_buf->cpu = cpu;
11383         cpu_buf->map_key = map_key;
11384
11385         cpu_buf->fd = syscall(__NR_perf_event_open, attr, -1 /* pid */, cpu,
11386                               -1, PERF_FLAG_FD_CLOEXEC);
11387         if (cpu_buf->fd < 0) {
11388                 err = -errno;
11389                 pr_warn("failed to open perf buffer event on cpu #%d: %s\n",
11390                         cpu, libbpf_strerror_r(err, msg, sizeof(msg)));
11391                 goto error;
11392         }
11393
11394         cpu_buf->base = mmap(NULL, pb->mmap_size + pb->page_size,
11395                              PROT_READ | PROT_WRITE, MAP_SHARED,
11396                              cpu_buf->fd, 0);
11397         if (cpu_buf->base == MAP_FAILED) {
11398                 cpu_buf->base = NULL;
11399                 err = -errno;
11400                 pr_warn("failed to mmap perf buffer on cpu #%d: %s\n",
11401                         cpu, libbpf_strerror_r(err, msg, sizeof(msg)));
11402                 goto error;
11403         }
11404
11405         if (ioctl(cpu_buf->fd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
11406                 err = -errno;
11407                 pr_warn("failed to enable perf buffer event on cpu #%d: %s\n",
11408                         cpu, libbpf_strerror_r(err, msg, sizeof(msg)));
11409                 goto error;
11410         }
11411
11412         return cpu_buf;
11413
11414 error:
11415         perf_buffer__free_cpu_buf(pb, cpu_buf);
11416         return (struct perf_cpu_buf *)ERR_PTR(err);
11417 }
11418
11419 static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt,
11420                                               struct perf_buffer_params *p);
11421
11422 struct perf_buffer *perf_buffer__new(int map_fd, size_t page_cnt,
11423                                      perf_buffer_sample_fn sample_cb,
11424                                      perf_buffer_lost_fn lost_cb,
11425                                      void *ctx,
11426                                      const struct perf_buffer_opts *opts)
11427 {
11428         struct perf_buffer_params p = {};
11429         struct perf_event_attr attr = {};
11430
11431         if (!OPTS_VALID(opts, perf_buffer_opts))
11432                 return libbpf_err_ptr(-EINVAL);
11433
11434         attr.config = PERF_COUNT_SW_BPF_OUTPUT;
11435         attr.type = PERF_TYPE_SOFTWARE;
11436         attr.sample_type = PERF_SAMPLE_RAW;
11437         attr.sample_period = 1;
11438         attr.wakeup_events = 1;
11439
11440         p.attr = &attr;
11441         p.sample_cb = sample_cb;
11442         p.lost_cb = lost_cb;
11443         p.ctx = ctx;
11444
11445         return libbpf_ptr(__perf_buffer__new(map_fd, page_cnt, &p));
11446 }
11447
11448 struct perf_buffer *perf_buffer__new_raw(int map_fd, size_t page_cnt,
11449                                          struct perf_event_attr *attr,
11450                                          perf_buffer_event_fn event_cb, void *ctx,
11451                                          const struct perf_buffer_raw_opts *opts)
11452 {
11453         struct perf_buffer_params p = {};
11454
11455         if (!attr)
11456                 return libbpf_err_ptr(-EINVAL);
11457
11458         if (!OPTS_VALID(opts, perf_buffer_raw_opts))
11459                 return libbpf_err_ptr(-EINVAL);
11460
11461         p.attr = attr;
11462         p.event_cb = event_cb;
11463         p.ctx = ctx;
11464         p.cpu_cnt = OPTS_GET(opts, cpu_cnt, 0);
11465         p.cpus = OPTS_GET(opts, cpus, NULL);
11466         p.map_keys = OPTS_GET(opts, map_keys, NULL);
11467
11468         return libbpf_ptr(__perf_buffer__new(map_fd, page_cnt, &p));
11469 }
11470
11471 static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt,
11472                                               struct perf_buffer_params *p)
11473 {
11474         const char *online_cpus_file = "/sys/devices/system/cpu/online";
11475         struct bpf_map_info map;
11476         char msg[STRERR_BUFSIZE];
11477         struct perf_buffer *pb;
11478         bool *online = NULL;
11479         __u32 map_info_len;
11480         int err, i, j, n;
11481
11482         if (page_cnt == 0 || (page_cnt & (page_cnt - 1))) {
11483                 pr_warn("page count should be power of two, but is %zu\n",
11484                         page_cnt);
11485                 return ERR_PTR(-EINVAL);
11486         }
11487
11488         /* best-effort sanity checks */
11489         memset(&map, 0, sizeof(map));
11490         map_info_len = sizeof(map);
11491         err = bpf_obj_get_info_by_fd(map_fd, &map, &map_info_len);
11492         if (err) {
11493                 err = -errno;
11494                 /* if BPF_OBJ_GET_INFO_BY_FD is supported, will return
11495                  * -EBADFD, -EFAULT, or -E2BIG on real error
11496                  */
11497                 if (err != -EINVAL) {
11498                         pr_warn("failed to get map info for map FD %d: %s\n",
11499                                 map_fd, libbpf_strerror_r(err, msg, sizeof(msg)));
11500                         return ERR_PTR(err);
11501                 }
11502                 pr_debug("failed to get map info for FD %d; API not supported? Ignoring...\n",
11503                          map_fd);
11504         } else {
11505                 if (map.type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) {
11506                         pr_warn("map '%s' should be BPF_MAP_TYPE_PERF_EVENT_ARRAY\n",
11507                                 map.name);
11508                         return ERR_PTR(-EINVAL);
11509                 }
11510         }
11511
11512         pb = calloc(1, sizeof(*pb));
11513         if (!pb)
11514                 return ERR_PTR(-ENOMEM);
11515
11516         pb->event_cb = p->event_cb;
11517         pb->sample_cb = p->sample_cb;
11518         pb->lost_cb = p->lost_cb;
11519         pb->ctx = p->ctx;
11520
11521         pb->page_size = getpagesize();
11522         pb->mmap_size = pb->page_size * page_cnt;
11523         pb->map_fd = map_fd;
11524
11525         pb->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
11526         if (pb->epoll_fd < 0) {
11527                 err = -errno;
11528                 pr_warn("failed to create epoll instance: %s\n",
11529                         libbpf_strerror_r(err, msg, sizeof(msg)));
11530                 goto error;
11531         }
11532
11533         if (p->cpu_cnt > 0) {
11534                 pb->cpu_cnt = p->cpu_cnt;
11535         } else {
11536                 pb->cpu_cnt = libbpf_num_possible_cpus();
11537                 if (pb->cpu_cnt < 0) {
11538                         err = pb->cpu_cnt;
11539                         goto error;
11540                 }
11541                 if (map.max_entries && map.max_entries < pb->cpu_cnt)
11542                         pb->cpu_cnt = map.max_entries;
11543         }
11544
11545         pb->events = calloc(pb->cpu_cnt, sizeof(*pb->events));
11546         if (!pb->events) {
11547                 err = -ENOMEM;
11548                 pr_warn("failed to allocate events: out of memory\n");
11549                 goto error;
11550         }
11551         pb->cpu_bufs = calloc(pb->cpu_cnt, sizeof(*pb->cpu_bufs));
11552         if (!pb->cpu_bufs) {
11553                 err = -ENOMEM;
11554                 pr_warn("failed to allocate buffers: out of memory\n");
11555                 goto error;
11556         }
11557
11558         err = parse_cpu_mask_file(online_cpus_file, &online, &n);
11559         if (err) {
11560                 pr_warn("failed to get online CPU mask: %d\n", err);
11561                 goto error;
11562         }
11563
11564         for (i = 0, j = 0; i < pb->cpu_cnt; i++) {
11565                 struct perf_cpu_buf *cpu_buf;
11566                 int cpu, map_key;
11567
11568                 cpu = p->cpu_cnt > 0 ? p->cpus[i] : i;
11569                 map_key = p->cpu_cnt > 0 ? p->map_keys[i] : i;
11570
11571                 /* in case user didn't explicitly requested particular CPUs to
11572                  * be attached to, skip offline/not present CPUs
11573                  */
11574                 if (p->cpu_cnt <= 0 && (cpu >= n || !online[cpu]))
11575                         continue;
11576
11577                 cpu_buf = perf_buffer__open_cpu_buf(pb, p->attr, cpu, map_key);
11578                 if (IS_ERR(cpu_buf)) {
11579                         err = PTR_ERR(cpu_buf);
11580                         goto error;
11581                 }
11582
11583                 pb->cpu_bufs[j] = cpu_buf;
11584
11585                 err = bpf_map_update_elem(pb->map_fd, &map_key,
11586                                           &cpu_buf->fd, 0);
11587                 if (err) {
11588                         err = -errno;
11589                         pr_warn("failed to set cpu #%d, key %d -> perf FD %d: %s\n",
11590                                 cpu, map_key, cpu_buf->fd,
11591                                 libbpf_strerror_r(err, msg, sizeof(msg)));
11592                         goto error;
11593                 }
11594
11595                 pb->events[j].events = EPOLLIN;
11596                 pb->events[j].data.ptr = cpu_buf;
11597                 if (epoll_ctl(pb->epoll_fd, EPOLL_CTL_ADD, cpu_buf->fd,
11598                               &pb->events[j]) < 0) {
11599                         err = -errno;
11600                         pr_warn("failed to epoll_ctl cpu #%d perf FD %d: %s\n",
11601                                 cpu, cpu_buf->fd,
11602                                 libbpf_strerror_r(err, msg, sizeof(msg)));
11603                         goto error;
11604                 }
11605                 j++;
11606         }
11607         pb->cpu_cnt = j;
11608         free(online);
11609
11610         return pb;
11611
11612 error:
11613         free(online);
11614         if (pb)
11615                 perf_buffer__free(pb);
11616         return ERR_PTR(err);
11617 }
11618
/* Layout that perf_buffer__process_record() overlays on a PERF_RECORD_SAMPLE
 * record: the generic perf event header followed by a size and the data
 * bytes. Both 'data' and 'size' are handed to the user's sample_cb.
 */
struct perf_sample_raw {
        struct perf_event_header header;
        uint32_t size;
        char data[];
};
11624
/* Layout that perf_buffer__process_record() overlays on a PERF_RECORD_LOST
 * record. Of these fields only 'lost' is consumed there: it is forwarded to
 * the user's lost_cb.
 */
struct perf_sample_lost {
        struct perf_event_header header;
        uint64_t id;
        uint64_t lost;
        uint64_t sample_id;
};
11631
11632 static enum bpf_perf_event_ret
11633 perf_buffer__process_record(struct perf_event_header *e, void *ctx)
11634 {
11635         struct perf_cpu_buf *cpu_buf = ctx;
11636         struct perf_buffer *pb = cpu_buf->pb;
11637         void *data = e;
11638
11639         /* user wants full control over parsing perf event */
11640         if (pb->event_cb)
11641                 return pb->event_cb(pb->ctx, cpu_buf->cpu, e);
11642
11643         switch (e->type) {
11644         case PERF_RECORD_SAMPLE: {
11645                 struct perf_sample_raw *s = data;
11646
11647                 if (pb->sample_cb)
11648                         pb->sample_cb(pb->ctx, cpu_buf->cpu, s->data, s->size);
11649                 break;
11650         }
11651         case PERF_RECORD_LOST: {
11652                 struct perf_sample_lost *s = data;
11653
11654                 if (pb->lost_cb)
11655                         pb->lost_cb(pb->ctx, cpu_buf->cpu, s->lost);
11656                 break;
11657         }
11658         default:
11659                 pr_warn("unknown perf sample type %d\n", e->type);
11660                 return LIBBPF_PERF_EVENT_ERROR;
11661         }
11662         return LIBBPF_PERF_EVENT_CONT;
11663 }
11664
11665 static int perf_buffer__process_records(struct perf_buffer *pb,
11666                                         struct perf_cpu_buf *cpu_buf)
11667 {
11668         enum bpf_perf_event_ret ret;
11669
11670         ret = perf_event_read_simple(cpu_buf->base, pb->mmap_size,
11671                                      pb->page_size, &cpu_buf->buf,
11672                                      &cpu_buf->buf_size,
11673                                      perf_buffer__process_record, cpu_buf);
11674         if (ret != LIBBPF_PERF_EVENT_CONT)
11675                 return ret;
11676         return 0;
11677 }
11678
11679 int perf_buffer__epoll_fd(const struct perf_buffer *pb)
11680 {
11681         return pb->epoll_fd;
11682 }
11683
11684 int perf_buffer__poll(struct perf_buffer *pb, int timeout_ms)
11685 {
11686         int i, cnt, err;
11687
11688         cnt = epoll_wait(pb->epoll_fd, pb->events, pb->cpu_cnt, timeout_ms);
11689         if (cnt < 0)
11690                 return -errno;
11691
11692         for (i = 0; i < cnt; i++) {
11693                 struct perf_cpu_buf *cpu_buf = pb->events[i].data.ptr;
11694
11695                 err = perf_buffer__process_records(pb, cpu_buf);
11696                 if (err) {
11697                         pr_warn("error while processing records: %d\n", err);
11698                         return libbpf_err(err);
11699                 }
11700         }
11701         return cnt;
11702 }
11703
11704 /* Return number of PERF_EVENT_ARRAY map slots set up by this perf_buffer
11705  * manager.
11706  */
11707 size_t perf_buffer__buffer_cnt(const struct perf_buffer *pb)
11708 {
11709         return pb->cpu_cnt;
11710 }
11711
11712 /*
11713  * Return perf_event FD of a ring buffer in *buf_idx* slot of
11714  * PERF_EVENT_ARRAY BPF map. This FD can be polled for new data using
11715  * select()/poll()/epoll() Linux syscalls.
11716  */
11717 int perf_buffer__buffer_fd(const struct perf_buffer *pb, size_t buf_idx)
11718 {
11719         struct perf_cpu_buf *cpu_buf;
11720
11721         if (buf_idx >= pb->cpu_cnt)
11722                 return libbpf_err(-EINVAL);
11723
11724         cpu_buf = pb->cpu_bufs[buf_idx];
11725         if (!cpu_buf)
11726                 return libbpf_err(-ENOENT);
11727
11728         return cpu_buf->fd;
11729 }
11730
11731 /*
11732  * Consume data from perf ring buffer corresponding to slot *buf_idx* in
11733  * PERF_EVENT_ARRAY BPF map without waiting/polling. If there is no data to
11734  * consume, do nothing and return success.
11735  * Returns:
11736  *   - 0 on success;
11737  *   - <0 on failure.
11738  */
11739 int perf_buffer__consume_buffer(struct perf_buffer *pb, size_t buf_idx)
11740 {
11741         struct perf_cpu_buf *cpu_buf;
11742
11743         if (buf_idx >= pb->cpu_cnt)
11744                 return libbpf_err(-EINVAL);
11745
11746         cpu_buf = pb->cpu_bufs[buf_idx];
11747         if (!cpu_buf)
11748                 return libbpf_err(-ENOENT);
11749
11750         return perf_buffer__process_records(pb, cpu_buf);
11751 }
11752
11753 int perf_buffer__consume(struct perf_buffer *pb)
11754 {
11755         int i, err;
11756
11757         for (i = 0; i < pb->cpu_cnt; i++) {
11758                 struct perf_cpu_buf *cpu_buf = pb->cpu_bufs[i];
11759
11760                 if (!cpu_buf)
11761                         continue;
11762
11763                 err = perf_buffer__process_records(pb, cpu_buf);
11764                 if (err) {
11765                         pr_warn("perf_buffer: failed to process records in buffer #%d: %d\n", i, err);
11766                         return libbpf_err(err);
11767                 }
11768         }
11769         return 0;
11770 }
11771
11772 int bpf_program__set_attach_target(struct bpf_program *prog,
11773                                    int attach_prog_fd,
11774                                    const char *attach_func_name)
11775 {
11776         int btf_obj_fd = 0, btf_id = 0, err;
11777
11778         if (!prog || attach_prog_fd < 0)
11779                 return libbpf_err(-EINVAL);
11780
11781         if (prog->obj->loaded)
11782                 return libbpf_err(-EINVAL);
11783
11784         if (attach_prog_fd && !attach_func_name) {
11785                 /* remember attach_prog_fd and let bpf_program__load() find
11786                  * BTF ID during the program load
11787                  */
11788                 prog->attach_prog_fd = attach_prog_fd;
11789                 return 0;
11790         }
11791
11792         if (attach_prog_fd) {
11793                 btf_id = libbpf_find_prog_btf_id(attach_func_name,
11794                                                  attach_prog_fd);
11795                 if (btf_id < 0)
11796                         return libbpf_err(btf_id);
11797         } else {
11798                 if (!attach_func_name)
11799                         return libbpf_err(-EINVAL);
11800
11801                 /* load btf_vmlinux, if not yet */
11802                 err = bpf_object__load_vmlinux_btf(prog->obj, true);
11803                 if (err)
11804                         return libbpf_err(err);
11805                 err = find_kernel_btf_id(prog->obj, attach_func_name,
11806                                          prog->expected_attach_type,
11807                                          &btf_obj_fd, &btf_id);
11808                 if (err)
11809                         return libbpf_err(err);
11810         }
11811
11812         prog->attach_btf_id = btf_id;
11813         prog->attach_btf_obj_fd = btf_obj_fd;
11814         prog->attach_prog_fd = attach_prog_fd;
11815         return 0;
11816 }
11817
/* Parse a CPU list string such as "0-3,5,7-8" into a freshly allocated
 * boolean mask.
 *
 * @s:       NUL-terminated list; items are separated by ',' or '\n' and each
 *           item is either a single CPU number or an inclusive "start-end"
 *           range.
 * @mask:    out; on success points to a malloc()-family buffer the caller
 *           must free(), where (*mask)[cpu] is true for every listed CPU.
 *           Set to NULL on failure.
 * @mask_sz: out; number of entries in *mask (highest listed CPU + 1).
 *           Set to 0 on failure.
 *
 * Ranges may appear in any order and may overlap; the mask only ever grows.
 *
 * Returns 0 on success, -EINVAL for malformed, invalid or empty input and
 * -ENOMEM when the mask cannot be (re)allocated.
 */
int parse_cpu_mask_str(const char *s, bool **mask, int *mask_sz)
{
        int err = 0, n, len, start, end = -1;
        bool *tmp;

        *mask = NULL;
        *mask_sz = 0;

        /* Each sub string separated by ',' has format \d+-\d+ or \d+ */
        while (*s) {
                if (*s == ',' || *s == '\n') {
                        s++;
                        continue;
                }
                n = sscanf(s, "%d%n-%d%n", &start, &len, &end, &len);
                if (n <= 0 || n > 2) {
                        pr_warn("Failed to get CPU range %s: %d\n", s, n);
                        err = -EINVAL;
                        goto cleanup;
                } else if (n == 1) {
                        end = start;
                }
                /* end == INT_MAX would overflow the end + 1 size below */
                if (start < 0 || start > end || end == INT_MAX) {
                        pr_warn("Invalid CPU range [%d,%d] in %s\n",
                                start, end, s);
                        err = -EINVAL;
                        goto cleanup;
                }
                /* Grow only: a range below the current size (out-of-order or
                 * overlapping input) must not shrink the buffer or compute
                 * a negative gap length.
                 */
                if (end + 1 > *mask_sz) {
                        tmp = realloc(*mask, (size_t)(end + 1) * sizeof(*tmp));
                        if (!tmp) {
                                err = -ENOMEM;
                                goto cleanup;
                        }
                        *mask = tmp;
                        /* clear just the newly added tail */
                        memset(tmp + *mask_sz, 0,
                               (size_t)(end + 1 - *mask_sz) * sizeof(*tmp));
                        *mask_sz = end + 1;
                }
                memset(*mask + start, 1,
                       (size_t)(end - start + 1) * sizeof(**mask));
                s += len;
        }
        if (!*mask_sz) {
                pr_warn("Empty CPU range\n");
                return -EINVAL;
        }
        return 0;
cleanup:
        free(*mask);
        *mask = NULL;
        /* keep the size consistent with the now-NULL mask */
        *mask_sz = 0;
        return err;
}
11867
11868 int parse_cpu_mask_file(const char *fcpu, bool **mask, int *mask_sz)
11869 {
11870         int fd, err = 0, len;
11871         char buf[128];
11872
11873         fd = open(fcpu, O_RDONLY | O_CLOEXEC);
11874         if (fd < 0) {
11875                 err = -errno;
11876                 pr_warn("Failed to open cpu mask file %s: %d\n", fcpu, err);
11877                 return err;
11878         }
11879         len = read(fd, buf, sizeof(buf));
11880         close(fd);
11881         if (len <= 0) {
11882                 err = len ? -errno : -EINVAL;
11883                 pr_warn("Failed to read cpu mask from %s: %d\n", fcpu, err);
11884                 return err;
11885         }
11886         if (len >= sizeof(buf)) {
11887                 pr_warn("CPU mask is too big in file %s\n", fcpu);
11888                 return -E2BIG;
11889         }
11890         buf[len] = '\0';
11891
11892         return parse_cpu_mask_str(buf, mask, mask_sz);
11893 }
11894
/* Count the CPUs listed in /sys/devices/system/cpu/possible.
 *
 * A positive result is remembered in a function-local static and returned
 * directly on later calls. Returns the count on success or the
 * (libbpf_err()-wrapped) error of parse_cpu_mask_file().
 */
int libbpf_num_possible_cpus(void)
{
        static const char *fcpu = "/sys/devices/system/cpu/possible";
        static int cpus;
        int cached, mask_sz, idx, err;
        int cnt = 0;
        bool *mask;

        cached = READ_ONCE(cpus);
        if (cached > 0)
                return cached;

        err = parse_cpu_mask_file(fcpu, &mask, &mask_sz);
        if (err)
                return libbpf_err(err);

        for (idx = 0; idx < mask_sz; idx++)
                cnt += mask[idx] ? 1 : 0;
        free(mask);

        WRITE_ONCE(cpus, cnt);
        return cnt;
}
11920
11921 static int populate_skeleton_maps(const struct bpf_object *obj,
11922                                   struct bpf_map_skeleton *maps,
11923                                   size_t map_cnt)
11924 {
11925         int i;
11926
11927         for (i = 0; i < map_cnt; i++) {
11928                 struct bpf_map **map = maps[i].map;
11929                 const char *name = maps[i].name;
11930                 void **mmaped = maps[i].mmaped;
11931
11932                 *map = bpf_object__find_map_by_name(obj, name);
11933                 if (!*map) {
11934                         pr_warn("failed to find skeleton map '%s'\n", name);
11935                         return -ESRCH;
11936                 }
11937
11938                 /* externs shouldn't be pre-setup from user code */
11939                 if (mmaped && (*map)->libbpf_type != LIBBPF_MAP_KCONFIG)
11940                         *mmaped = (*map)->mmaped;
11941         }
11942         return 0;
11943 }
11944
11945 static int populate_skeleton_progs(const struct bpf_object *obj,
11946                                    struct bpf_prog_skeleton *progs,
11947                                    size_t prog_cnt)
11948 {
11949         int i;
11950
11951         for (i = 0; i < prog_cnt; i++) {
11952                 struct bpf_program **prog = progs[i].prog;
11953                 const char *name = progs[i].name;
11954
11955                 *prog = bpf_object__find_program_by_name(obj, name);
11956                 if (!*prog) {
11957                         pr_warn("failed to find skeleton program '%s'\n", name);
11958                         return -ESRCH;
11959                 }
11960         }
11961         return 0;
11962 }
11963
11964 int bpf_object__open_skeleton(struct bpf_object_skeleton *s,
11965                               const struct bpf_object_open_opts *opts)
11966 {
11967         DECLARE_LIBBPF_OPTS(bpf_object_open_opts, skel_opts,
11968                 .object_name = s->name,
11969         );
11970         struct bpf_object *obj;
11971         int err;
11972
11973         /* Attempt to preserve opts->object_name, unless overriden by user
11974          * explicitly. Overwriting object name for skeletons is discouraged,
11975          * as it breaks global data maps, because they contain object name
11976          * prefix as their own map name prefix. When skeleton is generated,
11977          * bpftool is making an assumption that this name will stay the same.
11978          */
11979         if (opts) {
11980                 memcpy(&skel_opts, opts, sizeof(*opts));
11981                 if (!opts->object_name)
11982                         skel_opts.object_name = s->name;
11983         }
11984
11985         obj = bpf_object__open_mem(s->data, s->data_sz, &skel_opts);
11986         err = libbpf_get_error(obj);
11987         if (err) {
11988                 pr_warn("failed to initialize skeleton BPF object '%s': %d\n",
11989                         s->name, err);
11990                 return libbpf_err(err);
11991         }
11992
11993         *s->obj = obj;
11994         err = populate_skeleton_maps(obj, s->maps, s->map_cnt);
11995         if (err) {
11996                 pr_warn("failed to populate skeleton maps for '%s': %d\n", s->name, err);
11997                 return libbpf_err(err);
11998         }
11999
12000         err = populate_skeleton_progs(obj, s->progs, s->prog_cnt);
12001         if (err) {
12002                 pr_warn("failed to populate skeleton progs for '%s': %d\n", s->name, err);
12003                 return libbpf_err(err);
12004         }
12005
12006         return 0;
12007 }
12008
12009 int bpf_object__open_subskeleton(struct bpf_object_subskeleton *s)
12010 {
12011         int err, len, var_idx, i;
12012         const char *var_name;
12013         const struct bpf_map *map;
12014         struct btf *btf;
12015         __u32 map_type_id;
12016         const struct btf_type *map_type, *var_type;
12017         const struct bpf_var_skeleton *var_skel;
12018         struct btf_var_secinfo *var;
12019
12020         if (!s->obj)
12021                 return libbpf_err(-EINVAL);
12022
12023         btf = bpf_object__btf(s->obj);
12024         if (!btf) {
12025                 pr_warn("subskeletons require BTF at runtime (object %s)\n",
12026                         bpf_object__name(s->obj));
12027                 return libbpf_err(-errno);
12028         }
12029
12030         err = populate_skeleton_maps(s->obj, s->maps, s->map_cnt);
12031         if (err) {
12032                 pr_warn("failed to populate subskeleton maps: %d\n", err);
12033                 return libbpf_err(err);
12034         }
12035
12036         err = populate_skeleton_progs(s->obj, s->progs, s->prog_cnt);
12037         if (err) {
12038                 pr_warn("failed to populate subskeleton maps: %d\n", err);
12039                 return libbpf_err(err);
12040         }
12041
12042         for (var_idx = 0; var_idx < s->var_cnt; var_idx++) {
12043                 var_skel = &s->vars[var_idx];
12044                 map = *var_skel->map;
12045                 map_type_id = bpf_map__btf_value_type_id(map);
12046                 map_type = btf__type_by_id(btf, map_type_id);
12047
12048                 if (!btf_is_datasec(map_type)) {
12049                         pr_warn("type for map '%1$s' is not a datasec: %2$s",
12050                                 bpf_map__name(map),
12051                                 __btf_kind_str(btf_kind(map_type)));
12052                         return libbpf_err(-EINVAL);
12053                 }
12054
12055                 len = btf_vlen(map_type);
12056                 var = btf_var_secinfos(map_type);
12057                 for (i = 0; i < len; i++, var++) {
12058                         var_type = btf__type_by_id(btf, var->type);
12059                         var_name = btf__name_by_offset(btf, var_type->name_off);
12060                         if (strcmp(var_name, var_skel->name) == 0) {
12061                                 *var_skel->addr = map->mmaped + var->offset;
12062                                 break;
12063                         }
12064                 }
12065         }
12066         return 0;
12067 }
12068
12069 void bpf_object__destroy_subskeleton(struct bpf_object_subskeleton *s)
12070 {
12071         if (!s)
12072                 return;
12073         free(s->maps);
12074         free(s->progs);
12075         free(s->vars);
12076         free(s);
12077 }
12078
12079 int bpf_object__load_skeleton(struct bpf_object_skeleton *s)
12080 {
12081         int i, err;
12082
12083         err = bpf_object__load(*s->obj);
12084         if (err) {
12085                 pr_warn("failed to load BPF skeleton '%s': %d\n", s->name, err);
12086                 return libbpf_err(err);
12087         }
12088
12089         for (i = 0; i < s->map_cnt; i++) {
12090                 struct bpf_map *map = *s->maps[i].map;
12091                 size_t mmap_sz = bpf_map_mmap_sz(map);
12092                 int prot, map_fd = bpf_map__fd(map);
12093                 void **mmaped = s->maps[i].mmaped;
12094
12095                 if (!mmaped)
12096                         continue;
12097
12098                 if (!(map->def.map_flags & BPF_F_MMAPABLE)) {
12099                         *mmaped = NULL;
12100                         continue;
12101                 }
12102
12103                 if (map->def.map_flags & BPF_F_RDONLY_PROG)
12104                         prot = PROT_READ;
12105                 else
12106                         prot = PROT_READ | PROT_WRITE;
12107
12108                 /* Remap anonymous mmap()-ed "map initialization image" as
12109                  * a BPF map-backed mmap()-ed memory, but preserving the same
12110                  * memory address. This will cause kernel to change process'
12111                  * page table to point to a different piece of kernel memory,
12112                  * but from userspace point of view memory address (and its
12113                  * contents, being identical at this point) will stay the
12114                  * same. This mapping will be released by bpf_object__close()
12115                  * as per normal clean up procedure, so we don't need to worry
12116                  * about it from skeleton's clean up perspective.
12117                  */
12118                 *mmaped = mmap(map->mmaped, mmap_sz, prot,
12119                                 MAP_SHARED | MAP_FIXED, map_fd, 0);
12120                 if (*mmaped == MAP_FAILED) {
12121                         err = -errno;
12122                         *mmaped = NULL;
12123                         pr_warn("failed to re-mmap() map '%s': %d\n",
12124                                  bpf_map__name(map), err);
12125                         return libbpf_err(err);
12126                 }
12127         }
12128
12129         return 0;
12130 }
12131
12132 int bpf_object__attach_skeleton(struct bpf_object_skeleton *s)
12133 {
12134         int i, err;
12135
12136         for (i = 0; i < s->prog_cnt; i++) {
12137                 struct bpf_program *prog = *s->progs[i].prog;
12138                 struct bpf_link **link = s->progs[i].link;
12139
12140                 if (!prog->autoload)
12141                         continue;
12142
12143                 /* auto-attaching not supported for this program */
12144                 if (!prog->sec_def || !prog->sec_def->prog_attach_fn)
12145                         continue;
12146
12147                 /* if user already set the link manually, don't attempt auto-attach */
12148                 if (*link)
12149                         continue;
12150
12151                 err = prog->sec_def->prog_attach_fn(prog, prog->sec_def->cookie, link);
12152                 if (err) {
12153                         pr_warn("prog '%s': failed to auto-attach: %d\n",
12154                                 bpf_program__name(prog), err);
12155                         return libbpf_err(err);
12156                 }
12157
12158                 /* It's possible that for some SEC() definitions auto-attach
12159                  * is supported in some cases (e.g., if definition completely
12160                  * specifies target information), but is not in other cases.
12161                  * SEC("uprobe") is one such case. If user specified target
12162                  * binary and function name, such BPF program can be
12163                  * auto-attached. But if not, it shouldn't trigger skeleton's
12164                  * attach to fail. It should just be skipped.
12165                  * attach_fn signals such case with returning 0 (no error) and
12166                  * setting link to NULL.
12167                  */
12168         }
12169
12170         return 0;
12171 }
12172
12173 void bpf_object__detach_skeleton(struct bpf_object_skeleton *s)
12174 {
12175         int i;
12176
12177         for (i = 0; i < s->prog_cnt; i++) {
12178                 struct bpf_link **link = s->progs[i].link;
12179
12180                 bpf_link__destroy(*link);
12181                 *link = NULL;
12182         }
12183 }
12184
12185 void bpf_object__destroy_skeleton(struct bpf_object_skeleton *s)
12186 {
12187         if (!s)
12188                 return;
12189
12190         if (s->progs)
12191                 bpf_object__detach_skeleton(s);
12192         if (s->obj)
12193                 bpf_object__close(*s->obj);
12194         free(s->maps);
12195         free(s->progs);
12196         free(s);
12197 }