libbpf: Allow decimal offset for kprobes
tools/lib/bpf/libbpf.c
// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)

/*
 * Common eBPF ELF object loading operations.
 *
 * Copyright (C) 2013-2015 Alexei Starovoitov <ast@kernel.org>
 * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com>
 * Copyright (C) 2015 Huawei Inc.
 * Copyright (C) 2017 Nicira, Inc.
 * Copyright (C) 2019 Isovalent, Inc.
 */

#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif
#include <stdlib.h>
#include <stdio.h>
#include <stdarg.h>
#include <libgen.h>
#include <inttypes.h>
#include <limits.h>
#include <string.h>
#include <unistd.h>
#include <endian.h>
#include <fcntl.h>
#include <errno.h>
#include <ctype.h>
#include <asm/unistd.h>
#include <linux/err.h>
#include <linux/kernel.h>
#include <linux/bpf.h>
#include <linux/btf.h>
#include <linux/filter.h>
#include <linux/list.h>
#include <linux/limits.h>
#include <linux/perf_event.h>
#include <linux/ring_buffer.h>
#include <linux/version.h>
#include <sys/epoll.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/vfs.h>
#include <sys/utsname.h>
#include <sys/resource.h>
#include <libelf.h>
#include <gelf.h>
#include <zlib.h>

#include "libbpf.h"
#include "bpf.h"
#include "btf.h"
#include "str_error.h"
#include "libbpf_internal.h"
#include "hashmap.h"
#include "bpf_gen_internal.h"

#ifndef BPF_FS_MAGIC
#define BPF_FS_MAGIC            0xcafe4a11
#endif

#define BPF_INSN_SZ (sizeof(struct bpf_insn))

/* vfprintf() in __base_pr() uses a nonliteral format string. It may break
 * compilation if the user enables the corresponding warning. Disable it explicitly.
 */
#pragma GCC diagnostic ignored "-Wformat-nonliteral"

#define __printf(a, b)  __attribute__((format(printf, a, b)))

static struct bpf_map *bpf_object__add_map(struct bpf_object *obj);
static bool prog_is_subprog(const struct bpf_object *obj, const struct bpf_program *prog);

static int __base_pr(enum libbpf_print_level level, const char *format,
                     va_list args)
{
        if (level == LIBBPF_DEBUG)
                return 0;

        return vfprintf(stderr, format, args);
}

static libbpf_print_fn_t __libbpf_pr = __base_pr;

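/* Install a user-supplied print callback and return the previous one, so the
 * caller can chain to it or restore it later. A minimal usage sketch
 * (caller-side code, not part of this file; my_pr is a hypothetical name):
 *
 *      static int my_pr(enum libbpf_print_level level, const char *format,
 *                       va_list args)
 *      {
 *              return level == LIBBPF_DEBUG ? 0 : vfprintf(stderr, format, args);
 *      }
 *      ...
 *      libbpf_print_fn_t prev = libbpf_set_print(my_pr);
 */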
libbpf_print_fn_t libbpf_set_print(libbpf_print_fn_t fn)
{
        libbpf_print_fn_t old_print_fn = __libbpf_pr;

        __libbpf_pr = fn;
        return old_print_fn;
}

__printf(2, 3)
void libbpf_print(enum libbpf_print_level level, const char *format, ...)
{
        va_list args;

        if (!__libbpf_pr)
                return;

        va_start(args, format);
        __libbpf_pr(level, format, args);
        va_end(args);
}

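/* On -EPERM while running as root, hint that RLIMIT_MEMLOCK is the likely
 * culprit and report the current (finite) limit in human-readable units.
 */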
static void pr_perm_msg(int err)
{
        struct rlimit limit;
        char buf[100];

        if (err != -EPERM || geteuid() != 0)
                return;

        err = getrlimit(RLIMIT_MEMLOCK, &limit);
        if (err)
                return;

        if (limit.rlim_cur == RLIM_INFINITY)
                return;

        if (limit.rlim_cur < 1024)
                snprintf(buf, sizeof(buf), "%zu bytes", (size_t)limit.rlim_cur);
        else if (limit.rlim_cur < 1024*1024)
                snprintf(buf, sizeof(buf), "%.1f KiB", (double)limit.rlim_cur / 1024);
        else
                snprintf(buf, sizeof(buf), "%.1f MiB", (double)limit.rlim_cur / (1024*1024));

        pr_warn("permission error while running as root; try raising 'ulimit -l'? current value: %s\n",
                buf);
}

#define STRERR_BUFSIZE  128

/* Copied from tools/perf/util/util.h */
#ifndef zfree
# define zfree(ptr) ({ free(*ptr); *ptr = NULL; })
#endif

#ifndef zclose
# define zclose(fd) ({                  \
        int ___err = 0;                 \
        if ((fd) >= 0)                  \
                ___err = close((fd));   \
        fd = -1;                        \
        ___err; })
#endif

static inline __u64 ptr_to_u64(const void *ptr)
{
        return (__u64) (unsigned long) ptr;
}

/* this goes away in libbpf 1.0 */
enum libbpf_strict_mode libbpf_mode = LIBBPF_STRICT_NONE;

int libbpf_set_strict_mode(enum libbpf_strict_mode mode)
{
        /* __LIBBPF_STRICT_LAST is the last power-of-2 value used + 1, so to
         * get all possible values we compensate last +1, and then (2*x - 1)
         * to get the bit mask
         */
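        /* For example (illustrative values, not the actual enum contents):
         * if the last flag defined is 0x02, __LIBBPF_STRICT_LAST is 0x03,
         * and the valid-bits mask is (0x03 - 1) * 2 - 1 == 0x03.
         */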
        if (mode != LIBBPF_STRICT_ALL
            && (mode & ~((__LIBBPF_STRICT_LAST - 1) * 2 - 1)))
                return errno = EINVAL, -EINVAL;

        libbpf_mode = mode;
        return 0;
}

enum kern_feature_id {
        /* v4.14: kernel support for program & map names. */
        FEAT_PROG_NAME,
        /* v5.2: kernel support for global data sections. */
        FEAT_GLOBAL_DATA,
        /* BTF support */
        FEAT_BTF,
        /* BTF_KIND_FUNC and BTF_KIND_FUNC_PROTO support */
        FEAT_BTF_FUNC,
        /* BTF_KIND_VAR and BTF_KIND_DATASEC support */
        FEAT_BTF_DATASEC,
        /* BTF_FUNC_GLOBAL is supported */
        FEAT_BTF_GLOBAL_FUNC,
        /* BPF_F_MMAPABLE is supported for arrays */
        FEAT_ARRAY_MMAP,
        /* kernel support for expected_attach_type in BPF_PROG_LOAD */
        FEAT_EXP_ATTACH_TYPE,
        /* bpf_probe_read_{kernel,user}[_str] helpers */
        FEAT_PROBE_READ_KERN,
        /* BPF_PROG_BIND_MAP is supported */
        FEAT_PROG_BIND_MAP,
        /* Kernel support for module BTFs */
        FEAT_MODULE_BTF,
        /* BTF_KIND_FLOAT support */
        FEAT_BTF_FLOAT,
        __FEAT_CNT,
};

static bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id);

enum reloc_type {
        RELO_LD64,
        RELO_CALL,
        RELO_DATA,
        RELO_EXTERN_VAR,
        RELO_EXTERN_FUNC,
        RELO_SUBPROG_ADDR,
};

struct reloc_desc {
        enum reloc_type type;
        int insn_idx;
        int map_idx;
        int sym_off;
};

struct bpf_sec_def;

typedef struct bpf_link *(*attach_fn_t)(const struct bpf_sec_def *sec,
                                        struct bpf_program *prog);

struct bpf_sec_def {
        const char *sec;
        size_t len;
        enum bpf_prog_type prog_type;
        enum bpf_attach_type expected_attach_type;
        bool is_exp_attach_type_optional;
        bool is_attachable;
        bool is_attach_btf;
        bool is_sleepable;
        attach_fn_t attach_fn;
};

/*
 * bpf_prog should be a better name but it has been used in
 * linux/filter.h.
 */
struct bpf_program {
        const struct bpf_sec_def *sec_def;
        char *sec_name;
        size_t sec_idx;
        /* this program's instruction offset (in number of instructions)
         * within its containing ELF section
         */
        size_t sec_insn_off;
        /* number of original instructions in ELF section belonging to this
         * program, not taking into account subprogram instructions possibly
         * appended later during relocation
         */
        size_t sec_insn_cnt;
        /* Offset (in number of instructions) of the start of instructions
         * belonging to this BPF program within its containing main BPF
         * program. For the entry-point (main) BPF program, this is always
         * zero. For a sub-program, this gets reset before each main BPF
         * program is processed and relocated, and is used to determine
         * whether the sub-program was already appended to the main program,
         * and if so, at which instruction offset.
         */
        size_t sub_insn_off;

        char *name;
        /* sec_name with / replaced by _; makes recursive pinning
         * in bpf_object__pin_programs easier
         */
        char *pin_name;

        /* instructions that belong to BPF program; insns[0] is located at
         * sec_insn_off instruction within its ELF section in ELF file, so
         * when mapping ELF file instruction index to the local instruction,
         * one needs to subtract sec_insn_off; and vice versa.
         */
        struct bpf_insn *insns;
        /* actual number of instructions in this BPF program's image; for
         * entry-point BPF programs this includes the size of the main program
         * itself plus all the used sub-programs, appended at the end
         */
        size_t insns_cnt;

        struct reloc_desc *reloc_desc;
        int nr_reloc;
        int log_level;

        struct {
                int nr;
                int *fds;
        } instances;
        bpf_program_prep_t preprocessor;

        struct bpf_object *obj;
        void *priv;
        bpf_program_clear_priv_t clear_priv;

        bool load;
        bool mark_btf_static;
        enum bpf_prog_type type;
        enum bpf_attach_type expected_attach_type;
        int prog_ifindex;
        __u32 attach_btf_obj_fd;
        __u32 attach_btf_id;
        __u32 attach_prog_fd;
        void *func_info;
        __u32 func_info_rec_size;
        __u32 func_info_cnt;

        void *line_info;
        __u32 line_info_rec_size;
        __u32 line_info_cnt;
        __u32 prog_flags;
};

struct bpf_struct_ops {
        const char *tname;
        const struct btf_type *type;
        struct bpf_program **progs;
        __u32 *kern_func_off;
        /* e.g. struct tcp_congestion_ops in bpf_prog's btf format */
        void *data;
        /* e.g. struct bpf_struct_ops_tcp_congestion_ops in
         *      btf_vmlinux's format.
         * struct bpf_struct_ops_tcp_congestion_ops {
         *      [... some other kernel fields ...]
         *      struct tcp_congestion_ops data;
         * }
         * kern_vdata's size == sizeof(struct bpf_struct_ops_tcp_congestion_ops)
         * bpf_map__init_kern_struct_ops() will populate the "kern_vdata"
         * from "data".
         */
        void *kern_vdata;
        __u32 type_id;
};

#define DATA_SEC ".data"
#define BSS_SEC ".bss"
#define RODATA_SEC ".rodata"
#define KCONFIG_SEC ".kconfig"
#define KSYMS_SEC ".ksyms"
#define STRUCT_OPS_SEC ".struct_ops"

enum libbpf_map_type {
        LIBBPF_MAP_UNSPEC,
        LIBBPF_MAP_DATA,
        LIBBPF_MAP_BSS,
        LIBBPF_MAP_RODATA,
        LIBBPF_MAP_KCONFIG,
};

static const char * const libbpf_type_to_btf_name[] = {
        [LIBBPF_MAP_DATA]       = DATA_SEC,
        [LIBBPF_MAP_BSS]        = BSS_SEC,
        [LIBBPF_MAP_RODATA]     = RODATA_SEC,
        [LIBBPF_MAP_KCONFIG]    = KCONFIG_SEC,
};

struct bpf_map {
        char *name;
        int fd;
        int sec_idx;
        size_t sec_offset;
        int map_ifindex;
        int inner_map_fd;
        struct bpf_map_def def;
        __u32 numa_node;
        __u32 btf_var_idx;
        __u32 btf_key_type_id;
        __u32 btf_value_type_id;
        __u32 btf_vmlinux_value_type_id;
        void *priv;
        bpf_map_clear_priv_t clear_priv;
        enum libbpf_map_type libbpf_type;
        void *mmaped;
        struct bpf_struct_ops *st_ops;
        struct bpf_map *inner_map;
        void **init_slots;
        int init_slots_sz;
        char *pin_path;
        bool pinned;
        bool reused;
};

enum extern_type {
        EXT_UNKNOWN,
        EXT_KCFG,
        EXT_KSYM,
};

enum kcfg_type {
        KCFG_UNKNOWN,
        KCFG_CHAR,
        KCFG_BOOL,
        KCFG_INT,
        KCFG_TRISTATE,
        KCFG_CHAR_ARR,
};

struct extern_desc {
        enum extern_type type;
        int sym_idx;
        int btf_id;
        int sec_btf_id;
        const char *name;
        bool is_set;
        bool is_weak;
        union {
                struct {
                        enum kcfg_type type;
                        int sz;
                        int align;
                        int data_off;
                        bool is_signed;
                } kcfg;
                struct {
                        unsigned long long addr;

                        /* target btf_id of the corresponding kernel var. */
                        int kernel_btf_obj_fd;
                        int kernel_btf_id;

                        /* local btf_id of the ksym extern's type. */
                        __u32 type_id;
                } ksym;
        };
};

static LIST_HEAD(bpf_objects_list);

struct module_btf {
        struct btf *btf;
        char *name;
        __u32 id;
        int fd;
};

struct bpf_object {
        char name[BPF_OBJ_NAME_LEN];
        char license[64];
        __u32 kern_version;

        struct bpf_program *programs;
        size_t nr_programs;
        struct bpf_map *maps;
        size_t nr_maps;
        size_t maps_cap;

        char *kconfig;
        struct extern_desc *externs;
        int nr_extern;
        int kconfig_map_idx;
        int rodata_map_idx;

        bool loaded;
        bool has_subcalls;

        struct bpf_gen *gen_loader;

        /*
         * Information when doing elf related work. Only valid if fd
         * is valid.
         */
        struct {
                int fd;
                const void *obj_buf;
                size_t obj_buf_sz;
                Elf *elf;
                GElf_Ehdr ehdr;
                Elf_Data *symbols;
                Elf_Data *data;
                Elf_Data *rodata;
                Elf_Data *bss;
                Elf_Data *st_ops_data;
                size_t shstrndx; /* section index for section name strings */
                size_t strtabidx;
                struct {
                        GElf_Shdr shdr;
                        Elf_Data *data;
                } *reloc_sects;
                int nr_reloc_sects;
                int maps_shndx;
                int btf_maps_shndx;
                __u32 btf_maps_sec_btf_id;
                int text_shndx;
                int symbols_shndx;
                int data_shndx;
                int rodata_shndx;
                int bss_shndx;
                int st_ops_shndx;
        } efile;
        /*
         * All loaded bpf_objects are linked in a list, which is
         * hidden from the caller. bpf_objects__<func> handlers deal with
         * all objects.
         */
        struct list_head list;

        struct btf *btf;
        struct btf_ext *btf_ext;

        /* Parse and load BTF vmlinux if any of the programs in the object need
         * it at load time.
         */
        struct btf *btf_vmlinux;
        /* Path to the custom BTF to be used for BPF CO-RE relocations as an
         * override for vmlinux BTF.
         */
        char *btf_custom_path;
        /* vmlinux BTF override for CO-RE relocations */
        struct btf *btf_vmlinux_override;
        /* Lazily initialized kernel module BTFs */
        struct module_btf *btf_modules;
        bool btf_modules_loaded;
        size_t btf_module_cnt;
        size_t btf_module_cap;

        void *priv;
        bpf_object_clear_priv_t clear_priv;

        char path[];
};
#define obj_elf_valid(o)        ((o)->efile.elf)

static const char *elf_sym_str(const struct bpf_object *obj, size_t off);
static const char *elf_sec_str(const struct bpf_object *obj, size_t off);
static Elf_Scn *elf_sec_by_idx(const struct bpf_object *obj, size_t idx);
static Elf_Scn *elf_sec_by_name(const struct bpf_object *obj, const char *name);
static int elf_sec_hdr(const struct bpf_object *obj, Elf_Scn *scn, GElf_Shdr *hdr);
static const char *elf_sec_name(const struct bpf_object *obj, Elf_Scn *scn);
static Elf_Data *elf_sec_data(const struct bpf_object *obj, Elf_Scn *scn);

void bpf_program__unload(struct bpf_program *prog)
{
        int i;

        if (!prog)
                return;

        /*
         * If the object is opened but the program was never loaded,
         * it is possible that prog->instances.nr == -1.
         */
        if (prog->instances.nr > 0) {
                for (i = 0; i < prog->instances.nr; i++)
                        zclose(prog->instances.fds[i]);
        } else if (prog->instances.nr != -1) {
                pr_warn("Internal error: instances.nr is %d\n",
                        prog->instances.nr);
        }

        prog->instances.nr = -1;
        zfree(&prog->instances.fds);

        zfree(&prog->func_info);
        zfree(&prog->line_info);
}

static void bpf_program__exit(struct bpf_program *prog)
{
        if (!prog)
                return;

        if (prog->clear_priv)
                prog->clear_priv(prog, prog->priv);

        prog->priv = NULL;
        prog->clear_priv = NULL;

        bpf_program__unload(prog);
        zfree(&prog->name);
        zfree(&prog->sec_name);
        zfree(&prog->pin_name);
        zfree(&prog->insns);
        zfree(&prog->reloc_desc);

        prog->nr_reloc = 0;
        prog->insns_cnt = 0;
        prog->sec_idx = -1;
}

static char *__bpf_program__pin_name(struct bpf_program *prog)
{
        char *name, *p;

        name = p = strdup(prog->sec_name);
        while ((p = strchr(p, '/')))
                *p = '_';

        return name;
}

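/* A call to another BPF sub-program is encoded as a BPF_JMP|BPF_CALL
 * instruction with src_reg set to BPF_PSEUDO_CALL; its immediate holds an
 * instruction-relative offset to the callee rather than a helper ID.
 */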
static bool insn_is_subprog_call(const struct bpf_insn *insn)
{
        return BPF_CLASS(insn->code) == BPF_JMP &&
               BPF_OP(insn->code) == BPF_CALL &&
               BPF_SRC(insn->code) == BPF_K &&
               insn->src_reg == BPF_PSEUDO_CALL &&
               insn->dst_reg == 0 &&
               insn->off == 0;
}

static bool is_ldimm64_insn(struct bpf_insn *insn)
{
        return insn->code == (BPF_LD | BPF_IMM | BPF_DW);
}

static bool is_call_insn(const struct bpf_insn *insn)
{
        return insn->code == (BPF_JMP | BPF_CALL);
}

static bool insn_is_pseudo_func(struct bpf_insn *insn)
{
        return is_ldimm64_insn(insn) && insn->src_reg == BPF_PSEUDO_FUNC;
}

static int
bpf_object__init_prog(struct bpf_object *obj, struct bpf_program *prog,
                      const char *name, size_t sec_idx, const char *sec_name,
                      size_t sec_off, void *insn_data, size_t insn_data_sz)
{
        if (insn_data_sz == 0 || insn_data_sz % BPF_INSN_SZ || sec_off % BPF_INSN_SZ) {
                pr_warn("sec '%s': corrupted program '%s', offset %zu, size %zu\n",
                        sec_name, name, sec_off, insn_data_sz);
                return -EINVAL;
        }

        memset(prog, 0, sizeof(*prog));
        prog->obj = obj;

        prog->sec_idx = sec_idx;
        prog->sec_insn_off = sec_off / BPF_INSN_SZ;
        prog->sec_insn_cnt = insn_data_sz / BPF_INSN_SZ;
        /* insns_cnt can later be increased by appending used subprograms */
        prog->insns_cnt = prog->sec_insn_cnt;

        prog->type = BPF_PROG_TYPE_UNSPEC;
        prog->load = true;

        prog->instances.fds = NULL;
        prog->instances.nr = -1;

        prog->sec_name = strdup(sec_name);
        if (!prog->sec_name)
                goto errout;

        prog->name = strdup(name);
        if (!prog->name)
                goto errout;

        prog->pin_name = __bpf_program__pin_name(prog);
        if (!prog->pin_name)
                goto errout;

        prog->insns = malloc(insn_data_sz);
        if (!prog->insns)
                goto errout;
        memcpy(prog->insns, insn_data, insn_data_sz);

        return 0;
errout:
        pr_warn("sec '%s': failed to allocate memory for prog '%s'\n", sec_name, name);
        bpf_program__exit(prog);
        return -ENOMEM;
}

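/* Discover programs in one ELF code section: every STT_FUNC symbol in the
 * section becomes a struct bpf_program, carved out at the symbol's value
 * (byte offset) and size within the section's data.
 */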
static int
bpf_object__add_programs(struct bpf_object *obj, Elf_Data *sec_data,
                         const char *sec_name, int sec_idx)
{
        Elf_Data *symbols = obj->efile.symbols;
        struct bpf_program *prog, *progs;
        void *data = sec_data->d_buf;
        size_t sec_sz = sec_data->d_size, sec_off, prog_sz, nr_syms;
        int nr_progs, err, i;
        const char *name;
        GElf_Sym sym;

        progs = obj->programs;
        nr_progs = obj->nr_programs;
        nr_syms = symbols->d_size / sizeof(GElf_Sym);
        sec_off = 0;

        for (i = 0; i < nr_syms; i++) {
                if (!gelf_getsym(symbols, i, &sym))
                        continue;
                if (sym.st_shndx != sec_idx)
                        continue;
                if (GELF_ST_TYPE(sym.st_info) != STT_FUNC)
                        continue;

                prog_sz = sym.st_size;
                sec_off = sym.st_value;

                name = elf_sym_str(obj, sym.st_name);
                if (!name) {
                        pr_warn("sec '%s': failed to get symbol name for offset %zu\n",
                                sec_name, sec_off);
                        return -LIBBPF_ERRNO__FORMAT;
                }

                if (sec_off + prog_sz > sec_sz) {
                        pr_warn("sec '%s': program at offset %zu crosses section boundary\n",
                                sec_name, sec_off);
                        return -LIBBPF_ERRNO__FORMAT;
                }

                if (sec_idx != obj->efile.text_shndx && GELF_ST_BIND(sym.st_info) == STB_LOCAL) {
                        pr_warn("sec '%s': program '%s' is static and not supported\n", sec_name, name);
                        return -ENOTSUP;
                }

                pr_debug("sec '%s': found program '%s' at insn offset %zu (%zu bytes), code size %zu insns (%zu bytes)\n",
                         sec_name, name, sec_off / BPF_INSN_SZ, sec_off, prog_sz / BPF_INSN_SZ, prog_sz);

                progs = libbpf_reallocarray(progs, nr_progs + 1, sizeof(*progs));
                if (!progs) {
                        /*
                         * In this case the original obj->programs
                         * is still valid, so there is no need for special
                         * treatment in bpf_object__close().
                         */
                        pr_warn("sec '%s': failed to alloc memory for new program '%s'\n",
                                sec_name, name);
                        return -ENOMEM;
                }
                obj->programs = progs;

                prog = &progs[nr_progs];

                err = bpf_object__init_prog(obj, prog, name, sec_idx, sec_name,
                                            sec_off, data + sec_off, prog_sz);
                if (err)
                        return err;

                /* if function is a global/weak symbol, but has restricted
                 * (STV_HIDDEN or STV_INTERNAL) visibility, mark its BTF FUNC
                 * as static to enable more permissive BPF verification mode
                 * with more outside context available to BPF verifier
                 */
                if (GELF_ST_BIND(sym.st_info) != STB_LOCAL
                    && (GELF_ST_VISIBILITY(sym.st_other) == STV_HIDDEN
                        || GELF_ST_VISIBILITY(sym.st_other) == STV_INTERNAL))
                        prog->mark_btf_static = true;

                nr_progs++;
                obj->nr_programs = nr_progs;
        }

        return 0;
}

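/* Derive the running kernel's LINUX_VERSION_CODE from uname(). For example,
 * a release string of "5.13.0" encodes to KERNEL_VERSION(5, 13, 0), i.e.
 * (5 << 16) + (13 << 8) + 0 == 0x050d00.
 */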
static __u32 get_kernel_version(void)
{
        __u32 major, minor, patch;
        struct utsname info;

        uname(&info);
        if (sscanf(info.release, "%u.%u.%u", &major, &minor, &patch) != 3)
                return 0;
        return KERNEL_VERSION(major, minor, patch);
}

static const struct btf_member *
find_member_by_offset(const struct btf_type *t, __u32 bit_offset)
{
        struct btf_member *m;
        int i;

        for (i = 0, m = btf_members(t); i < btf_vlen(t); i++, m++) {
                if (btf_member_bit_offset(t, i) == bit_offset)
                        return m;
        }

        return NULL;
}

static const struct btf_member *
find_member_by_name(const struct btf *btf, const struct btf_type *t,
                    const char *name)
{
        struct btf_member *m;
        int i;

        for (i = 0, m = btf_members(t); i < btf_vlen(t); i++, m++) {
                if (!strcmp(btf__name_by_offset(btf, m->name_off), name))
                        return m;
        }

        return NULL;
}

#define STRUCT_OPS_VALUE_PREFIX "bpf_struct_ops_"
static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix,
                                   const char *name, __u32 kind);

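/* Given a struct_ops type name like "tcp_congestion_ops", look up both
 * struct tcp_congestion_ops and its kernel-side map-value wrapper,
 * struct bpf_struct_ops_tcp_congestion_ops, in kernel BTF, and locate the
 * wrapper's member embedding the former (the "data" member).
 */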
static int
find_struct_ops_kern_types(const struct btf *btf, const char *tname,
                           const struct btf_type **type, __u32 *type_id,
                           const struct btf_type **vtype, __u32 *vtype_id,
                           const struct btf_member **data_member)
{
        const struct btf_type *kern_type, *kern_vtype;
        const struct btf_member *kern_data_member;
        __s32 kern_vtype_id, kern_type_id;
        __u32 i;

        kern_type_id = btf__find_by_name_kind(btf, tname, BTF_KIND_STRUCT);
        if (kern_type_id < 0) {
                pr_warn("struct_ops init_kern: struct %s is not found in kernel BTF\n",
                        tname);
                return kern_type_id;
        }
        kern_type = btf__type_by_id(btf, kern_type_id);

        /* Find the corresponding "map_value" type that will be used
         * in map_update(BPF_MAP_TYPE_STRUCT_OPS).  For example,
         * find "struct bpf_struct_ops_tcp_congestion_ops" from the
         * btf_vmlinux.
         */
        kern_vtype_id = find_btf_by_prefix_kind(btf, STRUCT_OPS_VALUE_PREFIX,
                                                tname, BTF_KIND_STRUCT);
        if (kern_vtype_id < 0) {
                pr_warn("struct_ops init_kern: struct %s%s is not found in kernel BTF\n",
                        STRUCT_OPS_VALUE_PREFIX, tname);
                return kern_vtype_id;
        }
        kern_vtype = btf__type_by_id(btf, kern_vtype_id);

        /* Find "struct tcp_congestion_ops" from
         * struct bpf_struct_ops_tcp_congestion_ops {
         *      [ ... ]
         *      struct tcp_congestion_ops data;
         * }
         */
        kern_data_member = btf_members(kern_vtype);
        for (i = 0; i < btf_vlen(kern_vtype); i++, kern_data_member++) {
                if (kern_data_member->type == kern_type_id)
                        break;
        }
        if (i == btf_vlen(kern_vtype)) {
                pr_warn("struct_ops init_kern: struct %s data is not found in struct %s%s\n",
                        tname, STRUCT_OPS_VALUE_PREFIX, tname);
                return -EINVAL;
        }

        *type = kern_type;
        *type_id = kern_type_id;
        *vtype = kern_vtype;
        *vtype_id = kern_vtype_id;
        *data_member = kern_data_member;

        return 0;
}

static bool bpf_map__is_struct_ops(const struct bpf_map *map)
{
        return map->def.type == BPF_MAP_TYPE_STRUCT_OPS;
}

/* Init the map's fields that depend on kern_btf */
static int bpf_map__init_kern_struct_ops(struct bpf_map *map,
                                         const struct btf *btf,
                                         const struct btf *kern_btf)
{
        const struct btf_member *member, *kern_member, *kern_data_member;
        const struct btf_type *type, *kern_type, *kern_vtype;
        __u32 i, kern_type_id, kern_vtype_id, kern_data_off;
        struct bpf_struct_ops *st_ops;
        void *data, *kern_data;
        const char *tname;
        int err;

        st_ops = map->st_ops;
        type = st_ops->type;
        tname = st_ops->tname;
        err = find_struct_ops_kern_types(kern_btf, tname,
                                         &kern_type, &kern_type_id,
                                         &kern_vtype, &kern_vtype_id,
                                         &kern_data_member);
        if (err)
                return err;

        pr_debug("struct_ops init_kern %s: type_id:%u kern_type_id:%u kern_vtype_id:%u\n",
                 map->name, st_ops->type_id, kern_type_id, kern_vtype_id);

        map->def.value_size = kern_vtype->size;
        map->btf_vmlinux_value_type_id = kern_vtype_id;

        st_ops->kern_vdata = calloc(1, kern_vtype->size);
        if (!st_ops->kern_vdata)
                return -ENOMEM;

        data = st_ops->data;
        kern_data_off = kern_data_member->offset / 8;
        kern_data = st_ops->kern_vdata + kern_data_off;

        member = btf_members(type);
        for (i = 0; i < btf_vlen(type); i++, member++) {
                const struct btf_type *mtype, *kern_mtype;
                __u32 mtype_id, kern_mtype_id;
                void *mdata, *kern_mdata;
                __s64 msize, kern_msize;
                __u32 moff, kern_moff;
                __u32 kern_member_idx;
                const char *mname;

                mname = btf__name_by_offset(btf, member->name_off);
                kern_member = find_member_by_name(kern_btf, kern_type, mname);
                if (!kern_member) {
                        pr_warn("struct_ops init_kern %s: Cannot find member %s in kernel BTF\n",
                                map->name, mname);
                        return -ENOTSUP;
                }

                kern_member_idx = kern_member - btf_members(kern_type);
                if (btf_member_bitfield_size(type, i) ||
                    btf_member_bitfield_size(kern_type, kern_member_idx)) {
                        pr_warn("struct_ops init_kern %s: bitfield %s is not supported\n",
                                map->name, mname);
                        return -ENOTSUP;
                }

                moff = member->offset / 8;
                kern_moff = kern_member->offset / 8;

                mdata = data + moff;
                kern_mdata = kern_data + kern_moff;

                mtype = skip_mods_and_typedefs(btf, member->type, &mtype_id);
                kern_mtype = skip_mods_and_typedefs(kern_btf, kern_member->type,
                                                    &kern_mtype_id);
                if (BTF_INFO_KIND(mtype->info) !=
                    BTF_INFO_KIND(kern_mtype->info)) {
                        pr_warn("struct_ops init_kern %s: Unmatched member type %s %u != %u(kernel)\n",
                                map->name, mname, BTF_INFO_KIND(mtype->info),
                                BTF_INFO_KIND(kern_mtype->info));
                        return -ENOTSUP;
                }

                if (btf_is_ptr(mtype)) {
                        struct bpf_program *prog;

                        prog = st_ops->progs[i];
                        if (!prog)
                                continue;

                        kern_mtype = skip_mods_and_typedefs(kern_btf,
                                                            kern_mtype->type,
                                                            &kern_mtype_id);

                        /* mtype->type must be a func_proto which was
                         * guaranteed in bpf_object__collect_st_ops_relos(),
                         * so only check kern_mtype for func_proto here.
                         */
                        if (!btf_is_func_proto(kern_mtype)) {
                                pr_warn("struct_ops init_kern %s: kernel member %s is not a func ptr\n",
                                        map->name, mname);
                                return -ENOTSUP;
                        }

                        prog->attach_btf_id = kern_type_id;
                        prog->expected_attach_type = kern_member_idx;

                        st_ops->kern_func_off[i] = kern_data_off + kern_moff;

                        pr_debug("struct_ops init_kern %s: func ptr %s is set to prog %s from data(+%u) to kern_data(+%u)\n",
                                 map->name, mname, prog->name, moff,
                                 kern_moff);

                        continue;
                }

                msize = btf__resolve_size(btf, mtype_id);
                kern_msize = btf__resolve_size(kern_btf, kern_mtype_id);
                if (msize < 0 || kern_msize < 0 || msize != kern_msize) {
                        pr_warn("struct_ops init_kern %s: Error in size of member %s: %zd != %zd(kernel)\n",
                                map->name, mname, (ssize_t)msize,
                                (ssize_t)kern_msize);
                        return -ENOTSUP;
                }

                pr_debug("struct_ops init_kern %s: copy %s %u bytes from data(+%u) to kern_data(+%u)\n",
                         map->name, mname, (unsigned int)msize,
                         moff, kern_moff);
                memcpy(kern_mdata, mdata, msize);
        }

        return 0;
}

static int bpf_object__init_kern_struct_ops_maps(struct bpf_object *obj)
{
        struct bpf_map *map;
        size_t i;
        int err;

        for (i = 0; i < obj->nr_maps; i++) {
                map = &obj->maps[i];

                if (!bpf_map__is_struct_ops(map))
                        continue;

                err = bpf_map__init_kern_struct_ops(map, obj->btf,
                                                    obj->btf_vmlinux);
                if (err)
                        return err;
        }

        return 0;
}

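/* Walk the .struct_ops DATASEC in the object's BTF and create one
 * BPF_MAP_TYPE_STRUCT_OPS map per variable declared there, seeding the
 * map's value with the variable's initialization data from the ELF section.
 */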
static int bpf_object__init_struct_ops_maps(struct bpf_object *obj)
{
        const struct btf_type *type, *datasec;
        const struct btf_var_secinfo *vsi;
        struct bpf_struct_ops *st_ops;
        const char *tname, *var_name;
        __s32 type_id, datasec_id;
        const struct btf *btf;
        struct bpf_map *map;
        __u32 i;

        if (obj->efile.st_ops_shndx == -1)
                return 0;

        btf = obj->btf;
        datasec_id = btf__find_by_name_kind(btf, STRUCT_OPS_SEC,
                                            BTF_KIND_DATASEC);
        if (datasec_id < 0) {
                pr_warn("struct_ops init: DATASEC %s not found\n",
                        STRUCT_OPS_SEC);
                return -EINVAL;
        }

        datasec = btf__type_by_id(btf, datasec_id);
        vsi = btf_var_secinfos(datasec);
        for (i = 0; i < btf_vlen(datasec); i++, vsi++) {
                type = btf__type_by_id(obj->btf, vsi->type);
                var_name = btf__name_by_offset(obj->btf, type->name_off);

                type_id = btf__resolve_type(obj->btf, vsi->type);
                if (type_id < 0) {
                        pr_warn("struct_ops init: Cannot resolve var type_id %u in DATASEC %s\n",
                                vsi->type, STRUCT_OPS_SEC);
                        return -EINVAL;
                }

                type = btf__type_by_id(obj->btf, type_id);
                tname = btf__name_by_offset(obj->btf, type->name_off);
                if (!tname[0]) {
                        pr_warn("struct_ops init: anonymous type is not supported\n");
                        return -ENOTSUP;
                }
                if (!btf_is_struct(type)) {
                        pr_warn("struct_ops init: %s is not a struct\n", tname);
                        return -EINVAL;
                }

                map = bpf_object__add_map(obj);
                if (IS_ERR(map))
                        return PTR_ERR(map);

                map->sec_idx = obj->efile.st_ops_shndx;
                map->sec_offset = vsi->offset;
                map->name = strdup(var_name);
                if (!map->name)
                        return -ENOMEM;

                map->def.type = BPF_MAP_TYPE_STRUCT_OPS;
                map->def.key_size = sizeof(int);
                map->def.value_size = type->size;
                map->def.max_entries = 1;

                map->st_ops = calloc(1, sizeof(*map->st_ops));
                if (!map->st_ops)
                        return -ENOMEM;
                st_ops = map->st_ops;
                st_ops->data = malloc(type->size);
                st_ops->progs = calloc(btf_vlen(type), sizeof(*st_ops->progs));
                st_ops->kern_func_off = malloc(btf_vlen(type) *
                                               sizeof(*st_ops->kern_func_off));
                if (!st_ops->data || !st_ops->progs || !st_ops->kern_func_off)
                        return -ENOMEM;

                if (vsi->offset + type->size > obj->efile.st_ops_data->d_size) {
                        pr_warn("struct_ops init: var %s is beyond the end of DATASEC %s\n",
                                var_name, STRUCT_OPS_SEC);
                        return -EINVAL;
                }

                memcpy(st_ops->data,
                       obj->efile.st_ops_data->d_buf + vsi->offset,
                       type->size);
                st_ops->tname = tname;
                st_ops->type = type;
                st_ops->type_id = type_id;

                pr_debug("struct_ops init: struct %s(type_id=%u) %s found at offset %u\n",
                         tname, type_id, var_name, vsi->offset);
        }

        return 0;
}

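/* Allocate and minimally initialize a bpf_object. The object name defaults
 * to the file's basename with everything from the first '.' stripped (e.g.
 * "prog.bpf.o" becomes "prog"), unless obj_name is given explicitly.
 */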
static struct bpf_object *bpf_object__new(const char *path,
                                          const void *obj_buf,
                                          size_t obj_buf_sz,
                                          const char *obj_name)
{
        struct bpf_object *obj;
        char *end;

        obj = calloc(1, sizeof(struct bpf_object) + strlen(path) + 1);
        if (!obj) {
                pr_warn("alloc memory failed for %s\n", path);
                return ERR_PTR(-ENOMEM);
        }

        strcpy(obj->path, path);
        if (obj_name) {
                strncpy(obj->name, obj_name, sizeof(obj->name) - 1);
                obj->name[sizeof(obj->name) - 1] = 0;
        } else {
                /* Using basename() GNU version which doesn't modify arg. */
                strncpy(obj->name, basename((void *)path),
                        sizeof(obj->name) - 1);
                end = strchr(obj->name, '.');
                if (end)
                        *end = 0;
        }

        obj->efile.fd = -1;
        /*
         * The caller of this function should also call
         * bpf_object__elf_finish() after data collection, which returns
         * obj_buf to the user. Otherwise we would have to duplicate the
         * buffer to avoid the user freeing it before ELF processing is done.
         */
        obj->efile.obj_buf = obj_buf;
        obj->efile.obj_buf_sz = obj_buf_sz;
        obj->efile.maps_shndx = -1;
        obj->efile.btf_maps_shndx = -1;
        obj->efile.data_shndx = -1;
        obj->efile.rodata_shndx = -1;
        obj->efile.bss_shndx = -1;
        obj->efile.st_ops_shndx = -1;
        obj->kconfig_map_idx = -1;
        obj->rodata_map_idx = -1;

        obj->kern_version = get_kernel_version();
        obj->loaded = false;

        INIT_LIST_HEAD(&obj->list);
        list_add(&obj->list, &bpf_objects_list);
        return obj;
}

static void bpf_object__elf_finish(struct bpf_object *obj)
{
        if (!obj_elf_valid(obj))
                return;

        if (obj->efile.elf) {
                elf_end(obj->efile.elf);
                obj->efile.elf = NULL;
        }
        obj->efile.symbols = NULL;
        obj->efile.data = NULL;
        obj->efile.rodata = NULL;
        obj->efile.bss = NULL;
        obj->efile.st_ops_data = NULL;

        zfree(&obj->efile.reloc_sects);
        obj->efile.nr_reloc_sects = 0;
        zclose(obj->efile.fd);
        obj->efile.obj_buf = NULL;
        obj->efile.obj_buf_sz = 0;
}

static int bpf_object__elf_init(struct bpf_object *obj)
{
        int err = 0;
        GElf_Ehdr *ep;

        if (obj_elf_valid(obj)) {
                pr_warn("elf: init internal error\n");
                return -LIBBPF_ERRNO__LIBELF;
        }

        if (obj->efile.obj_buf_sz > 0) {
                /*
                 * obj_buf should have been validated by
                 * bpf_object__open_buffer().
                 */
                obj->efile.elf = elf_memory((char *)obj->efile.obj_buf,
                                            obj->efile.obj_buf_sz);
        } else {
                obj->efile.fd = open(obj->path, O_RDONLY);
                if (obj->efile.fd < 0) {
                        char errmsg[STRERR_BUFSIZE], *cp;

                        err = -errno;
                        cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
                        pr_warn("elf: failed to open %s: %s\n", obj->path, cp);
                        return err;
                }

                obj->efile.elf = elf_begin(obj->efile.fd, ELF_C_READ_MMAP, NULL);
        }

        if (!obj->efile.elf) {
                pr_warn("elf: failed to open %s as ELF file: %s\n", obj->path, elf_errmsg(-1));
                err = -LIBBPF_ERRNO__LIBELF;
                goto errout;
        }

        if (!gelf_getehdr(obj->efile.elf, &obj->efile.ehdr)) {
                pr_warn("elf: failed to get ELF header from %s: %s\n", obj->path, elf_errmsg(-1));
                err = -LIBBPF_ERRNO__FORMAT;
                goto errout;
        }
        ep = &obj->efile.ehdr;

        if (elf_getshdrstrndx(obj->efile.elf, &obj->efile.shstrndx)) {
                pr_warn("elf: failed to get section names section index for %s: %s\n",
                        obj->path, elf_errmsg(-1));
                err = -LIBBPF_ERRNO__FORMAT;
                goto errout;
        }

        /* Elf is corrupted/truncated, avoid calling elf_strptr. */
        if (!elf_rawdata(elf_getscn(obj->efile.elf, obj->efile.shstrndx), NULL)) {
                pr_warn("elf: failed to get section names strings from %s: %s\n",
                        obj->path, elf_errmsg(-1));
                err = -LIBBPF_ERRNO__FORMAT;
                goto errout;
        }

        /* Old LLVM set e_machine to EM_NONE */
        if (ep->e_type != ET_REL ||
            (ep->e_machine && ep->e_machine != EM_BPF)) {
                pr_warn("elf: %s is not a valid eBPF object file\n", obj->path);
                err = -LIBBPF_ERRNO__FORMAT;
                goto errout;
        }

        return 0;
errout:
        bpf_object__elf_finish(obj);
        return err;
}

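/* The object's ELF data encoding must match the host byte order, since
 * section contents (instructions included) are interpreted in memory as-is.
 */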
static int bpf_object__check_endianness(struct bpf_object *obj)
{
#if __BYTE_ORDER == __LITTLE_ENDIAN
        if (obj->efile.ehdr.e_ident[EI_DATA] == ELFDATA2LSB)
                return 0;
#elif __BYTE_ORDER == __BIG_ENDIAN
        if (obj->efile.ehdr.e_ident[EI_DATA] == ELFDATA2MSB)
                return 0;
#else
# error "Unrecognized __BYTE_ORDER__"
#endif
        pr_warn("elf: endianness mismatch in %s.\n", obj->path);
        return -LIBBPF_ERRNO__ENDIAN;
}

static int
bpf_object__init_license(struct bpf_object *obj, void *data, size_t size)
{
        memcpy(obj->license, data, min(size, sizeof(obj->license) - 1));
        pr_debug("license of %s is %s\n", obj->path, obj->license);
        return 0;
}

static int
bpf_object__init_kversion(struct bpf_object *obj, void *data, size_t size)
{
        __u32 kver;

        if (size != sizeof(kver)) {
                pr_warn("invalid kver section in %s\n", obj->path);
                return -LIBBPF_ERRNO__FORMAT;
        }
        memcpy(&kver, data, sizeof(kver));
        obj->kern_version = kver;
        pr_debug("kernel version of %s is %x\n", obj->path, obj->kern_version);
        return 0;
}

static bool bpf_map_type__is_map_in_map(enum bpf_map_type type)
{
        if (type == BPF_MAP_TYPE_ARRAY_OF_MAPS ||
            type == BPF_MAP_TYPE_HASH_OF_MAPS)
                return true;
        return false;
}

int bpf_object__section_size(const struct bpf_object *obj, const char *name,
                             __u32 *size)
{
        int ret = -ENOENT;

        *size = 0;
        if (!name) {
                return -EINVAL;
        } else if (!strcmp(name, DATA_SEC)) {
                if (obj->efile.data)
                        *size = obj->efile.data->d_size;
        } else if (!strcmp(name, BSS_SEC)) {
                if (obj->efile.bss)
                        *size = obj->efile.bss->d_size;
        } else if (!strcmp(name, RODATA_SEC)) {
                if (obj->efile.rodata)
                        *size = obj->efile.rodata->d_size;
        } else if (!strcmp(name, STRUCT_OPS_SEC)) {
                if (obj->efile.st_ops_data)
                        *size = obj->efile.st_ops_data->d_size;
        } else {
                Elf_Scn *scn = elf_sec_by_name(obj, name);
                Elf_Data *data = elf_sec_data(obj, scn);

                if (data) {
                        ret = 0; /* found it */
                        *size = data->d_size;
                }
        }

        return *size ? 0 : ret;
}

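/* Find the section-relative offset of a global variable by scanning the ELF
 * symbol table for a global STT_OBJECT symbol with a matching name.
 */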
int bpf_object__variable_offset(const struct bpf_object *obj, const char *name,
                                __u32 *off)
{
        Elf_Data *symbols = obj->efile.symbols;
        const char *sname;
        size_t si;

        if (!name || !off)
                return -EINVAL;

        for (si = 0; si < symbols->d_size / sizeof(GElf_Sym); si++) {
                GElf_Sym sym;

                if (!gelf_getsym(symbols, si, &sym))
                        continue;
                if (GELF_ST_BIND(sym.st_info) != STB_GLOBAL ||
                    GELF_ST_TYPE(sym.st_info) != STT_OBJECT)
                        continue;

                sname = elf_sym_str(obj, sym.st_name);
                if (!sname) {
                        pr_warn("failed to get sym name string for var %s\n",
                                name);
                        return -EIO;
                }
                if (strcmp(name, sname) == 0) {
                        *off = sym.st_value;
                        return 0;
                }
        }

        return -ENOENT;
}

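/* Append a zero-initialized slot to obj->maps, growing the backing array
 * geometrically (by 3/2, starting from a capacity of 4) when it is full.
 */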
static struct bpf_map *bpf_object__add_map(struct bpf_object *obj)
{
        struct bpf_map *new_maps;
        size_t new_cap;
        int i;

        if (obj->nr_maps < obj->maps_cap)
                return &obj->maps[obj->nr_maps++];

        new_cap = max((size_t)4, obj->maps_cap * 3 / 2);
        new_maps = libbpf_reallocarray(obj->maps, new_cap, sizeof(*obj->maps));
        if (!new_maps) {
                pr_warn("alloc maps for object failed\n");
                return ERR_PTR(-ENOMEM);
        }

        obj->maps_cap = new_cap;
        obj->maps = new_maps;

        /* zero out new maps */
        memset(obj->maps + obj->nr_maps, 0,
               (obj->maps_cap - obj->nr_maps) * sizeof(*obj->maps));
        /*
         * Fill all fds with -1 so we won't close an incorrect fd on failure
         * (fd=0 is stdin; zclose won't close negative fds).
         */
        for (i = obj->nr_maps; i < obj->maps_cap; i++) {
                obj->maps[i].fd = -1;
                obj->maps[i].inner_map_fd = -1;
        }

        return &obj->maps[obj->nr_maps++];
}

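/* Size of the mmap()-ed memory backing an internal map: the value size
 * rounded up to 8 bytes, times max_entries, rounded up to a whole page.
 * E.g. with a 4096-byte page, value_size == 4 and max_entries == 1 yields
 * roundup(roundup(4, 8) * 1, 4096) == 4096.
 */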
static size_t bpf_map_mmap_sz(const struct bpf_map *map)
{
        long page_sz = sysconf(_SC_PAGE_SIZE);
        size_t map_sz;

        map_sz = (size_t)roundup(map->def.value_size, 8) * map->def.max_entries;
        map_sz = roundup(map_sz, page_sz);
        return map_sz;
}

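/* Construct the kernel-visible name of an internal map by concatenating a
 * possibly truncated object name with the section suffix; e.g. an object
 * named "test_obj" (a hypothetical example) gets "test_obj.rodata" for its
 * .rodata map. The result must fit in BPF_OBJ_NAME_LEN, and the suffix is
 * favored over the prefix when truncating.
 */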
static char *internal_map_name(struct bpf_object *obj,
                               enum libbpf_map_type type)
{
        char map_name[BPF_OBJ_NAME_LEN], *p;
        const char *sfx = libbpf_type_to_btf_name[type];
        int sfx_len = max((size_t)7, strlen(sfx));
        int pfx_len = min((size_t)BPF_OBJ_NAME_LEN - sfx_len - 1,
                          strlen(obj->name));

        snprintf(map_name, sizeof(map_name), "%.*s%.*s", pfx_len, obj->name,
                 sfx_len, libbpf_type_to_btf_name[type]);

        /* sanitise map name to characters allowed by kernel */
        for (p = map_name; *p && p < map_name + sizeof(map_name); p++)
                if (!isalnum(*p) && *p != '_' && *p != '.')
                        *p = '_';

        return strdup(map_name);
}

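/* Create the internal map backing a .data/.rodata/.bss/.kconfig section: a
 * single-entry BPF_F_MMAPABLE array whose value holds the whole section,
 * read-only for programs in the .rodata and .kconfig cases.
 */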
1427 static int
1428 bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type,
1429                               int sec_idx, void *data, size_t data_sz)
1430 {
1431         struct bpf_map_def *def;
1432         struct bpf_map *map;
1433         int err;
1434
1435         map = bpf_object__add_map(obj);
1436         if (IS_ERR(map))
1437                 return PTR_ERR(map);
1438
1439         map->libbpf_type = type;
1440         map->sec_idx = sec_idx;
1441         map->sec_offset = 0;
1442         map->name = internal_map_name(obj, type);
1443         if (!map->name) {
1444                 pr_warn("failed to alloc map name\n");
1445                 return -ENOMEM;
1446         }
1447
1448         def = &map->def;
1449         def->type = BPF_MAP_TYPE_ARRAY;
1450         def->key_size = sizeof(int);
1451         def->value_size = data_sz;
1452         def->max_entries = 1;
1453         def->map_flags = type == LIBBPF_MAP_RODATA || type == LIBBPF_MAP_KCONFIG
1454                          ? BPF_F_RDONLY_PROG : 0;
1455         def->map_flags |= BPF_F_MMAPABLE;
1456
1457         pr_debug("map '%s' (global data): at sec_idx %d, offset %zu, flags %x.\n",
1458                  map->name, map->sec_idx, map->sec_offset, def->map_flags);
1459
1460         map->mmaped = mmap(NULL, bpf_map_mmap_sz(map), PROT_READ | PROT_WRITE,
1461                            MAP_SHARED | MAP_ANONYMOUS, -1, 0);
1462         if (map->mmaped == MAP_FAILED) {
1463                 err = -errno;
1464                 map->mmaped = NULL;
1465                 pr_warn("failed to alloc map '%s' content buffer: %d\n",
1466                         map->name, err);
1467                 zfree(&map->name);
1468                 return err;
1469         }
1470
1471         if (data)
1472                 memcpy(map->mmaped, data, data_sz);
1473
1474         pr_debug("map %td is \"%s\"\n", map - obj->maps, map->name);
1475         return 0;
1476 }
1477
1478 static int bpf_object__init_global_data_maps(struct bpf_object *obj)
1479 {
1480         int err;
1481
1482         /*
1483          * Populate obj->maps with libbpf internal maps.
1484          */
1485         if (obj->efile.data_shndx >= 0) {
1486                 err = bpf_object__init_internal_map(obj, LIBBPF_MAP_DATA,
1487                                                     obj->efile.data_shndx,
1488                                                     obj->efile.data->d_buf,
1489                                                     obj->efile.data->d_size);
1490                 if (err)
1491                         return err;
1492         }
1493         if (obj->efile.rodata_shndx >= 0) {
1494                 err = bpf_object__init_internal_map(obj, LIBBPF_MAP_RODATA,
1495                                                     obj->efile.rodata_shndx,
1496                                                     obj->efile.rodata->d_buf,
1497                                                     obj->efile.rodata->d_size);
1498                 if (err)
1499                         return err;
1500
1501                 obj->rodata_map_idx = obj->nr_maps - 1;
1502         }
1503         if (obj->efile.bss_shndx >= 0) {
1504                 err = bpf_object__init_internal_map(obj, LIBBPF_MAP_BSS,
1505                                                     obj->efile.bss_shndx,
1506                                                     NULL,
1507                                                     obj->efile.bss->d_size);
1508                 if (err)
1509                         return err;
1510         }
1511         return 0;
1512 }
1513
1514
1515 static struct extern_desc *find_extern_by_name(const struct bpf_object *obj,
1516                                                const void *name)
1517 {
1518         int i;
1519
1520         for (i = 0; i < obj->nr_extern; i++) {
1521                 if (strcmp(obj->externs[i].name, name) == 0)
1522                         return &obj->externs[i];
1523         }
1524         return NULL;
1525 }
1526
1527 static int set_kcfg_value_tri(struct extern_desc *ext, void *ext_val,
1528                               char value)
1529 {
1530         switch (ext->kcfg.type) {
1531         case KCFG_BOOL:
1532                 if (value == 'm') {
1533                         pr_warn("extern (kcfg) %s=%c should be tristate or char\n",
1534                                 ext->name, value);
1535                         return -EINVAL;
1536                 }
1537                 *(bool *)ext_val = value == 'y' ? true : false;
1538                 break;
1539         case KCFG_TRISTATE:
1540                 if (value == 'y')
1541                         *(enum libbpf_tristate *)ext_val = TRI_YES;
1542                 else if (value == 'm')
1543                         *(enum libbpf_tristate *)ext_val = TRI_MODULE;
1544                 else /* value == 'n' */
1545                         *(enum libbpf_tristate *)ext_val = TRI_NO;
1546                 break;
1547         case KCFG_CHAR:
1548                 *(char *)ext_val = value;
1549                 break;
1550         case KCFG_UNKNOWN:
1551         case KCFG_INT:
1552         case KCFG_CHAR_ARR:
1553         default:
1554                 pr_warn("extern (kcfg) %s=%c should be bool, tristate, or char\n",
1555                         ext->name, value);
1556                 return -EINVAL;
1557         }
1558         ext->is_set = true;
1559         return 0;
1560 }
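
/* Example (illustrative): on the BPF program side such externs live in the
 * .kconfig section (the __kconfig macro from bpf_helpers.h places them
 * there), e.g.:
 *
 *     extern bool CONFIG_BPF_SYSCALL __kconfig;
 *     extern enum libbpf_tristate CONFIG_BLK_DEV_LOOP __kconfig;
 *
 * A Kconfig line "CONFIG_BLK_DEV_LOOP=m" then sets the tristate extern to
 * TRI_MODULE through this helper.
 */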
1561
1562 static int set_kcfg_value_str(struct extern_desc *ext, char *ext_val,
1563                               const char *value)
1564 {
1565         size_t len;
1566
1567         if (ext->kcfg.type != KCFG_CHAR_ARR) {
1568                 pr_warn("extern (kcfg) %s=%s should be char array\n", ext->name, value);
1569                 return -EINVAL;
1570         }
1571
1572         len = strlen(value);
1573         if (len < 2 || value[len - 1] != '"') {
1574                 pr_warn("extern (kcfg) '%s': invalid string config '%s'\n",
1575                         ext->name, value);
1576                 return -EINVAL;
1577         }
1578
1579         /* strip quotes */
1580         len -= 2;
1581         if (len >= ext->kcfg.sz) {
1582                 pr_warn("extern (kcfg) '%s': long string config '%s' (%zu bytes) truncated to %d bytes\n",
1583                         ext->name, value, len, ext->kcfg.sz - 1);
1584                 len = ext->kcfg.sz - 1;
1585         }
1586         memcpy(ext_val, value + 1, len);
1587         ext_val[len] = '\0';
1588         ext->is_set = true;
1589         return 0;
1590 }
1591
1592 static int parse_u64(const char *value, __u64 *res)
1593 {
1594         char *value_end;
1595         int err;
1596
1597         errno = 0;
1598         *res = strtoull(value, &value_end, 0);
1599         if (errno) {
1600                 err = -errno;
1601                 pr_warn("failed to parse '%s' as integer: %d\n", value, err);
1602                 return err;
1603         }
1604         if (*value_end) {
1605                 pr_warn("failed to parse '%s' as integer completely\n", value);
1606                 return -EINVAL;
1607         }
1608         return 0;
1609 }
1610
1611 static bool is_kcfg_value_in_range(const struct extern_desc *ext, __u64 v)
1612 {
1613         int bit_sz = ext->kcfg.sz * 8;
1614
1615         if (ext->kcfg.sz == 8)
1616                 return true;
1617
1618         /* Validate that the value stored in u64 fits in an integer of
1619          * `ext->kcfg.sz` bytes without any loss of information. If the target integer
1620          * is signed, we rely on the following limits of integer type of
1621          * Y bits and subsequent transformation:
1622          *
1623          *     -2^(Y-1) <= X           <= 2^(Y-1) - 1
1624          *            0 <= X + 2^(Y-1) <= 2^Y - 1
1625          *            0 <= X + 2^(Y-1) <  2^Y
1626          *
1627          * For an unsigned target integer, check that the upper (64 - Y) bits
1628          * are all zero.
1629          */
1630         if (ext->kcfg.is_signed)
1631                 return v + (1ULL << (bit_sz - 1)) < (1ULL << bit_sz);
1632         else
1633                 return (v >> bit_sz) == 0;
1634 }
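
/* Worked example for the signed branch above (illustrative numbers): with
 * ext->kcfg.sz == 1 (Y = 8), the value -2 is stored in the u64 as
 * 0xfffffffffffffffe; adding 2^7 wraps around to 126, which is < 2^8, so -2
 * is accepted. The value 200, in contrast, yields 200 + 128 = 328 >= 256 and
 * is rejected for a signed 1-byte target.
 */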
1635
1636 static int set_kcfg_value_num(struct extern_desc *ext, void *ext_val,
1637                               __u64 value)
1638 {
1639         if (ext->kcfg.type != KCFG_INT && ext->kcfg.type != KCFG_CHAR) {
1640                 pr_warn("extern (kcfg) %s=%llu should be integer\n",
1641                         ext->name, (unsigned long long)value);
1642                 return -EINVAL;
1643         }
1644         if (!is_kcfg_value_in_range(ext, value)) {
1645                 pr_warn("extern (kcfg) %s=%llu value doesn't fit in %d bytes\n",
1646                         ext->name, (unsigned long long)value, ext->kcfg.sz);
1647                 return -ERANGE;
1648         }
1649         switch (ext->kcfg.sz) {
1650         case 1: *(__u8 *)ext_val = value; break;
1651         case 2: *(__u16 *)ext_val = value; break;
1652         case 4: *(__u32 *)ext_val = value; break;
1653         case 8: *(__u64 *)ext_val = value; break;
1654         default:
1655                 return -EINVAL;
1656         }
1657         ext->is_set = true;
1658         return 0;
1659 }
1660
1661 static int bpf_object__process_kconfig_line(struct bpf_object *obj,
1662                                             char *buf, void *data)
1663 {
1664         struct extern_desc *ext;
1665         char *sep, *value;
1666         int len, err = 0;
1667         void *ext_val;
1668         __u64 num;
1669
1670         if (strncmp(buf, "CONFIG_", 7))
1671                 return 0;
1672
1673         sep = strchr(buf, '=');
1674         if (!sep) {
1675                 pr_warn("failed to parse '%s': no separator\n", buf);
1676                 return -EINVAL;
1677         }
1678
1679         /* Trim trailing '\n' */
1680         len = strlen(buf);
1681         if (buf[len - 1] == '\n')
1682                 buf[len - 1] = '\0';
1683         /* Split on '=' and ensure that a value is present. */
1684         *sep = '\0';
1685         if (!sep[1]) {
1686                 *sep = '=';
1687                 pr_warn("failed to parse '%s': no value\n", buf);
1688                 return -EINVAL;
1689         }
1690
1691         ext = find_extern_by_name(obj, buf);
1692         if (!ext || ext->is_set)
1693                 return 0;
1694
1695         ext_val = data + ext->kcfg.data_off;
1696         value = sep + 1;
1697
1698         switch (*value) {
1699         case 'y': case 'n': case 'm':
1700                 err = set_kcfg_value_tri(ext, ext_val, *value);
1701                 break;
1702         case '"':
1703                 err = set_kcfg_value_str(ext, ext_val, value);
1704                 break;
1705         default:
1706                 /* assume integer */
1707                 err = parse_u64(value, &num);
1708                 if (err) {
1709                         pr_warn("extern (kcfg) %s=%s should be integer\n",
1710                                 ext->name, value);
1711                         return err;
1712                 }
1713                 err = set_kcfg_value_num(ext, ext_val, num);
1714                 break;
1715         }
1716         if (err)
1717                 return err;
1718         pr_debug("extern (kcfg) %s=%s\n", ext->name, value);
1719         return 0;
1720 }
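
/* Example (illustrative) of how typical Kconfig lines are dispatched above:
 *
 *     CONFIG_BPF_SYSCALL=y              -> set_kcfg_value_tri()
 *     CONFIG_DEFAULT_HOSTNAME="(none)"  -> set_kcfg_value_str()
 *     CONFIG_HZ=250                     -> parse_u64() + set_kcfg_value_num()
 *
 * Lines not starting with "CONFIG_", as well as options with no matching
 * extern (or an extern that is already set), are skipped without error.
 */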
1721
1722 static int bpf_object__read_kconfig_file(struct bpf_object *obj, void *data)
1723 {
1724         char buf[PATH_MAX];
1725         struct utsname uts;
1726         int len, err = 0;
1727         gzFile file;
1728
1729         uname(&uts);
1730         len = snprintf(buf, PATH_MAX, "/boot/config-%s", uts.release);
1731         if (len < 0)
1732                 return -EINVAL;
1733         else if (len >= PATH_MAX)
1734                 return -ENAMETOOLONG;
1735
1736         /* gzopen also accepts uncompressed files. */
1737         file = gzopen(buf, "r");
1738         if (!file)
1739                 file = gzopen("/proc/config.gz", "r");
1740
1741         if (!file) {
1742                 pr_warn("failed to open system Kconfig\n");
1743                 return -ENOENT;
1744         }
1745
1746         while (gzgets(file, buf, sizeof(buf))) {
1747                 err = bpf_object__process_kconfig_line(obj, buf, data);
1748                 if (err) {
1749                         pr_warn("error parsing system Kconfig line '%s': %d\n",
1750                                 buf, err);
1751                         goto out;
1752                 }
1753         }
1754
1755 out:
1756         gzclose(file);
1757         return err;
1758 }
1759
1760 static int bpf_object__read_kconfig_mem(struct bpf_object *obj,
1761                                         const char *config, void *data)
1762 {
1763         char buf[PATH_MAX];
1764         int err = 0;
1765         FILE *file;
1766
1767         file = fmemopen((void *)config, strlen(config), "r");
1768         if (!file) {
1769                 err = -errno;
1770                 pr_warn("failed to open in-memory Kconfig: %d\n", err);
1771                 return err;
1772         }
1773
1774         while (fgets(buf, sizeof(buf), file)) {
1775                 err = bpf_object__process_kconfig_line(obj, buf, data);
1776                 if (err) {
1777                         pr_warn("error parsing in-memory Kconfig line '%s': %d\n",
1778                                 buf, err);
1779                         break;
1780                 }
1781         }
1782
1783         fclose(file);
1784         return err;
1785 }
1786
1787 static int bpf_object__init_kconfig_map(struct bpf_object *obj)
1788 {
1789         struct extern_desc *last_ext = NULL, *ext;
1790         size_t map_sz;
1791         int i, err;
1792
1793         for (i = 0; i < obj->nr_extern; i++) {
1794                 ext = &obj->externs[i];
1795                 if (ext->type == EXT_KCFG)
1796                         last_ext = ext;
1797         }
1798
1799         if (!last_ext)
1800                 return 0;
1801
1802         map_sz = last_ext->kcfg.data_off + last_ext->kcfg.sz;
1803         err = bpf_object__init_internal_map(obj, LIBBPF_MAP_KCONFIG,
1804                                             obj->efile.symbols_shndx,
1805                                             NULL, map_sz);
1806         if (err)
1807                 return err;
1808
1809         obj->kconfig_map_idx = obj->nr_maps - 1;
1810
1811         return 0;
1812 }
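
/* The .kconfig map value is one flat blob holding all kcfg externs at their
 * assigned data_off. Assuming offsets were assigned in increasing order when
 * externs were collected (elsewhere in this file), the blob size is simply
 * data_off + sz of the last kcfg extern, as computed above.
 */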
1813
1814 static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict)
1815 {
1816         Elf_Data *symbols = obj->efile.symbols;
1817         int i, map_def_sz = 0, nr_maps = 0, nr_syms;
1818         Elf_Data *data = NULL;
1819         Elf_Scn *scn;
1820
1821         if (obj->efile.maps_shndx < 0)
1822                 return 0;
1823
1824         if (!symbols)
1825                 return -EINVAL;
1826
1827         scn = elf_sec_by_idx(obj, obj->efile.maps_shndx);
1828         data = elf_sec_data(obj, scn);
1829         if (!scn || !data) {
1830                 pr_warn("elf: failed to get legacy map definitions for %s\n",
1831                         obj->path);
1832                 return -EINVAL;
1833         }
1834
1835         /*
1836          * Count number of maps. Each map has a name.
1837          * Array of maps is not supported: only the first element is
1838          * considered.
1839          *
1840          * TODO: Detect arrays of maps and report an error.
1841          */
1842         nr_syms = symbols->d_size / sizeof(GElf_Sym);
1843         for (i = 0; i < nr_syms; i++) {
1844                 GElf_Sym sym;
1845
1846                 if (!gelf_getsym(symbols, i, &sym))
1847                         continue;
1848                 if (sym.st_shndx != obj->efile.maps_shndx)
1849                         continue;
1850                 nr_maps++;
1851         }
1852         /* Assume equally sized map definitions */
1853         pr_debug("elf: found %d legacy map definitions (%zd bytes) in %s\n",
1854                  nr_maps, data->d_size, obj->path);
1855
1856         if (!data->d_size || nr_maps == 0 || (data->d_size % nr_maps) != 0) {
1857                 pr_warn("elf: unable to determine legacy map definition size in %s\n",
1858                         obj->path);
1859                 return -EINVAL;
1860         }
1861         map_def_sz = data->d_size / nr_maps;
1862
1863         /* Fill obj->maps using data in "maps" section.  */
1864         for (i = 0; i < nr_syms; i++) {
1865                 GElf_Sym sym;
1866                 const char *map_name;
1867                 struct bpf_map_def *def;
1868                 struct bpf_map *map;
1869
1870                 if (!gelf_getsym(symbols, i, &sym))
1871                         continue;
1872                 if (sym.st_shndx != obj->efile.maps_shndx)
1873                         continue;
1874
1875                 map = bpf_object__add_map(obj);
1876                 if (IS_ERR(map))
1877                         return PTR_ERR(map);
1878
1879                 map_name = elf_sym_str(obj, sym.st_name);
1880                 if (!map_name) {
1881                         pr_warn("failed to get map #%d name sym string for obj %s\n",
1882                                 i, obj->path);
1883                         return -LIBBPF_ERRNO__FORMAT;
1884                 }
1885
1886                 if (GELF_ST_TYPE(sym.st_info) == STT_SECTION ||
1887                     GELF_ST_BIND(sym.st_info) == STB_LOCAL) {
1888                         pr_warn("map '%s' (legacy): static maps are not supported\n", map_name);
1889                         return -ENOTSUP;
1890                 }
1891
1892                 map->libbpf_type = LIBBPF_MAP_UNSPEC;
1893                 map->sec_idx = sym.st_shndx;
1894                 map->sec_offset = sym.st_value;
1895                 pr_debug("map '%s' (legacy): at sec_idx %d, offset %zu.\n",
1896                          map_name, map->sec_idx, map->sec_offset);
1897                 if (sym.st_value + map_def_sz > data->d_size) {
1898                         pr_warn("corrupted maps section in %s: last map \"%s\" too small\n",
1899                                 obj->path, map_name);
1900                         return -EINVAL;
1901                 }
1902
1903                 map->name = strdup(map_name);
1904                 if (!map->name) {
1905                         pr_warn("failed to alloc map name\n");
1906                         return -ENOMEM;
1907                 }
1908                 pr_debug("map %d is \"%s\"\n", i, map->name);
1909                 def = (struct bpf_map_def *)(data->d_buf + sym.st_value);
1910                 /*
1911                  * If the definition of the map in the object file fits in
1912                  * bpf_map_def, copy it.  Any extra fields in our version
1913                  * of bpf_map_def will default to zero as a result of the
1914                  * calloc above.
1915                  */
1916                 if (map_def_sz <= sizeof(struct bpf_map_def)) {
1917                         memcpy(&map->def, def, map_def_sz);
1918                 } else {
1919                         /*
1920                          * Here the map structure being read is bigger than what
1921                          * we expect, truncate if the excess bits are all zero.
1922                          * If they are not zero, reject this map as
1923                          * incompatible.
1924                          */
1925                         char *b;
1926
1927                         for (b = ((char *)def) + sizeof(struct bpf_map_def);
1928                              b < ((char *)def) + map_def_sz; b++) {
1929                                 if (*b != 0) {
1930                                         pr_warn("maps section in %s: \"%s\" has unrecognized, non-zero options\n",
1931                                                 obj->path, map_name);
1932                                         if (strict)
1933                                                 return -EINVAL;
1934                                 }
1935                         }
1936                         memcpy(&map->def, def, sizeof(struct bpf_map_def));
1937                 }
1938         }
1939         return 0;
1940 }
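
/* Example (illustrative, the name "my_map" is made up): a legacy map
 * definition that the function above parses out of the "maps" ELF section:
 *
 *     struct bpf_map_def SEC("maps") my_map = {
 *             .type = BPF_MAP_TYPE_ARRAY,
 *             .key_size = sizeof(int),
 *             .value_size = sizeof(long),
 *             .max_entries = 256,
 *     };
 *
 * If the object file's definition is larger than libbpf's struct bpf_map_def,
 * the extra trailing bytes must be all-zero (rejected in strict mode).
 */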
1941
1942 const struct btf_type *
1943 skip_mods_and_typedefs(const struct btf *btf, __u32 id, __u32 *res_id)
1944 {
1945         const struct btf_type *t = btf__type_by_id(btf, id);
1946
1947         if (res_id)
1948                 *res_id = id;
1949
1950         while (btf_is_mod(t) || btf_is_typedef(t)) {
1951                 if (res_id)
1952                         *res_id = t->type;
1953                 t = btf__type_by_id(btf, t->type);
1954         }
1955
1956         return t;
1957 }
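
/* Example (illustrative): given
 *
 *     typedef struct foo foo_t;
 *     const volatile foo_t *p;
 *
 * resolving the pointee type of p walks CONST -> VOLATILE -> TYPEDEF and
 * returns the underlying STRUCT foo, with *res_id (if provided) set to the
 * last type ID visited.
 */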
1958
1959 static const struct btf_type *
1960 resolve_func_ptr(const struct btf *btf, __u32 id, __u32 *res_id)
1961 {
1962         const struct btf_type *t;
1963
1964         t = skip_mods_and_typedefs(btf, id, NULL);
1965         if (!btf_is_ptr(t))
1966                 return NULL;
1967
1968         t = skip_mods_and_typedefs(btf, t->type, res_id);
1969
1970         return btf_is_func_proto(t) ? t : NULL;
1971 }
1972
1973 static const char *__btf_kind_str(__u16 kind)
1974 {
1975         switch (kind) {
1976         case BTF_KIND_UNKN: return "void";
1977         case BTF_KIND_INT: return "int";
1978         case BTF_KIND_PTR: return "ptr";
1979         case BTF_KIND_ARRAY: return "array";
1980         case BTF_KIND_STRUCT: return "struct";
1981         case BTF_KIND_UNION: return "union";
1982         case BTF_KIND_ENUM: return "enum";
1983         case BTF_KIND_FWD: return "fwd";
1984         case BTF_KIND_TYPEDEF: return "typedef";
1985         case BTF_KIND_VOLATILE: return "volatile";
1986         case BTF_KIND_CONST: return "const";
1987         case BTF_KIND_RESTRICT: return "restrict";
1988         case BTF_KIND_FUNC: return "func";
1989         case BTF_KIND_FUNC_PROTO: return "func_proto";
1990         case BTF_KIND_VAR: return "var";
1991         case BTF_KIND_DATASEC: return "datasec";
1992         case BTF_KIND_FLOAT: return "float";
1993         default: return "unknown";
1994         }
1995 }
1996
1997 const char *btf_kind_str(const struct btf_type *t)
1998 {
1999         return __btf_kind_str(btf_kind(t));
2000 }
2001
2002 /*
2003  * Fetch an integer attribute of a BTF map definition. Such attributes are
2004  * represented using a pointer to an array, where the number of array elements
2005  * encodes the specified integer value. E.g., int (*type)[BPF_MAP_TYPE_ARRAY];
2006  * encodes `type => BPF_MAP_TYPE_ARRAY` key/value pair completely using BTF
2007  * type definition, while using only sizeof(void *) space in ELF data section.
2008  */
2009 static bool get_map_field_int(const char *map_name, const struct btf *btf,
2010                               const struct btf_member *m, __u32 *res)
2011 {
2012         const struct btf_type *t = skip_mods_and_typedefs(btf, m->type, NULL);
2013         const char *name = btf__name_by_offset(btf, m->name_off);
2014         const struct btf_array *arr_info;
2015         const struct btf_type *arr_t;
2016
2017         if (!btf_is_ptr(t)) {
2018                 pr_warn("map '%s': attr '%s': expected PTR, got %s.\n",
2019                         map_name, name, btf_kind_str(t));
2020                 return false;
2021         }
2022
2023         arr_t = btf__type_by_id(btf, t->type);
2024         if (!arr_t) {
2025                 pr_warn("map '%s': attr '%s': type [%u] not found.\n",
2026                         map_name, name, t->type);
2027                 return false;
2028         }
2029         if (!btf_is_array(arr_t)) {
2030                 pr_warn("map '%s': attr '%s': expected ARRAY, got %s.\n",
2031                         map_name, name, btf_kind_str(arr_t));
2032                 return false;
2033         }
2034         arr_info = btf_array(arr_t);
2035         *res = arr_info->nelems;
2036         return true;
2037 }
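
/* For reference: the convenience macros in bpf_helpers.h generate exactly
 * this pointer-to-array encoding, e.g. __uint(max_entries, 64) expands to
 * int (*max_entries)[64], from which the value 64 is recovered above as the
 * array's nelems.
 */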
2038
2039 static int build_map_pin_path(struct bpf_map *map, const char *path)
2040 {
2041         char buf[PATH_MAX];
2042         int len;
2043
2044         if (!path)
2045                 path = "/sys/fs/bpf";
2046
2047         len = snprintf(buf, PATH_MAX, "%s/%s", path, bpf_map__name(map));
2048         if (len < 0)
2049                 return -EINVAL;
2050         else if (len >= PATH_MAX)
2051                 return -ENAMETOOLONG;
2052
2053         return bpf_map__set_pin_path(map, buf);
2054 }
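
/* Example (illustrative): with a NULL path and a map named "stats", the
 * resulting pin path is "/sys/fs/bpf/stats".
 */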
2055
2056 int parse_btf_map_def(const char *map_name, struct btf *btf,
2057                       const struct btf_type *def_t, bool strict,
2058                       struct btf_map_def *map_def, struct btf_map_def *inner_def)
2059 {
2060         const struct btf_type *t;
2061         const struct btf_member *m;
2062         bool is_inner = inner_def == NULL;
2063         int vlen, i;
2064
2065         vlen = btf_vlen(def_t);
2066         m = btf_members(def_t);
2067         for (i = 0; i < vlen; i++, m++) {
2068                 const char *name = btf__name_by_offset(btf, m->name_off);
2069
2070                 if (!name) {
2071                         pr_warn("map '%s': invalid field #%d.\n", map_name, i);
2072                         return -EINVAL;
2073                 }
2074                 if (strcmp(name, "type") == 0) {
2075                         if (!get_map_field_int(map_name, btf, m, &map_def->map_type))
2076                                 return -EINVAL;
2077                         map_def->parts |= MAP_DEF_MAP_TYPE;
2078                 } else if (strcmp(name, "max_entries") == 0) {
2079                         if (!get_map_field_int(map_name, btf, m, &map_def->max_entries))
2080                                 return -EINVAL;
2081                         map_def->parts |= MAP_DEF_MAX_ENTRIES;
2082                 } else if (strcmp(name, "map_flags") == 0) {
2083                         if (!get_map_field_int(map_name, btf, m, &map_def->map_flags))
2084                                 return -EINVAL;
2085                         map_def->parts |= MAP_DEF_MAP_FLAGS;
2086                 } else if (strcmp(name, "numa_node") == 0) {
2087                         if (!get_map_field_int(map_name, btf, m, &map_def->numa_node))
2088                                 return -EINVAL;
2089                         map_def->parts |= MAP_DEF_NUMA_NODE;
2090                 } else if (strcmp(name, "key_size") == 0) {
2091                         __u32 sz;
2092
2093                         if (!get_map_field_int(map_name, btf, m, &sz))
2094                                 return -EINVAL;
2095                         if (map_def->key_size && map_def->key_size != sz) {
2096                                 pr_warn("map '%s': conflicting key size %u != %u.\n",
2097                                         map_name, map_def->key_size, sz);
2098                                 return -EINVAL;
2099                         }
2100                         map_def->key_size = sz;
2101                         map_def->parts |= MAP_DEF_KEY_SIZE;
2102                 } else if (strcmp(name, "key") == 0) {
2103                         __s64 sz;
2104
2105                         t = btf__type_by_id(btf, m->type);
2106                         if (!t) {
2107                                 pr_warn("map '%s': key type [%d] not found.\n",
2108                                         map_name, m->type);
2109                                 return -EINVAL;
2110                         }
2111                         if (!btf_is_ptr(t)) {
2112                                 pr_warn("map '%s': key spec is not PTR: %s.\n",
2113                                         map_name, btf_kind_str(t));
2114                                 return -EINVAL;
2115                         }
2116                         sz = btf__resolve_size(btf, t->type);
2117                         if (sz < 0) {
2118                                 pr_warn("map '%s': can't determine key size for type [%u]: %zd.\n",
2119                                         map_name, t->type, (ssize_t)sz);
2120                                 return sz;
2121                         }
2122                         if (map_def->key_size && map_def->key_size != sz) {
2123                                 pr_warn("map '%s': conflicting key size %u != %zd.\n",
2124                                         map_name, map_def->key_size, (ssize_t)sz);
2125                                 return -EINVAL;
2126                         }
2127                         map_def->key_size = sz;
2128                         map_def->key_type_id = t->type;
2129                         map_def->parts |= MAP_DEF_KEY_SIZE | MAP_DEF_KEY_TYPE;
2130                 } else if (strcmp(name, "value_size") == 0) {
2131                         __u32 sz;
2132
2133                         if (!get_map_field_int(map_name, btf, m, &sz))
2134                                 return -EINVAL;
2135                         if (map_def->value_size && map_def->value_size != sz) {
2136                                 pr_warn("map '%s': conflicting value size %u != %u.\n",
2137                                         map_name, map_def->value_size, sz);
2138                                 return -EINVAL;
2139                         }
2140                         map_def->value_size = sz;
2141                         map_def->parts |= MAP_DEF_VALUE_SIZE;
2142                 } else if (strcmp(name, "value") == 0) {
2143                         __s64 sz;
2144
2145                         t = btf__type_by_id(btf, m->type);
2146                         if (!t) {
2147                                 pr_warn("map '%s': value type [%d] not found.\n",
2148                                         map_name, m->type);
2149                                 return -EINVAL;
2150                         }
2151                         if (!btf_is_ptr(t)) {
2152                                 pr_warn("map '%s': value spec is not PTR: %s.\n",
2153                                         map_name, btf_kind_str(t));
2154                                 return -EINVAL;
2155                         }
2156                         sz = btf__resolve_size(btf, t->type);
2157                         if (sz < 0) {
2158                                 pr_warn("map '%s': can't determine value size for type [%u]: %zd.\n",
2159                                         map_name, t->type, (ssize_t)sz);
2160                                 return sz;
2161                         }
2162                         if (map_def->value_size && map_def->value_size != sz) {
2163                                 pr_warn("map '%s': conflicting value size %u != %zd.\n",
2164                                         map_name, map_def->value_size, (ssize_t)sz);
2165                                 return -EINVAL;
2166                         }
2167                         map_def->value_size = sz;
2168                         map_def->value_type_id = t->type;
2169                         map_def->parts |= MAP_DEF_VALUE_SIZE | MAP_DEF_VALUE_TYPE;
2170                 } else if (strcmp(name, "values") == 0) {
2172                         char inner_map_name[128];
2173                         int err;
2174
2175                         if (is_inner) {
2176                                 pr_warn("map '%s': multi-level inner maps not supported.\n",
2177                                         map_name);
2178                                 return -ENOTSUP;
2179                         }
2180                         if (i != vlen - 1) {
2181                                 pr_warn("map '%s': '%s' member should be last.\n",
2182                                         map_name, name);
2183                                 return -EINVAL;
2184                         }
2185                         if (!bpf_map_type__is_map_in_map(map_def->map_type)) {
2186                                 pr_warn("map '%s': should be map-in-map.\n",
2187                                         map_name);
2188                                 return -ENOTSUP;
2189                         }
2190                         if (map_def->value_size && map_def->value_size != 4) {
2191                                 pr_warn("map '%s': conflicting value size %u != 4.\n",
2192                                         map_name, map_def->value_size);
2193                                 return -EINVAL;
2194                         }
2195                         map_def->value_size = 4;
2196                         t = btf__type_by_id(btf, m->type);
2197                         if (!t) {
2198                                 pr_warn("map '%s': map-in-map inner type [%d] not found.\n",
2199                                         map_name, m->type);
2200                                 return -EINVAL;
2201                         }
2202                         if (!btf_is_array(t) || btf_array(t)->nelems) {
2203                                 pr_warn("map '%s': map-in-map inner spec is not a zero-sized array.\n",
2204                                         map_name);
2205                                 return -EINVAL;
2206                         }
2207                         t = skip_mods_and_typedefs(btf, btf_array(t)->type, NULL);
2208                         if (!btf_is_ptr(t)) {
2209                                 pr_warn("map '%s': map-in-map inner def is of unexpected kind %s.\n",
2210                                         map_name, btf_kind_str(t));
2211                                 return -EINVAL;
2212                         }
2213                         t = skip_mods_and_typedefs(btf, t->type, NULL);
2214                         if (!btf_is_struct(t)) {
2215                                 pr_warn("map '%s': map-in-map inner def is of unexpected kind %s.\n",
2216                                         map_name, btf_kind_str(t));
2217                                 return -EINVAL;
2218                         }
2219
2220                         snprintf(inner_map_name, sizeof(inner_map_name), "%s.inner", map_name);
2221                         err = parse_btf_map_def(inner_map_name, btf, t, strict, inner_def, NULL);
2222                         if (err)
2223                                 return err;
2224
2225                         map_def->parts |= MAP_DEF_INNER_MAP;
2226                 } else if (strcmp(name, "pinning") == 0) {
2227                         __u32 val;
2228
2229                         if (is_inner) {
2230                                 pr_warn("map '%s': inner def can't be pinned.\n", map_name);
2231                                 return -EINVAL;
2232                         }
2233                         if (!get_map_field_int(map_name, btf, m, &val))
2234                                 return -EINVAL;
2235                         if (val != LIBBPF_PIN_NONE && val != LIBBPF_PIN_BY_NAME) {
2236                                 pr_warn("map '%s': invalid pinning value %u.\n",
2237                                         map_name, val);
2238                                 return -EINVAL;
2239                         }
2240                         map_def->pinning = val;
2241                         map_def->parts |= MAP_DEF_PINNING;
2242                 } else {
2243                         if (strict) {
2244                                 pr_warn("map '%s': unknown field '%s'.\n", map_name, name);
2245                                 return -ENOTSUP;
2246                         }
2247                         pr_debug("map '%s': ignoring unknown field '%s'.\n", map_name, name);
2248                 }
2249         }
2250
2251         if (map_def->map_type == BPF_MAP_TYPE_UNSPEC) {
2252                 pr_warn("map '%s': map type isn't specified.\n", map_name);
2253                 return -EINVAL;
2254         }
2255
2256         return 0;
2257 }
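
/* Example (illustrative, the name "events" is made up): a BTF-defined map
 * whose fields are handled by parse_btf_map_def() above:
 *
 *     struct {
 *             __uint(type, BPF_MAP_TYPE_HASH);
 *             __uint(max_entries, 1024);
 *             __type(key, __u32);
 *             __type(value, __u64);
 *     } events SEC(".maps");
 *
 * "key"/"value" contribute both a size and a BTF type ID, while "key_size"/
 * "value_size" contribute a size only; conflicting sizes are rejected.
 */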
2258
2259 static void fill_map_from_def(struct bpf_map *map, const struct btf_map_def *def)
2260 {
2261         map->def.type = def->map_type;
2262         map->def.key_size = def->key_size;
2263         map->def.value_size = def->value_size;
2264         map->def.max_entries = def->max_entries;
2265         map->def.map_flags = def->map_flags;
2266
2267         map->numa_node = def->numa_node;
2268         map->btf_key_type_id = def->key_type_id;
2269         map->btf_value_type_id = def->value_type_id;
2270
2271         if (def->parts & MAP_DEF_MAP_TYPE)
2272                 pr_debug("map '%s': found type = %u.\n", map->name, def->map_type);
2273
2274         if (def->parts & MAP_DEF_KEY_TYPE)
2275                 pr_debug("map '%s': found key [%u], sz = %u.\n",
2276                          map->name, def->key_type_id, def->key_size);
2277         else if (def->parts & MAP_DEF_KEY_SIZE)
2278                 pr_debug("map '%s': found key_size = %u.\n", map->name, def->key_size);
2279
2280         if (def->parts & MAP_DEF_VALUE_TYPE)
2281                 pr_debug("map '%s': found value [%u], sz = %u.\n",
2282                          map->name, def->value_type_id, def->value_size);
2283         else if (def->parts & MAP_DEF_VALUE_SIZE)
2284                 pr_debug("map '%s': found value_size = %u.\n", map->name, def->value_size);
2285
2286         if (def->parts & MAP_DEF_MAX_ENTRIES)
2287                 pr_debug("map '%s': found max_entries = %u.\n", map->name, def->max_entries);
2288         if (def->parts & MAP_DEF_MAP_FLAGS)
2289                 pr_debug("map '%s': found map_flags = %u.\n", map->name, def->map_flags);
2290         if (def->parts & MAP_DEF_PINNING)
2291                 pr_debug("map '%s': found pinning = %u.\n", map->name, def->pinning);
2292         if (def->parts & MAP_DEF_NUMA_NODE)
2293                 pr_debug("map '%s': found numa_node = %u.\n", map->name, def->numa_node);
2294
2295         if (def->parts & MAP_DEF_INNER_MAP)
2296                 pr_debug("map '%s': found inner map definition.\n", map->name);
2297 }
2298
2299 static const char *btf_var_linkage_str(__u32 linkage)
2300 {
2301         switch (linkage) {
2302         case BTF_VAR_STATIC: return "static";
2303         case BTF_VAR_GLOBAL_ALLOCATED: return "global";
2304         case BTF_VAR_GLOBAL_EXTERN: return "extern";
2305         default: return "unknown";
2306         }
2307 }
2308
2309 static int bpf_object__init_user_btf_map(struct bpf_object *obj,
2310                                          const struct btf_type *sec,
2311                                          int var_idx, int sec_idx,
2312                                          const Elf_Data *data, bool strict,
2313                                          const char *pin_root_path)
2314 {
2315         struct btf_map_def map_def = {}, inner_def = {};
2316         const struct btf_type *var, *def;
2317         const struct btf_var_secinfo *vi;
2318         const struct btf_var *var_extra;
2319         const char *map_name;
2320         struct bpf_map *map;
2321         int err;
2322
2323         vi = btf_var_secinfos(sec) + var_idx;
2324         var = btf__type_by_id(obj->btf, vi->type);
2325         var_extra = btf_var(var);
2326         map_name = btf__name_by_offset(obj->btf, var->name_off);
2327
2328         if (map_name == NULL || map_name[0] == '\0') {
2329                 pr_warn("map #%d: empty name.\n", var_idx);
2330                 return -EINVAL;
2331         }
2332         if ((__u64)vi->offset + vi->size > data->d_size) {
2333                 pr_warn("map '%s' BTF data is corrupted.\n", map_name);
2334                 return -EINVAL;
2335         }
2336         if (!btf_is_var(var)) {
2337                 pr_warn("map '%s': unexpected var kind %s.\n",
2338                         map_name, btf_kind_str(var));
2339                 return -EINVAL;
2340         }
2341         if (var_extra->linkage != BTF_VAR_GLOBAL_ALLOCATED) {
2342                 pr_warn("map '%s': unsupported map linkage %s.\n",
2343                         map_name, btf_var_linkage_str(var_extra->linkage));
2344                 return -EOPNOTSUPP;
2345         }
2346
2347         def = skip_mods_and_typedefs(obj->btf, var->type, NULL);
2348         if (!btf_is_struct(def)) {
2349                 pr_warn("map '%s': unexpected def kind %s.\n",
2350                         map_name, btf_kind_str(def));
2351                 return -EINVAL;
2352         }
2353         if (def->size > vi->size) {
2354                 pr_warn("map '%s': invalid def size.\n", map_name);
2355                 return -EINVAL;
2356         }
2357
2358         map = bpf_object__add_map(obj);
2359         if (IS_ERR(map))
2360                 return PTR_ERR(map);
2361         map->name = strdup(map_name);
2362         if (!map->name) {
2363                 pr_warn("map '%s': failed to alloc map name.\n", map_name);
2364                 return -ENOMEM;
2365         }
2366         map->libbpf_type = LIBBPF_MAP_UNSPEC;
2367         map->def.type = BPF_MAP_TYPE_UNSPEC;
2368         map->sec_idx = sec_idx;
2369         map->sec_offset = vi->offset;
2370         map->btf_var_idx = var_idx;
2371         pr_debug("map '%s': at sec_idx %d, offset %zu.\n",
2372                  map_name, map->sec_idx, map->sec_offset);
2373
2374         err = parse_btf_map_def(map->name, obj->btf, def, strict, &map_def, &inner_def);
2375         if (err)
2376                 return err;
2377
2378         fill_map_from_def(map, &map_def);
2379
2380         if (map_def.pinning == LIBBPF_PIN_BY_NAME) {
2381                 err = build_map_pin_path(map, pin_root_path);
2382                 if (err) {
2383                         pr_warn("map '%s': couldn't build pin path.\n", map->name);
2384                         return err;
2385                 }
2386         }
2387
2388         if (map_def.parts & MAP_DEF_INNER_MAP) {
2389                 map->inner_map = calloc(1, sizeof(*map->inner_map));
2390                 if (!map->inner_map)
2391                         return -ENOMEM;
2392                 map->inner_map->fd = -1;
2393                 map->inner_map->sec_idx = sec_idx;
2394                 map->inner_map->name = malloc(strlen(map_name) + sizeof(".inner") + 1);
2395                 if (!map->inner_map->name)
2396                         return -ENOMEM;
2397                 sprintf(map->inner_map->name, "%s.inner", map_name);
2398
2399                 fill_map_from_def(map->inner_map, &inner_def);
2400         }
2401
2402         return 0;
2403 }
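
/* Example (illustrative, names are made up): a map-in-map declaration that
 * produces both the outer map and the ".inner" prototype handled above:
 *
 *     struct inner_map {
 *             __uint(type, BPF_MAP_TYPE_ARRAY);
 *             __uint(max_entries, 1);
 *             __type(key, int);
 *             __type(value, int);
 *     };
 *
 *     struct {
 *             __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
 *             __uint(max_entries, 8);
 *             __type(key, int);
 *             __array(values, struct inner_map);
 *     } outer_map SEC(".maps");
 *
 * The outer map's value_size is forced to 4, since its values are inner map
 * FDs.
 */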
2404
2405 static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict,
2406                                           const char *pin_root_path)
2407 {
2408         const struct btf_type *sec = NULL;
2409         int nr_types, i, vlen, err;
2410         const struct btf_type *t;
2411         const char *name;
2412         Elf_Data *data;
2413         Elf_Scn *scn;
2414
2415         if (obj->efile.btf_maps_shndx < 0)
2416                 return 0;
2417
2418         scn = elf_sec_by_idx(obj, obj->efile.btf_maps_shndx);
2419         data = elf_sec_data(obj, scn);
2420         if (!scn || !data) {
2421                 pr_warn("elf: failed to get %s map definitions for %s\n",
2422                         MAPS_ELF_SEC, obj->path);
2423                 return -EINVAL;
2424         }
2425
2426         nr_types = btf__get_nr_types(obj->btf);
2427         for (i = 1; i <= nr_types; i++) {
2428                 t = btf__type_by_id(obj->btf, i);
2429                 if (!btf_is_datasec(t))
2430                         continue;
2431                 name = btf__name_by_offset(obj->btf, t->name_off);
2432                 if (strcmp(name, MAPS_ELF_SEC) == 0) {
2433                         sec = t;
2434                         obj->efile.btf_maps_sec_btf_id = i;
2435                         break;
2436                 }
2437         }
2438
2439         if (!sec) {
2440                 pr_warn("DATASEC '%s' not found.\n", MAPS_ELF_SEC);
2441                 return -ENOENT;
2442         }
2443
2444         vlen = btf_vlen(sec);
2445         for (i = 0; i < vlen; i++) {
2446                 err = bpf_object__init_user_btf_map(obj, sec, i,
2447                                                     obj->efile.btf_maps_shndx,
2448                                                     data, strict,
2449                                                     pin_root_path);
2450                 if (err)
2451                         return err;
2452         }
2453
2454         return 0;
2455 }
2456
2457 static int bpf_object__init_maps(struct bpf_object *obj,
2458                                  const struct bpf_object_open_opts *opts)
2459 {
2460         const char *pin_root_path;
2461         bool strict;
2462         int err;
2463
2464         strict = !OPTS_GET(opts, relaxed_maps, false);
2465         pin_root_path = OPTS_GET(opts, pin_root_path, NULL);
2466
2467         err = bpf_object__init_user_maps(obj, strict);
2468         err = err ?: bpf_object__init_user_btf_maps(obj, strict, pin_root_path);
2469         err = err ?: bpf_object__init_global_data_maps(obj);
2470         err = err ?: bpf_object__init_kconfig_map(obj);
2471         err = err ?: bpf_object__init_struct_ops_maps(obj);
2472
2473         return err;
2474 }
2475
2476 static bool section_have_execinstr(struct bpf_object *obj, int idx)
2477 {
2478         GElf_Shdr sh;
2479
2480         if (elf_sec_hdr(obj, elf_sec_by_idx(obj, idx), &sh))
2481                 return false;
2482
2483         return sh.sh_flags & SHF_EXECINSTR;
2484 }
2485
2486 static bool btf_needs_sanitization(struct bpf_object *obj)
2487 {
2488         bool has_func_global = kernel_supports(obj, FEAT_BTF_GLOBAL_FUNC);
2489         bool has_datasec = kernel_supports(obj, FEAT_BTF_DATASEC);
2490         bool has_float = kernel_supports(obj, FEAT_BTF_FLOAT);
2491         bool has_func = kernel_supports(obj, FEAT_BTF_FUNC);
2492
2493         return !has_func || !has_datasec || !has_func_global || !has_float;
2494 }
2495
2496 static void bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf)
2497 {
2498         bool has_func_global = kernel_supports(obj, FEAT_BTF_GLOBAL_FUNC);
2499         bool has_datasec = kernel_supports(obj, FEAT_BTF_DATASEC);
2500         bool has_float = kernel_supports(obj, FEAT_BTF_FLOAT);
2501         bool has_func = kernel_supports(obj, FEAT_BTF_FUNC);
2502         struct btf_type *t;
2503         int i, j, vlen;
2504
2505         for (i = 1; i <= btf__get_nr_types(btf); i++) {
2506                 t = (struct btf_type *)btf__type_by_id(btf, i);
2507
2508                 if (!has_datasec && btf_is_var(t)) {
2509                         /* replace VAR with INT */
2510                         t->info = BTF_INFO_ENC(BTF_KIND_INT, 0, 0);
2511                         /*
2512                          * using size = 1 is the safest choice, 4 will be too
2513                          * big and cause kernel BTF validation failure if
2514                          * original variable took less than 4 bytes
2515                          */
2516                         t->size = 1;
2517                         *(int *)(t + 1) = BTF_INT_ENC(0, 0, 8);
2518                 } else if (!has_datasec && btf_is_datasec(t)) {
2519                         /* replace DATASEC with STRUCT */
2520                         const struct btf_var_secinfo *v = btf_var_secinfos(t);
2521                         struct btf_member *m = btf_members(t);
2522                         struct btf_type *vt;
2523                         char *name;
2524
2525                         name = (char *)btf__name_by_offset(btf, t->name_off);
2526                         while (*name) {
2527                                 if (*name == '.')
2528                                         *name = '_';
2529                                 name++;
2530                         }
2531
2532                         vlen = btf_vlen(t);
2533                         t->info = BTF_INFO_ENC(BTF_KIND_STRUCT, 0, vlen);
2534                         for (j = 0; j < vlen; j++, v++, m++) {
2535                                 /* order of field assignments is important */
2536                                 m->offset = v->offset * 8;
2537                                 m->type = v->type;
2538                                 /* preserve variable name as member name */
2539                                 vt = (void *)btf__type_by_id(btf, v->type);
2540                                 m->name_off = vt->name_off;
2541                         }
2542                 } else if (!has_func && btf_is_func_proto(t)) {
2543                         /* replace FUNC_PROTO with ENUM */
2544                         vlen = btf_vlen(t);
2545                         t->info = BTF_INFO_ENC(BTF_KIND_ENUM, 0, vlen);
2546                         t->size = sizeof(__u32); /* kernel enforced */
2547                 } else if (!has_func && btf_is_func(t)) {
2548                         /* replace FUNC with TYPEDEF */
2549                         t->info = BTF_INFO_ENC(BTF_KIND_TYPEDEF, 0, 0);
2550                 } else if (!has_func_global && btf_is_func(t)) {
2551                         /* replace BTF_FUNC_GLOBAL with BTF_FUNC_STATIC */
2552                         t->info = BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0);
2553                 } else if (!has_float && btf_is_float(t)) {
2554                         /* replace FLOAT with an equally-sized empty STRUCT;
2555                          * since C compilers do not accept e.g. "float" as a
2556                          * valid struct name, make it anonymous
2557                          */
2558                         t->name_off = 0;
2559                         t->info = BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 0);
2560                 }
2561         }
2562 }
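
/* Example (illustrative): on a kernel without BTF_KIND_DATASEC support, a
 * DATASEC named ".data" is rewritten in place into a STRUCT named "_data"
 * (dots are not valid in struct names) whose members mirror the section's
 * variables, and each VAR becomes a 1-byte INT, keeping the BTF well-formed
 * for older kernel validators.
 */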
2563
2564 static bool libbpf_needs_btf(const struct bpf_object *obj)
2565 {
2566         return obj->efile.btf_maps_shndx >= 0 ||
2567                obj->efile.st_ops_shndx >= 0 ||
2568                obj->nr_extern > 0;
2569 }
2570
2571 static bool kernel_needs_btf(const struct bpf_object *obj)
2572 {
2573         return obj->efile.st_ops_shndx >= 0;
2574 }
2575
2576 static int bpf_object__init_btf(struct bpf_object *obj,
2577                                 Elf_Data *btf_data,
2578                                 Elf_Data *btf_ext_data)
2579 {
2580         int err = -ENOENT;
2581
2582         if (btf_data) {
2583                 obj->btf = btf__new(btf_data->d_buf, btf_data->d_size);
2584                 err = libbpf_get_error(obj->btf);
2585                 if (err) {
2586                         obj->btf = NULL;
2587                         pr_warn("Error loading ELF section %s: %d.\n", BTF_ELF_SEC, err);
2588                         goto out;
2589                 }
2590                 /* enforce 8-byte pointers for BPF-targeted BTFs */
2591                 btf__set_pointer_size(obj->btf, 8);
2592         }
2593         if (btf_ext_data) {
2594                 if (!obj->btf) {
2595                         pr_debug("Ignoring ELF section %s because the ELF section %s it depends on is not found.\n",
2596                                  BTF_EXT_ELF_SEC, BTF_ELF_SEC);
2597                         goto out;
2598                 }
2599                 obj->btf_ext = btf_ext__new(btf_ext_data->d_buf, btf_ext_data->d_size);
2600                 err = libbpf_get_error(obj->btf_ext);
2601                 if (err) {
2602                         pr_warn("Error loading ELF section %s: %d. Ignored and continue.\n",
2603                                 BTF_EXT_ELF_SEC, err);
2604                         obj->btf_ext = NULL;
2605                         goto out;
2606                 }
2607         }
2608 out:
2609         if (err && libbpf_needs_btf(obj)) {
2610                 pr_warn("BTF is required, but is missing or corrupted.\n");
2611                 return err;
2612         }
2613         return 0;
2614 }
2615
2616 static int bpf_object__finalize_btf(struct bpf_object *obj)
2617 {
2618         int err;
2619
2620         if (!obj->btf)
2621                 return 0;
2622
2623         err = btf__finalize_data(obj, obj->btf);
2624         if (err) {
2625                 pr_warn("Error finalizing %s: %d.\n", BTF_ELF_SEC, err);
2626                 return err;
2627         }
2628
2629         return 0;
2630 }
2631
2632 static bool prog_needs_vmlinux_btf(struct bpf_program *prog)
2633 {
2634         if (prog->type == BPF_PROG_TYPE_STRUCT_OPS ||
2635             prog->type == BPF_PROG_TYPE_LSM)
2636                 return true;
2637
2638         /* BPF_PROG_TYPE_TRACING programs which do not attach to other programs
2639          * also need vmlinux BTF
2640          */
2641         if (prog->type == BPF_PROG_TYPE_TRACING && !prog->attach_prog_fd)
2642                 return true;
2643
2644         return false;
2645 }
2646
2647 static bool obj_needs_vmlinux_btf(const struct bpf_object *obj)
2648 {
2649         struct bpf_program *prog;
2650         int i;
2651
2652         /* CO-RE relocations need kernel BTF, but only when btf_custom_path
2653          * is not specified
2654          */
2655         if (obj->btf_ext && obj->btf_ext->core_relo_info.len && !obj->btf_custom_path)
2656                 return true;
2657
2658         /* Support for typed ksyms needs kernel BTF */
2659         for (i = 0; i < obj->nr_extern; i++) {
2660                 const struct extern_desc *ext;
2661
2662                 ext = &obj->externs[i];
2663                 if (ext->type == EXT_KSYM && ext->ksym.type_id)
2664                         return true;
2665         }
2666
2667         bpf_object__for_each_program(prog, obj) {
2668                 if (!prog->load)
2669                         continue;
2670                 if (prog_needs_vmlinux_btf(prog))
2671                         return true;
2672         }
2673
2674         return false;
2675 }
2676
2677 static int bpf_object__load_vmlinux_btf(struct bpf_object *obj, bool force)
2678 {
2679         int err;
2680
2681         /* btf_vmlinux could be loaded earlier */
2682         if (obj->btf_vmlinux || obj->gen_loader)
2683                 return 0;
2684
2685         if (!force && !obj_needs_vmlinux_btf(obj))
2686                 return 0;
2687
2688         obj->btf_vmlinux = libbpf_find_kernel_btf();
2689         err = libbpf_get_error(obj->btf_vmlinux);
2690         if (err) {
2691                 pr_warn("Error loading vmlinux BTF: %d\n", err);
2692                 obj->btf_vmlinux = NULL;
2693                 return err;
2694         }
2695         return 0;
2696 }
2697
2698 static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj)
2699 {
2700         struct btf *kern_btf = obj->btf;
2701         bool btf_mandatory, sanitize;
2702         int i, err = 0;
2703
2704         if (!obj->btf)
2705                 return 0;
2706
2707         if (!kernel_supports(obj, FEAT_BTF)) {
2708                 if (kernel_needs_btf(obj)) {
2709                         err = -EOPNOTSUPP;
2710                         goto report;
2711                 }
2712                 pr_debug("Kernel doesn't support BTF, skipping BTF upload.\n");
2713                 return 0;
2714         }
2715
2716         /* Even though some subprogs are global/weak, the user might prefer the
2717          * more permissive BPF verification process that the BPF verifier performs
2718          * for static functions, which takes into account more context from the
2719          * caller functions. In such a case, they need to mark such subprogs with
2720          * __attribute__((visibility("hidden"))) and libbpf will adjust the
2721          * corresponding FUNC BTF type to be marked as static, triggering the more
2722          * involved BPF verification process.
2723          */
2724         for (i = 0; i < obj->nr_programs; i++) {
2725                 struct bpf_program *prog = &obj->programs[i];
2726                 struct btf_type *t;
2727                 const char *name;
2728                 int j, n;
2729
2730                 if (!prog->mark_btf_static || !prog_is_subprog(obj, prog))
2731                         continue;
2732
2733                 n = btf__get_nr_types(obj->btf);
2734                 for (j = 1; j <= n; j++) {
2735                         t = btf_type_by_id(obj->btf, j);
2736                         if (!btf_is_func(t) || btf_func_linkage(t) != BTF_FUNC_GLOBAL)
2737                                 continue;
2738
2739                         name = btf__str_by_offset(obj->btf, t->name_off);
2740                         if (strcmp(name, prog->name) != 0)
2741                                 continue;
2742
2743                         t->info = btf_type_info(BTF_KIND_FUNC, BTF_FUNC_STATIC, 0);
2744                         break;
2745                 }
2746         }
2747
2748         sanitize = btf_needs_sanitization(obj);
2749         if (sanitize) {
2750                 const void *raw_data;
2751                 __u32 sz;
2752
2753                 /* clone BTF to sanitize a copy and leave the original intact */
2754                 raw_data = btf__get_raw_data(obj->btf, &sz);
2755                 kern_btf = btf__new(raw_data, sz);
2756                 err = libbpf_get_error(kern_btf);
2757                 if (err)
2758                         return err;
2759
2760                 /* enforce 8-byte pointers for BPF-targeted BTFs */
2761                 btf__set_pointer_size(obj->btf, 8);
2762                 bpf_object__sanitize_btf(obj, kern_btf);
2763         }
2764
2765         if (obj->gen_loader) {
2766                 __u32 raw_size = 0;
2767                 const void *raw_data = btf__get_raw_data(kern_btf, &raw_size);
2768
2769                 if (!raw_data)
2770                         return -ENOMEM;
2771                 bpf_gen__load_btf(obj->gen_loader, raw_data, raw_size);
2772                 /* Pretend to have valid FD to pass various fd >= 0 checks.
2773                  * This fd == 0 will not be used with any syscall and will be reset to -1 eventually.
2774                  */
2775                 btf__set_fd(kern_btf, 0);
2776         } else {
2777                 err = btf__load(kern_btf);
2778         }
2779         if (sanitize) {
2780                 if (!err) {
2781                         /* move fd to libbpf's BTF */
2782                         btf__set_fd(obj->btf, btf__fd(kern_btf));
2783                         btf__set_fd(kern_btf, -1);
2784                 }
2785                 btf__free(kern_btf);
2786         }
2787 report:
2788         if (err) {
2789                 btf_mandatory = kernel_needs_btf(obj);
2790                 pr_warn("Error loading .BTF into kernel: %d. %s\n", err,
2791                         btf_mandatory ? "BTF is mandatory, can't proceed."
2792                                       : "BTF is optional, ignoring.");
2793                 if (!btf_mandatory)
2794                         err = 0;
2795         }
2796         return err;
2797 }
2798
2799 static const char *elf_sym_str(const struct bpf_object *obj, size_t off)
2800 {
2801         const char *name;
2802
2803         name = elf_strptr(obj->efile.elf, obj->efile.strtabidx, off);
2804         if (!name) {
2805                 pr_warn("elf: failed to get section name string at offset %zu from %s: %s\n",
2806                         off, obj->path, elf_errmsg(-1));
2807                 return NULL;
2808         }
2809
2810         return name;
2811 }
2812
2813 static const char *elf_sec_str(const struct bpf_object *obj, size_t off)
2814 {
2815         const char *name;
2816
2817         name = elf_strptr(obj->efile.elf, obj->efile.shstrndx, off);
2818         if (!name) {
2819                 pr_warn("elf: failed to get section name string at offset %zu from %s: %s\n",
2820                         off, obj->path, elf_errmsg(-1));
2821                 return NULL;
2822         }
2823
2824         return name;
2825 }
2826
2827 static Elf_Scn *elf_sec_by_idx(const struct bpf_object *obj, size_t idx)
2828 {
2829         Elf_Scn *scn;
2830
2831         scn = elf_getscn(obj->efile.elf, idx);
2832         if (!scn) {
2833                 pr_warn("elf: failed to get section(%zu) from %s: %s\n",
2834                         idx, obj->path, elf_errmsg(-1));
2835                 return NULL;
2836         }
2837         return scn;
2838 }
2839
2840 static Elf_Scn *elf_sec_by_name(const struct bpf_object *obj, const char *name)
2841 {
2842         Elf_Scn *scn = NULL;
2843         Elf *elf = obj->efile.elf;
2844         const char *sec_name;
2845
2846         while ((scn = elf_nextscn(elf, scn)) != NULL) {
2847                 sec_name = elf_sec_name(obj, scn);
2848                 if (!sec_name)
2849                         return NULL;
2850
2851                 if (strcmp(sec_name, name) != 0)
2852                         continue;
2853
2854                 return scn;
2855         }
2856         return NULL;
2857 }
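
/* Usage sketch for the helpers above (illustrative, not called here): look
 * up a section by name, then fetch its payload with elf_sec_data() below:
 *
 *	Elf_Scn *scn = elf_sec_by_name(obj, BTF_ELF_SEC);
 *	Elf_Data *data = elf_sec_data(obj, scn);
 *
 *	if (data)
 *		consume_btf(data->d_buf, data->d_size); // consume_btf() is hypothetical
 */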
2858
2859 static int elf_sec_hdr(const struct bpf_object *obj, Elf_Scn *scn, GElf_Shdr *hdr)
2860 {
2861         if (!scn)
2862                 return -EINVAL;
2863
2864         if (gelf_getshdr(scn, hdr) != hdr) {
2865                 pr_warn("elf: failed to get section(%zu) header from %s: %s\n",
2866                         elf_ndxscn(scn), obj->path, elf_errmsg(-1));
2867                 return -EINVAL;
2868         }
2869
2870         return 0;
2871 }
2872
2873 static const char *elf_sec_name(const struct bpf_object *obj, Elf_Scn *scn)
2874 {
2875         const char *name;
2876         GElf_Shdr sh;
2877
2878         if (!scn)
2879                 return NULL;
2880
2881         if (elf_sec_hdr(obj, scn, &sh))
2882                 return NULL;
2883
2884         name = elf_sec_str(obj, sh.sh_name);
2885         if (!name) {
2886                 pr_warn("elf: failed to get section(%zu) name from %s: %s\n",
2887                         elf_ndxscn(scn), obj->path, elf_errmsg(-1));
2888                 return NULL;
2889         }
2890
2891         return name;
2892 }
2893
2894 static Elf_Data *elf_sec_data(const struct bpf_object *obj, Elf_Scn *scn)
2895 {
2896         Elf_Data *data;
2897
2898         if (!scn)
2899                 return NULL;
2900
2901         data = elf_getdata(scn, 0);
2902         if (!data) {
2903                 pr_warn("elf: failed to get section(%zu) %s data from %s: %s\n",
2904                         elf_ndxscn(scn), elf_sec_name(obj, scn) ?: "<?>",
2905                         obj->path, elf_errmsg(-1));
2906                 return NULL;
2907         }
2908
2909         return data;
2910 }
2911
2912 static bool is_sec_name_dwarf(const char *name)
2913 {
2914         /* approximation, but the actual list is too long */
2915         return strncmp(name, ".debug_", sizeof(".debug_") - 1) == 0;
2916 }
2917
2918 static bool ignore_elf_section(GElf_Shdr *hdr, const char *name)
2919 {
2920         /* no special handling of .strtab */
2921         if (hdr->sh_type == SHT_STRTAB)
2922                 return true;
2923
2924         /* ignore .llvm_addrsig section as well */
2925         if (hdr->sh_type == SHT_LLVM_ADDRSIG)
2926                 return true;
2927
2928         /* no subprograms will lead to an empty .text section, ignore it */
2929         if (hdr->sh_type == SHT_PROGBITS && hdr->sh_size == 0 &&
2930             strcmp(name, ".text") == 0)
2931                 return true;
2932
2933         /* DWARF sections */
2934         if (is_sec_name_dwarf(name))
2935                 return true;
2936
2937         if (strncmp(name, ".rel", sizeof(".rel") - 1) == 0) {
2938                 name += sizeof(".rel") - 1;
2939                 /* DWARF section relocations */
2940                 if (is_sec_name_dwarf(name))
2941                         return true;
2942
2943                 /* .BTF and .BTF.ext don't need relocations */
2944                 if (strcmp(name, BTF_ELF_SEC) == 0 ||
2945                     strcmp(name, BTF_EXT_ELF_SEC) == 0)
2946                         return true;
2947         }
2948
2949         return false;
2950 }
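
/* For illustration, under the rules above sections such as ".strtab",
 * ".llvm_addrsig", an empty ".text", ".debug_info", ".rel.debug_info",
 * ".rel.BTF" and ".rel.BTF.ext" are all skipped, while ".maps", ".rodata"
 * or a non-empty ".text" are processed normally.
 */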
2951
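/* qsort() comparator used by bpf_object__elf_collect() below: orders
 * programs by section index first, then by in-section instruction offset,
 * so that find_prog_by_sec_insn() can binary-search them.
 */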
2952 static int cmp_progs(const void *_a, const void *_b)
2953 {
2954         const struct bpf_program *a = _a;
2955         const struct bpf_program *b = _b;
2956
2957         if (a->sec_idx != b->sec_idx)
2958                 return a->sec_idx < b->sec_idx ? -1 : 1;
2959
2960         /* two programs in the same section can't have the same sec_insn_off */
2961         return a->sec_insn_off < b->sec_insn_off ? -1 : 1;
2962 }
2963
2964 static int bpf_object__elf_collect(struct bpf_object *obj)
2965 {
2966         Elf *elf = obj->efile.elf;
2967         Elf_Data *btf_ext_data = NULL;
2968         Elf_Data *btf_data = NULL;
2969         int idx = 0, err = 0;
2970         const char *name;
2971         Elf_Data *data;
2972         Elf_Scn *scn;
2973         GElf_Shdr sh;
2974
2975         /* a bunch of ELF parsing functionality depends on processing symbols,
2976          * so do the first pass and find the symbol table
2977          */
2978         scn = NULL;
2979         while ((scn = elf_nextscn(elf, scn)) != NULL) {
2980                 if (elf_sec_hdr(obj, scn, &sh))
2981                         return -LIBBPF_ERRNO__FORMAT;
2982
2983                 if (sh.sh_type == SHT_SYMTAB) {
2984                         if (obj->efile.symbols) {
2985                                 pr_warn("elf: multiple symbol tables in %s\n", obj->path);
2986                                 return -LIBBPF_ERRNO__FORMAT;
2987                         }
2988
2989                         data = elf_sec_data(obj, scn);
2990                         if (!data)
2991                                 return -LIBBPF_ERRNO__FORMAT;
2992
2993                         obj->efile.symbols = data;
2994                         obj->efile.symbols_shndx = elf_ndxscn(scn);
2995                         obj->efile.strtabidx = sh.sh_link;
2996                 }
2997         }
2998
2999         scn = NULL;
3000         while ((scn = elf_nextscn(elf, scn)) != NULL) {
3001                 idx++;
3002
3003                 if (elf_sec_hdr(obj, scn, &sh))
3004                         return -LIBBPF_ERRNO__FORMAT;
3005
3006                 name = elf_sec_str(obj, sh.sh_name);
3007                 if (!name)
3008                         return -LIBBPF_ERRNO__FORMAT;
3009
3010                 if (ignore_elf_section(&sh, name))
3011                         continue;
3012
3013                 data = elf_sec_data(obj, scn);
3014                 if (!data)
3015                         return -LIBBPF_ERRNO__FORMAT;
3016
3017                 pr_debug("elf: section(%d) %s, size %ld, link %d, flags %lx, type=%d\n",
3018                          idx, name, (unsigned long)data->d_size,
3019                          (int)sh.sh_link, (unsigned long)sh.sh_flags,
3020                          (int)sh.sh_type);
3021
3022                 if (strcmp(name, "license") == 0) {
3023                         err = bpf_object__init_license(obj, data->d_buf, data->d_size);
3024                         if (err)
3025                                 return err;
3026                 } else if (strcmp(name, "version") == 0) {
3027                         err = bpf_object__init_kversion(obj, data->d_buf, data->d_size);
3028                         if (err)
3029                                 return err;
3030                 } else if (strcmp(name, "maps") == 0) {
3031                         obj->efile.maps_shndx = idx;
3032                 } else if (strcmp(name, MAPS_ELF_SEC) == 0) {
3033                         obj->efile.btf_maps_shndx = idx;
3034                 } else if (strcmp(name, BTF_ELF_SEC) == 0) {
3035                         btf_data = data;
3036                 } else if (strcmp(name, BTF_EXT_ELF_SEC) == 0) {
3037                         btf_ext_data = data;
3038                 } else if (sh.sh_type == SHT_SYMTAB) {
3039                         /* already processed during the first pass above */
3040                 } else if (sh.sh_type == SHT_PROGBITS && data->d_size > 0) {
3041                         if (sh.sh_flags & SHF_EXECINSTR) {
3042                                 if (strcmp(name, ".text") == 0)
3043                                         obj->efile.text_shndx = idx;
3044                                 err = bpf_object__add_programs(obj, data, name, idx);
3045                                 if (err)
3046                                         return err;
3047                         } else if (strcmp(name, DATA_SEC) == 0) {
3048                                 obj->efile.data = data;
3049                                 obj->efile.data_shndx = idx;
3050                         } else if (strcmp(name, RODATA_SEC) == 0) {
3051                                 obj->efile.rodata = data;
3052                                 obj->efile.rodata_shndx = idx;
3053                         } else if (strcmp(name, STRUCT_OPS_SEC) == 0) {
3054                                 obj->efile.st_ops_data = data;
3055                                 obj->efile.st_ops_shndx = idx;
3056                         } else {
3057                                 pr_info("elf: skipping unrecognized data section(%d) %s\n",
3058                                         idx, name);
3059                         }
3060                 } else if (sh.sh_type == SHT_REL) {
3061                         int nr_sects = obj->efile.nr_reloc_sects;
3062                         void *sects = obj->efile.reloc_sects;
3063                         int sec = sh.sh_info; /* points to other section */
3064
3065                         /* Only do relo for section with exec instructions */
3066                         if (!section_have_execinstr(obj, sec) &&
3067                             strcmp(name, ".rel" STRUCT_OPS_SEC) &&
3068                             strcmp(name, ".rel" MAPS_ELF_SEC)) {
3069                                 pr_info("elf: skipping relo section(%d) %s for section(%d) %s\n",
3070                                         idx, name, sec,
3071                                         elf_sec_name(obj, elf_sec_by_idx(obj, sec)) ?: "<?>");
3072                                 continue;
3073                         }
3074
3075                         sects = libbpf_reallocarray(sects, nr_sects + 1,
3076                                                     sizeof(*obj->efile.reloc_sects));
3077                         if (!sects)
3078                                 return -ENOMEM;
3079
3080                         obj->efile.reloc_sects = sects;
3081                         obj->efile.nr_reloc_sects++;
3082
3083                         obj->efile.reloc_sects[nr_sects].shdr = sh;
3084                         obj->efile.reloc_sects[nr_sects].data = data;
3085                 } else if (sh.sh_type == SHT_NOBITS && strcmp(name, BSS_SEC) == 0) {
3086                         obj->efile.bss = data;
3087                         obj->efile.bss_shndx = idx;
3088                 } else {
3089                         pr_info("elf: skipping section(%d) %s (size %zu)\n", idx, name,
3090                                 (size_t)sh.sh_size);
3091                 }
3092         }
3093
3094         if (!obj->efile.strtabidx || obj->efile.strtabidx > idx) {
3095                 pr_warn("elf: symbol strings section missing or invalid in %s\n", obj->path);
3096                 return -LIBBPF_ERRNO__FORMAT;
3097         }
3098
3099         /* sort BPF programs by section index and in-section instruction
3100          * offset for faster search */
3101         qsort(obj->programs, obj->nr_programs, sizeof(*obj->programs), cmp_progs);
3102
3103         return bpf_object__init_btf(obj, btf_data, btf_ext_data);
3104 }
3105
3106 static bool sym_is_extern(const GElf_Sym *sym)
3107 {
3108         int bind = GELF_ST_BIND(sym->st_info);
3109         /* externs are symbols w/ type=NOTYPE, bind=GLOBAL|WEAK, section=UND */
3110         return sym->st_shndx == SHN_UNDEF &&
3111                (bind == STB_GLOBAL || bind == STB_WEAK) &&
3112                GELF_ST_TYPE(sym->st_info) == STT_NOTYPE;
3113 }
3114
3115 static bool sym_is_subprog(const GElf_Sym *sym, int text_shndx)
3116 {
3117         int bind = GELF_ST_BIND(sym->st_info);
3118         int type = GELF_ST_TYPE(sym->st_info);
3119
3120         /* in .text section */
3121         if (sym->st_shndx != text_shndx)
3122                 return false;
3123
3124         /* local function */
3125         if (bind == STB_LOCAL && type == STT_SECTION)
3126                 return true;
3127
3128         /* global function */
3129         return bind == STB_GLOBAL && type == STT_FUNC;
3130 }
3131
3132 static int find_extern_btf_id(const struct btf *btf, const char *ext_name)
3133 {
3134         const struct btf_type *t;
3135         const char *tname;
3136         int i, n;
3137
3138         if (!btf)
3139                 return -ESRCH;
3140
3141         n = btf__get_nr_types(btf);
3142         for (i = 1; i <= n; i++) {
3143                 t = btf__type_by_id(btf, i);
3144
3145                 if (!btf_is_var(t) && !btf_is_func(t))
3146                         continue;
3147
3148                 tname = btf__name_by_offset(btf, t->name_off);
3149                 if (strcmp(tname, ext_name))
3150                         continue;
3151
3152                 if (btf_is_var(t) &&
3153                     btf_var(t)->linkage != BTF_VAR_GLOBAL_EXTERN)
3154                         return -EINVAL;
3155
3156                 if (btf_is_func(t) && btf_func_linkage(t) != BTF_FUNC_EXTERN)
3157                         return -EINVAL;
3158
3159                 return i;
3160         }
3161
3162         return -ENOENT;
3163 }
3164
3165 static int find_extern_sec_btf_id(struct btf *btf, int ext_btf_id)
{
3166         const struct btf_var_secinfo *vs;
3167         const struct btf_type *t;
3168         int i, j, n;
3169
3170         if (!btf)
3171                 return -ESRCH;
3172
3173         n = btf__get_nr_types(btf);
3174         for (i = 1; i <= n; i++) {
3175                 t = btf__type_by_id(btf, i);
3176
3177                 if (!btf_is_datasec(t))
3178                         continue;
3179
3180                 vs = btf_var_secinfos(t);
3181                 for (j = 0; j < btf_vlen(t); j++, vs++) {
3182                         if (vs->type == ext_btf_id)
3183                                 return i;
3184                 }
3185         }
3186
3187         return -ENOENT;
3188 }
3189
3190 static enum kcfg_type find_kcfg_type(const struct btf *btf, int id,
3191                                      bool *is_signed)
3192 {
3193         const struct btf_type *t;
3194         const char *name;
3195
3196         t = skip_mods_and_typedefs(btf, id, NULL);
3197         name = btf__name_by_offset(btf, t->name_off);
3198
3199         if (is_signed)
3200                 *is_signed = false;
3201         switch (btf_kind(t)) {
3202         case BTF_KIND_INT: {
3203                 int enc = btf_int_encoding(t);
3204
3205                 if (enc & BTF_INT_BOOL)
3206                         return t->size == 1 ? KCFG_BOOL : KCFG_UNKNOWN;
3207                 if (is_signed)
3208                         *is_signed = enc & BTF_INT_SIGNED;
3209                 if (t->size == 1)
3210                         return KCFG_CHAR;
3211                 if (t->size < 1 || t->size > 8 || (t->size & (t->size - 1)))
3212                         return KCFG_UNKNOWN;
3213                 return KCFG_INT;
3214         }
3215         case BTF_KIND_ENUM:
3216                 if (t->size != 4)
3217                         return KCFG_UNKNOWN;
3218                 if (strcmp(name, "libbpf_tristate"))
3219                         return KCFG_UNKNOWN;
3220                 return KCFG_TRISTATE;
3221         case BTF_KIND_ARRAY:
3222                 if (btf_array(t)->nelems == 0)
3223                         return KCFG_UNKNOWN;
3224                 if (find_kcfg_type(btf, btf_array(t)->type, NULL) != KCFG_CHAR)
3225                         return KCFG_UNKNOWN;
3226                 return KCFG_CHAR_ARR;
3227         default:
3228                 return KCFG_UNKNOWN;
3229         }
3230 }
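
/* Illustrative BPF-side extern declarations and the kcfg_type each one maps
 * to under the rules above (__kconfig is the convenience macro from
 * bpf_helpers.h; the CONFIG_* names are just examples):
 *
 *	extern int CONFIG_HZ __kconfig;				// KCFG_INT
 *	extern unsigned char CONFIG_C __kconfig;		// KCFG_CHAR
 *	extern bool CONFIG_BPF __kconfig;			// KCFG_BOOL
 *	extern enum libbpf_tristate CONFIG_M __kconfig;		// KCFG_TRISTATE
 *	extern char CONFIG_STR[64] __kconfig;			// KCFG_CHAR_ARR
 */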
3231
3232 static int cmp_externs(const void *_a, const void *_b)
3233 {
3234         const struct extern_desc *a = _a;
3235         const struct extern_desc *b = _b;
3236
3237         if (a->type != b->type)
3238                 return a->type < b->type ? -1 : 1;
3239
3240         if (a->type == EXT_KCFG) {
3241                 /* descending order by alignment requirements */
3242                 if (a->kcfg.align != b->kcfg.align)
3243                         return a->kcfg.align > b->kcfg.align ? -1 : 1;
3244                 /* ascending order by size, within same alignment class */
3245                 if (a->kcfg.sz != b->kcfg.sz)
3246                         return a->kcfg.sz < b->kcfg.sz ? -1 : 1;
3247         }
3248
3249         /* resolve ties by name */
3250         return strcmp(a->name, b->name);
3251 }
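
/* Packing example (illustrative): externs with (align, size) of (8, 8),
 * (4, 4) and (1, 1) sort into exactly that order, and the roundup()-based
 * layout in bpf_object__collect_externs() then places them at .kconfig
 * offsets 0, 8 and 12, leaving no padding holes.
 */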
3252
3253 static int find_int_btf_id(const struct btf *btf)
3254 {
3255         const struct btf_type *t;
3256         int i, n;
3257
3258         n = btf__get_nr_types(btf);
3259         for (i = 1; i <= n; i++) {
3260                 t = btf__type_by_id(btf, i);
3261
3262                 if (btf_is_int(t) && btf_int_bits(t) == 32)
3263                         return i;
3264         }
3265
3266         return 0;
3267 }
3268
3269 static int add_dummy_ksym_var(struct btf *btf)
3270 {
3271         int i, int_btf_id, sec_btf_id, dummy_var_btf_id;
3272         const struct btf_var_secinfo *vs;
3273         const struct btf_type *sec;
3274
3275         if (!btf)
3276                 return 0;
3277
3278         sec_btf_id = btf__find_by_name_kind(btf, KSYMS_SEC,
3279                                             BTF_KIND_DATASEC);
3280         if (sec_btf_id < 0)
3281                 return 0;
3282
3283         sec = btf__type_by_id(btf, sec_btf_id);
3284         vs = btf_var_secinfos(sec);
3285         for (i = 0; i < btf_vlen(sec); i++, vs++) {
3286                 const struct btf_type *vt;
3287
3288                 vt = btf__type_by_id(btf, vs->type);
3289                 if (btf_is_func(vt))
3290                         break;
3291         }
3292
3293         /* No func in ksyms sec.  No need to add dummy var. */
3294         if (i == btf_vlen(sec))
3295                 return 0;
3296
3297         int_btf_id = find_int_btf_id(btf);
3298         dummy_var_btf_id = btf__add_var(btf,
3299                                         "dummy_ksym",
3300                                         BTF_VAR_GLOBAL_ALLOCATED,
3301                                         int_btf_id);
3302         if (dummy_var_btf_id < 0)
3303                 pr_warn("cannot create a dummy_ksym var\n");
3304
3305         return dummy_var_btf_id;
3306 }
3307
3308 static int bpf_object__collect_externs(struct bpf_object *obj)
3309 {
3310         struct btf_type *sec, *kcfg_sec = NULL, *ksym_sec = NULL;
3311         const struct btf_type *t;
3312         struct extern_desc *ext;
3313         int i, n, off, dummy_var_btf_id;
3314         const char *ext_name, *sec_name;
3315         Elf_Scn *scn;
3316         GElf_Shdr sh;
3317
3318         if (!obj->efile.symbols)
3319                 return 0;
3320
3321         scn = elf_sec_by_idx(obj, obj->efile.symbols_shndx);
3322         if (elf_sec_hdr(obj, scn, &sh))
3323                 return -LIBBPF_ERRNO__FORMAT;
3324
3325         dummy_var_btf_id = add_dummy_ksym_var(obj->btf);
3326         if (dummy_var_btf_id < 0)
3327                 return dummy_var_btf_id;
3328
3329         n = sh.sh_size / sh.sh_entsize;
3330         pr_debug("looking for externs among %d symbols...\n", n);
3331
3332         for (i = 0; i < n; i++) {
3333                 GElf_Sym sym;
3334
3335                 if (!gelf_getsym(obj->efile.symbols, i, &sym))
3336                         return -LIBBPF_ERRNO__FORMAT;
3337                 if (!sym_is_extern(&sym))
3338                         continue;
3339                 ext_name = elf_sym_str(obj, sym.st_name);
3340                 if (!ext_name || !ext_name[0])
3341                         continue;
3342
3343                 ext = obj->externs;
3344                 ext = libbpf_reallocarray(ext, obj->nr_extern + 1, sizeof(*ext));
3345                 if (!ext)
3346                         return -ENOMEM;
3347                 obj->externs = ext;
3348                 ext = &ext[obj->nr_extern];
3349                 memset(ext, 0, sizeof(*ext));
3350                 obj->nr_extern++;
3351
3352                 ext->btf_id = find_extern_btf_id(obj->btf, ext_name);
3353                 if (ext->btf_id <= 0) {
3354                         pr_warn("failed to find BTF for extern '%s': %d\n",
3355                                 ext_name, ext->btf_id);
3356                         return ext->btf_id;
3357                 }
3358                 t = btf__type_by_id(obj->btf, ext->btf_id);
3359                 ext->name = btf__name_by_offset(obj->btf, t->name_off);
3360                 ext->sym_idx = i;
3361                 ext->is_weak = GELF_ST_BIND(sym.st_info) == STB_WEAK;
3362
3363                 ext->sec_btf_id = find_extern_sec_btf_id(obj->btf, ext->btf_id);
3364                 if (ext->sec_btf_id <= 0) {
3365                         pr_warn("failed to find BTF for extern '%s' [%d] section: %d\n",
3366                                 ext_name, ext->btf_id, ext->sec_btf_id);
3367                         return ext->sec_btf_id;
3368                 }
3369                 sec = (void *)btf__type_by_id(obj->btf, ext->sec_btf_id);
3370                 sec_name = btf__name_by_offset(obj->btf, sec->name_off);
3371
3372                 if (strcmp(sec_name, KCONFIG_SEC) == 0) {
3373                         if (btf_is_func(t)) {
3374                                 pr_warn("extern function %s is unsupported under %s section\n",
3375                                         ext->name, KCONFIG_SEC);
3376                                 return -ENOTSUP;
3377                         }
3378                         kcfg_sec = sec;
3379                         ext->type = EXT_KCFG;
3380                         ext->kcfg.sz = btf__resolve_size(obj->btf, t->type);
3381                         if (ext->kcfg.sz <= 0) {
3382                                 pr_warn("failed to resolve size of extern (kcfg) '%s': %d\n",
3383                                         ext_name, ext->kcfg.sz);
3384                                 return ext->kcfg.sz;
3385                         }
3386                         ext->kcfg.align = btf__align_of(obj->btf, t->type);
3387                         if (ext->kcfg.align <= 0) {
3388                                 pr_warn("failed to determine alignment of extern (kcfg) '%s': %d\n",
3389                                         ext_name, ext->kcfg.align);
3390                                 return -EINVAL;
3391                         }
3392                         ext->kcfg.type = find_kcfg_type(obj->btf, t->type,
3393                                                         &ext->kcfg.is_signed);
3394                         if (ext->kcfg.type == KCFG_UNKNOWN) {
3395                                 pr_warn("extern (kcfg) '%s' type is unsupported\n", ext_name);
3396                                 return -ENOTSUP;
3397                         }
3398                 } else if (strcmp(sec_name, KSYMS_SEC) == 0) {
3399                         if (btf_is_func(t) && ext->is_weak) {
3400                                 pr_warn("extern weak function %s is unsupported\n",
3401                                         ext->name);
3402                                 return -ENOTSUP;
3403                         }
3404                         ksym_sec = sec;
3405                         ext->type = EXT_KSYM;
3406                         skip_mods_and_typedefs(obj->btf, t->type,
3407                                                &ext->ksym.type_id);
3408                 } else {
3409                         pr_warn("unrecognized extern section '%s'\n", sec_name);
3410                         return -ENOTSUP;
3411                 }
3412         }
3413         pr_debug("collected %d externs total\n", obj->nr_extern);
3414
3415         if (!obj->nr_extern)
3416                 return 0;
3417
3418         /* sort externs by type, for kcfg ones also by (align, size, name) */
3419         qsort(obj->externs, obj->nr_extern, sizeof(*ext), cmp_externs);
3420
3421         /* for .ksyms section, we need to turn all externs into allocated
3422          * variables in BTF to pass kernel verification; we do this by
3423          * pretending that each extern is a 4-byte int variable
3424          */
3425         if (ksym_sec) {
3426                 /* find existing 4-byte integer type in BTF to use for fake
3427                  * extern variables in DATASEC
3428                  */
3429                 int int_btf_id = find_int_btf_id(obj->btf);
3430                 /* For an extern function, the dummy_var added earlier
3431                  * replaces vs->type, and the dummy_var's name string
3432                  * is reused to fill in any missing parameter names
3433                  * in its prototype.
3434                  */
3435                 const struct btf_type *dummy_var;
3436
3437                 dummy_var = btf__type_by_id(obj->btf, dummy_var_btf_id);
3438                 for (i = 0; i < obj->nr_extern; i++) {
3439                         ext = &obj->externs[i];
3440                         if (ext->type != EXT_KSYM)
3441                                 continue;
3442                         pr_debug("extern (ksym) #%d: symbol %d, name %s\n",
3443                                  i, ext->sym_idx, ext->name);
3444                 }
3445
3446                 sec = ksym_sec;
3447                 n = btf_vlen(sec);
3448                 for (i = 0, off = 0; i < n; i++, off += sizeof(int)) {
3449                         struct btf_var_secinfo *vs = btf_var_secinfos(sec) + i;
3450                         struct btf_type *vt;
3451
3452                         vt = (void *)btf__type_by_id(obj->btf, vs->type);
3453                         ext_name = btf__name_by_offset(obj->btf, vt->name_off);
3454                         ext = find_extern_by_name(obj, ext_name);
3455                         if (!ext) {
3456                                 pr_warn("failed to find extern definition for BTF %s '%s'\n",
3457                                         btf_kind_str(vt), ext_name);
3458                                 return -ESRCH;
3459                         }
3460                         if (btf_is_func(vt)) {
3461                                 const struct btf_type *func_proto;
3462                                 struct btf_param *param;
3463                                 int j;
3464
3465                                 func_proto = btf__type_by_id(obj->btf,
3466                                                              vt->type);
3467                                 param = btf_params(func_proto);
3468                                 /* Reuse the dummy_var name string for
3469                                  * any func proto param lacking a name.
3470                                  */
3471                                 for (j = 0; j < btf_vlen(func_proto); j++)
3472                                         if (param[j].type && !param[j].name_off)
3473                                                 param[j].name_off =
3474                                                         dummy_var->name_off;
3475                                 vs->type = dummy_var_btf_id;
3476                                 vt->info &= ~0xffff;
3477                                 vt->info |= BTF_FUNC_GLOBAL;
3478                         } else {
3479                                 btf_var(vt)->linkage = BTF_VAR_GLOBAL_ALLOCATED;
3480                                 vt->type = int_btf_id;
3481                         }
3482                         vs->offset = off;
3483                         vs->size = sizeof(int);
3484                 }
3485                 sec->size = off;
3486         }
3487
3488         if (kcfg_sec) {
3489                 sec = kcfg_sec;
3490                 /* for kcfg externs calculate their offsets within a .kconfig map */
3491                 off = 0;
3492                 for (i = 0; i < obj->nr_extern; i++) {
3493                         ext = &obj->externs[i];
3494                         if (ext->type != EXT_KCFG)
3495                                 continue;
3496
3497                         ext->kcfg.data_off = roundup(off, ext->kcfg.align);
3498                         off = ext->kcfg.data_off + ext->kcfg.sz;
3499                         pr_debug("extern (kcfg) #%d: symbol %d, off %u, name %s\n",
3500                                  i, ext->sym_idx, ext->kcfg.data_off, ext->name);
3501                 }
3502                 sec->size = off;
3503                 n = btf_vlen(sec);
3504                 for (i = 0; i < n; i++) {
3505                         struct btf_var_secinfo *vs = btf_var_secinfos(sec) + i;
3506
3507                         t = btf__type_by_id(obj->btf, vs->type);
3508                         ext_name = btf__name_by_offset(obj->btf, t->name_off);
3509                         ext = find_extern_by_name(obj, ext_name);
3510                         if (!ext) {
3511                                 pr_warn("failed to find extern definition for BTF var '%s'\n",
3512                                         ext_name);
3513                                 return -ESRCH;
3514                         }
3515                         btf_var(t)->linkage = BTF_VAR_GLOBAL_ALLOCATED;
3516                         vs->offset = ext->kcfg.data_off;
3517                 }
3518         }
3519         return 0;
3520 }
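
/* Illustrative BPF-side declarations that produce the externs collected
 * above (__kconfig/__ksym are convenience macros from bpf_helpers.h; the
 * symbol names are just examples):
 *
 *	extern unsigned int CONFIG_HZ __kconfig;	// EXT_KCFG, .kconfig sec
 *	extern const void bpf_prog_active __ksym;	// EXT_KSYM, .ksyms sec
 */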
3521
3522 struct bpf_program *
3523 bpf_object__find_program_by_title(const struct bpf_object *obj,
3524                                   const char *title)
3525 {
3526         struct bpf_program *pos;
3527
3528         bpf_object__for_each_program(pos, obj) {
3529                 if (pos->sec_name && !strcmp(pos->sec_name, title))
3530                         return pos;
3531         }
3532         return errno = ENOENT, NULL;
3533 }
3534
3535 static bool prog_is_subprog(const struct bpf_object *obj,
3536                             const struct bpf_program *prog)
3537 {
3538         /* For legacy reasons, libbpf supports entry-point BPF programs
3539          * without a SEC() attribute, i.e., those in the .text section. But if
3540          * there are 2 or more such programs in the .text section, they all
3541          * must be subprograms called from entry-point BPF programs in
3542          * designated SEC()'tions, otherwise there is no way to distinguish
3543          * which of those programs should be loaded vs which are subprograms.
3544          * Similarly, if there is a function/program in .text and at least one
3545          * other BPF program with custom SEC() attribute, then we just assume
3546          * .text programs are subprograms (even if they are not called from
3547          * other programs), because libbpf never explicitly supported mixing
3548          * SEC()-designated BPF programs and .text entry-point BPF programs.
3549          */
3550         return prog->sec_idx == obj->efile.text_shndx && obj->nr_programs > 1;
3551 }
3552
3553 struct bpf_program *
3554 bpf_object__find_program_by_name(const struct bpf_object *obj,
3555                                  const char *name)
3556 {
3557         struct bpf_program *prog;
3558
3559         bpf_object__for_each_program(prog, obj) {
3560                 if (prog_is_subprog(obj, prog))
3561                         continue;
3562                 if (!strcmp(prog->name, name))
3563                         return prog;
3564         }
3565         return errno = ENOENT, NULL;
3566 }
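
/* Usage sketch (illustrative): for a program defined in BPF code as
 *
 *	SEC("kprobe/do_unlinkat") int handle_unlink(struct pt_regs *ctx) ...
 *
 * both lookups below resolve to the same bpf_program:
 *
 *	prog = bpf_object__find_program_by_title(obj, "kprobe/do_unlinkat");
 *	prog = bpf_object__find_program_by_name(obj, "handle_unlink");
 */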
3567
3568 static bool bpf_object__shndx_is_data(const struct bpf_object *obj,
3569                                       int shndx)
3570 {
3571         return shndx == obj->efile.data_shndx ||
3572                shndx == obj->efile.bss_shndx ||
3573                shndx == obj->efile.rodata_shndx;
3574 }
3575
3576 static bool bpf_object__shndx_is_maps(const struct bpf_object *obj,
3577                                       int shndx)
3578 {
3579         return shndx == obj->efile.maps_shndx ||
3580                shndx == obj->efile.btf_maps_shndx;
3581 }
3582
3583 static enum libbpf_map_type
3584 bpf_object__section_to_libbpf_map_type(const struct bpf_object *obj, int shndx)
3585 {
3586         if (shndx == obj->efile.data_shndx)
3587                 return LIBBPF_MAP_DATA;
3588         else if (shndx == obj->efile.bss_shndx)
3589                 return LIBBPF_MAP_BSS;
3590         else if (shndx == obj->efile.rodata_shndx)
3591                 return LIBBPF_MAP_RODATA;
3592         else if (shndx == obj->efile.symbols_shndx)
3593                 return LIBBPF_MAP_KCONFIG;
3594         else
3595                 return LIBBPF_MAP_UNSPEC;
3596 }
3597
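/* Classify one relocation into a reloc_desc. The checks below proceed in
 * order: extern (kconfig/ksym) references, then subprogram calls, then
 * subprogram address loads, then generic map references, and finally
 * global data (.data/.bss/.rodata) references.
 */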
3598 static int bpf_program__record_reloc(struct bpf_program *prog,
3599                                      struct reloc_desc *reloc_desc,
3600                                      __u32 insn_idx, const char *sym_name,
3601                                      const GElf_Sym *sym, const GElf_Rel *rel)
3602 {
3603         struct bpf_insn *insn = &prog->insns[insn_idx];
3604         size_t map_idx, nr_maps = prog->obj->nr_maps;
3605         struct bpf_object *obj = prog->obj;
3606         __u32 shdr_idx = sym->st_shndx;
3607         enum libbpf_map_type type;
3608         const char *sym_sec_name;
3609         struct bpf_map *map;
3610
3611         if (!is_call_insn(insn) && !is_ldimm64_insn(insn)) {
3612                 pr_warn("prog '%s': invalid relo against '%s' for insns[%d].code 0x%x\n",
3613                         prog->name, sym_name, insn_idx, insn->code);
3614                 return -LIBBPF_ERRNO__RELOC;
3615         }
3616
3617         if (sym_is_extern(sym)) {
3618                 int sym_idx = GELF_R_SYM(rel->r_info);
3619                 int i, n = obj->nr_extern;
3620                 struct extern_desc *ext;
3621
3622                 for (i = 0; i < n; i++) {
3623                         ext = &obj->externs[i];
3624                         if (ext->sym_idx == sym_idx)
3625                                 break;
3626                 }
3627                 if (i >= n) {
3628                         pr_warn("prog '%s': extern relo failed to find extern for '%s' (%d)\n",
3629                                 prog->name, sym_name, sym_idx);
3630                         return -LIBBPF_ERRNO__RELOC;
3631                 }
3632                 pr_debug("prog '%s': found extern #%d '%s' (sym %d) for insn #%u\n",
3633                          prog->name, i, ext->name, ext->sym_idx, insn_idx);
3634                 if (insn->code == (BPF_JMP | BPF_CALL))
3635                         reloc_desc->type = RELO_EXTERN_FUNC;
3636                 else
3637                         reloc_desc->type = RELO_EXTERN_VAR;
3638                 reloc_desc->insn_idx = insn_idx;
3639                 reloc_desc->sym_off = i; /* sym_off stores extern index */
3640                 return 0;
3641         }
3642
3643         /* sub-program call relocation */
3644         if (is_call_insn(insn)) {
3645                 if (insn->src_reg != BPF_PSEUDO_CALL) {
3646                         pr_warn("prog '%s': incorrect bpf_call opcode\n", prog->name);
3647                         return -LIBBPF_ERRNO__RELOC;
3648                 }
3649                 /* text_shndx can be 0, if no default "main" program exists */
3650                 if (!shdr_idx || shdr_idx != obj->efile.text_shndx) {
3651                         sym_sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, shdr_idx));
3652                         pr_warn("prog '%s': bad call relo against '%s' in section '%s'\n",
3653                                 prog->name, sym_name, sym_sec_name);
3654                         return -LIBBPF_ERRNO__RELOC;
3655                 }
3656                 if (sym->st_value % BPF_INSN_SZ) {
3657                         pr_warn("prog '%s': bad call relo against '%s' at offset %zu\n",
3658                                 prog->name, sym_name, (size_t)sym->st_value);
3659                         return -LIBBPF_ERRNO__RELOC;
3660                 }
3661                 reloc_desc->type = RELO_CALL;
3662                 reloc_desc->insn_idx = insn_idx;
3663                 reloc_desc->sym_off = sym->st_value;
3664                 return 0;
3665         }
3666
3667         if (!shdr_idx || shdr_idx >= SHN_LORESERVE) {
3668                 pr_warn("prog '%s': invalid relo against '%s' in special section 0x%x; forgot to initialize global var?..\n",
3669                         prog->name, sym_name, shdr_idx);
3670                 return -LIBBPF_ERRNO__RELOC;
3671         }
3672
3673         /* loading subprog addresses */
3674         if (sym_is_subprog(sym, obj->efile.text_shndx)) {
3675                 /* global_func: sym->st_value = offset in the section, insn->imm = 0.
3676                  * local_func: sym->st_value = 0, insn->imm = offset in the section.
3677                  */
3678                 if ((sym->st_value % BPF_INSN_SZ) || (insn->imm % BPF_INSN_SZ)) {
3679                         pr_warn("prog '%s': bad subprog addr relo against '%s' at offset %zu+%d\n",
3680                                 prog->name, sym_name, (size_t)sym->st_value, insn->imm);
3681                         return -LIBBPF_ERRNO__RELOC;
3682                 }
3683
3684                 reloc_desc->type = RELO_SUBPROG_ADDR;
3685                 reloc_desc->insn_idx = insn_idx;
3686                 reloc_desc->sym_off = sym->st_value;
3687                 return 0;
3688         }
3689
3690         type = bpf_object__section_to_libbpf_map_type(obj, shdr_idx);
3691         sym_sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, shdr_idx));
3692
3693         /* generic map reference relocation */
3694         if (type == LIBBPF_MAP_UNSPEC) {
3695                 if (!bpf_object__shndx_is_maps(obj, shdr_idx)) {
3696                         pr_warn("prog '%s': bad map relo against '%s' in section '%s'\n",
3697                                 prog->name, sym_name, sym_sec_name);
3698                         return -LIBBPF_ERRNO__RELOC;
3699                 }
3700                 for (map_idx = 0; map_idx < nr_maps; map_idx++) {
3701                         map = &obj->maps[map_idx];
3702                         if (map->libbpf_type != type ||
3703                             map->sec_idx != sym->st_shndx ||
3704                             map->sec_offset != sym->st_value)
3705                                 continue;
3706                         pr_debug("prog '%s': found map %zd (%s, sec %d, off %zu) for insn #%u\n",
3707                                  prog->name, map_idx, map->name, map->sec_idx,
3708                                  map->sec_offset, insn_idx);
3709                         break;
3710                 }
3711                 if (map_idx >= nr_maps) {
3712                         pr_warn("prog '%s': map relo failed to find map for section '%s', off %zu\n",
3713                                 prog->name, sym_sec_name, (size_t)sym->st_value);
3714                         return -LIBBPF_ERRNO__RELOC;
3715                 }
3716                 reloc_desc->type = RELO_LD64;
3717                 reloc_desc->insn_idx = insn_idx;
3718                 reloc_desc->map_idx = map_idx;
3719                 reloc_desc->sym_off = 0; /* sym->st_value determines map_idx */
3720                 return 0;
3721         }
3722
3723         /* global data map relocation */
3724         if (!bpf_object__shndx_is_data(obj, shdr_idx)) {
3725                 pr_warn("prog '%s': bad data relo against section '%s'\n",
3726                         prog->name, sym_sec_name);
3727                 return -LIBBPF_ERRNO__RELOC;
3728         }
3729         for (map_idx = 0; map_idx < nr_maps; map_idx++) {
3730                 map = &obj->maps[map_idx];
3731                 if (map->libbpf_type != type)
3732                         continue;
3733                 pr_debug("prog '%s': found data map %zd (%s, sec %d, off %zu) for insn %u\n",
3734                          prog->name, map_idx, map->name, map->sec_idx,
3735                          map->sec_offset, insn_idx);
3736                 break;
3737         }
3738         if (map_idx >= nr_maps) {
3739                 pr_warn("prog '%s': data relo failed to find map for section '%s'\n",
3740                         prog->name, sym_sec_name);
3741                 return -LIBBPF_ERRNO__RELOC;
3742         }
3743
3744         reloc_desc->type = RELO_DATA;
3745         reloc_desc->insn_idx = insn_idx;
3746         reloc_desc->map_idx = map_idx;
3747         reloc_desc->sym_off = sym->st_value;
3748         return 0;
3749 }
3750
3751 static bool prog_contains_insn(const struct bpf_program *prog, size_t insn_idx)
3752 {
3753         return insn_idx >= prog->sec_insn_off &&
3754                insn_idx < prog->sec_insn_off + prog->sec_insn_cnt;
3755 }
3756
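/* Upper-bound binary search over obj->programs (sorted by cmp_progs):
 * finds the last program starting at or before (sec_idx, insn_idx), then
 * verifies that insn_idx really falls inside it. Returns NULL if the
 * instruction lies in a gap (e.g., an overridden weak subprogram).
 */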
3757 static struct bpf_program *find_prog_by_sec_insn(const struct bpf_object *obj,
3758                                                  size_t sec_idx, size_t insn_idx)
3759 {
3760         int l = 0, r = obj->nr_programs - 1, m;
3761         struct bpf_program *prog;
3762
3763         while (l < r) {
3764                 m = l + (r - l + 1) / 2;
3765                 prog = &obj->programs[m];
3766
3767                 if (prog->sec_idx < sec_idx ||
3768                     (prog->sec_idx == sec_idx && prog->sec_insn_off <= insn_idx))
3769                         l = m;
3770                 else
3771                         r = m - 1;
3772         }
3773         /* matching program could be at index l, but it still might be the
3774          * wrong one, so we need to double check conditions for the last time
3775          */
3776         prog = &obj->programs[l];
3777         if (prog->sec_idx == sec_idx && prog_contains_insn(prog, insn_idx))
3778                 return prog;
3779         return NULL;
3780 }
3781
3782 static int
3783 bpf_object__collect_prog_relos(struct bpf_object *obj, GElf_Shdr *shdr, Elf_Data *data)
3784 {
3785         Elf_Data *symbols = obj->efile.symbols;
3786         const char *relo_sec_name, *sec_name;
3787         size_t sec_idx = shdr->sh_info;
3788         struct bpf_program *prog;
3789         struct reloc_desc *relos;
3790         int err, i, nrels;
3791         const char *sym_name;
3792         __u32 insn_idx;
3793         Elf_Scn *scn;
3794         Elf_Data *scn_data;
3795         GElf_Sym sym;
3796         GElf_Rel rel;
3797
3798         scn = elf_sec_by_idx(obj, sec_idx);
3799         scn_data = elf_sec_data(obj, scn);
3800
3801         relo_sec_name = elf_sec_str(obj, shdr->sh_name);
3802         sec_name = elf_sec_name(obj, scn);
3803         if (!relo_sec_name || !sec_name)
3804                 return -EINVAL;
3805
3806         pr_debug("sec '%s': collecting relocation for section(%zu) '%s'\n",
3807                  relo_sec_name, sec_idx, sec_name);
3808         nrels = shdr->sh_size / shdr->sh_entsize;
3809
3810         for (i = 0; i < nrels; i++) {
3811                 if (!gelf_getrel(data, i, &rel)) {
3812                         pr_warn("sec '%s': failed to get relo #%d\n", relo_sec_name, i);
3813                         return -LIBBPF_ERRNO__FORMAT;
3814                 }
3815                 if (!gelf_getsym(symbols, GELF_R_SYM(rel.r_info), &sym)) {
3816                         pr_warn("sec '%s': symbol 0x%zx not found for relo #%d\n",
3817                                 relo_sec_name, (size_t)GELF_R_SYM(rel.r_info), i);
3818                         return -LIBBPF_ERRNO__FORMAT;
3819                 }
3820
3821                 if (rel.r_offset % BPF_INSN_SZ || rel.r_offset >= scn_data->d_size) {
3822                         pr_warn("sec '%s': invalid offset 0x%zx for relo #%d\n",
3823                                 relo_sec_name, (size_t)rel.r_offset, i);
3824                         return -LIBBPF_ERRNO__FORMAT;
3825                 }
3826
3827                 insn_idx = rel.r_offset / BPF_INSN_SZ;
3828                 /* relocations against static functions are recorded as
3829                  * relocations against the section that contains a function;
3830                  * in such a case, the symbol will be STT_SECTION and sym.st_name
3831                  * will point to the empty string (0), so fetch the section name
3832                  * instead
3833                  */
3834                 if (GELF_ST_TYPE(sym.st_info) == STT_SECTION && sym.st_name == 0)
3835                         sym_name = elf_sec_name(obj, elf_sec_by_idx(obj, sym.st_shndx));
3836                 else
3837                         sym_name = elf_sym_str(obj, sym.st_name);
3838                 sym_name = sym_name ?: "<?>";
3839
3840                 pr_debug("sec '%s': relo #%d: insn #%u against '%s'\n",
3841                          relo_sec_name, i, insn_idx, sym_name);
3842
3843                 prog = find_prog_by_sec_insn(obj, sec_idx, insn_idx);
3844                 if (!prog) {
3845                         pr_debug("sec '%s': relo #%d: couldn't find program in section '%s' for insn #%u, probably overridden weak function, skipping...\n",
3846                                 relo_sec_name, i, sec_name, insn_idx);
3847                         continue;
3848                 }
3849
3850                 relos = libbpf_reallocarray(prog->reloc_desc,
3851                                             prog->nr_reloc + 1, sizeof(*relos));
3852                 if (!relos)
3853                         return -ENOMEM;
3854                 prog->reloc_desc = relos;
3855
3856                 /* adjust insn_idx to local BPF program frame of reference */
3857                 insn_idx -= prog->sec_insn_off;
3858                 err = bpf_program__record_reloc(prog, &relos[prog->nr_reloc],
3859                                                 insn_idx, sym_name, &sym, &rel);
3860                 if (err)
3861                         return err;
3862
3863                 prog->nr_reloc++;
3864         }
3865         return 0;
3866 }
3867
3868 static int bpf_map_find_btf_info(struct bpf_object *obj, struct bpf_map *map)
3869 {
3870         struct bpf_map_def *def = &map->def;
3871         __u32 key_type_id = 0, value_type_id = 0;
3872         int ret;
3873
3874         /* if it's a BTF-defined map, we don't need to search for type IDs.
3875          * A struct_ops map needs neither btf_key_type_id nor
3876          * btf_value_type_id.
3877          */
3878         if (map->sec_idx == obj->efile.btf_maps_shndx ||
3879             bpf_map__is_struct_ops(map))
3880                 return 0;
3881
3882         if (!bpf_map__is_internal(map)) {
3883                 ret = btf__get_map_kv_tids(obj->btf, map->name, def->key_size,
3884                                            def->value_size, &key_type_id,
3885                                            &value_type_id);
3886         } else {
3887                 /*
3888                  * LLVM annotates global data differently in BTF, that is,
3889                  * only as '.data', '.bss' or '.rodata'.
3890                  */
3891                 ret = btf__find_by_name(obj->btf,
3892                                 libbpf_type_to_btf_name[map->libbpf_type]);
3893         }
3894         if (ret < 0)
3895                 return ret;
3896
3897         map->btf_key_type_id = key_type_id;
3898         map->btf_value_type_id = bpf_map__is_internal(map) ?
3899                                  ret : value_type_id;
3900         return 0;
3901 }
3902
3903 static int bpf_get_map_info_from_fdinfo(int fd, struct bpf_map_info *info)
3904 {
3905         char file[PATH_MAX], buff[4096];
3906         FILE *fp;
3907         __u32 val;
3908         int err;
3909
3910         snprintf(file, sizeof(file), "/proc/%d/fdinfo/%d", getpid(), fd);
3911         memset(info, 0, sizeof(*info));
3912
3913         fp = fopen(file, "r");
3914         if (!fp) {
3915                 err = -errno;
3916                 pr_warn("failed to open %s: %d. No procfs support?\n", file,
3917                         err);
3918                 return err;
3919         }
3920
3921         while (fgets(buff, sizeof(buff), fp)) {
3922                 if (sscanf(buff, "map_type:\t%u", &val) == 1)
3923                         info->type = val;
3924                 else if (sscanf(buff, "key_size:\t%u", &val) == 1)
3925                         info->key_size = val;
3926                 else if (sscanf(buff, "value_size:\t%u", &val) == 1)
3927                         info->value_size = val;
3928                 else if (sscanf(buff, "max_entries:\t%u", &val) == 1)
3929                         info->max_entries = val;
3930                 else if (sscanf(buff, "map_flags:\t%i", &val) == 1)
3931                         info->map_flags = val;
3932         }
3933
3934         fclose(fp);
3935
3936         return 0;
3937 }
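
/* For reference, the /proc/<pid>/fdinfo/<fd> lines parsed above look
 * roughly like this for a BPF map (values are illustrative):
 *
 *	map_type:	1
 *	key_size:	4
 *	value_size:	8
 *	max_entries:	128
 *	map_flags:	0x0
 */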
3938
3939 int bpf_map__reuse_fd(struct bpf_map *map, int fd)
3940 {
3941         struct bpf_map_info info = {};
3942         __u32 len = sizeof(info);
3943         int new_fd, err;
3944         char *new_name;
3945
3946         err = bpf_obj_get_info_by_fd(fd, &info, &len);
3947         if (err && errno == EINVAL)
3948                 err = bpf_get_map_info_from_fdinfo(fd, &info);
3949         if (err)
3950                 return libbpf_err(err);
3951
3952         new_name = strdup(info.name);
3953         if (!new_name)
3954                 return libbpf_err(-errno);
3955
3956         new_fd = open("/", O_RDONLY | O_CLOEXEC);
3957         if (new_fd < 0) {
3958                 err = -errno;
3959                 goto err_free_new_name;
3960         }
3961
3962         new_fd = dup3(fd, new_fd, O_CLOEXEC);
3963         if (new_fd < 0) {
3964                 err = -errno;
3965                 goto err_close_new_fd;
3966         }
3967
3968         err = zclose(map->fd);
3969         if (err) {
3970                 err = -errno;
3971                 goto err_close_new_fd;
3972         }
3973         free(map->name);
3974
3975         map->fd = new_fd;
3976         map->name = new_name;
3977         map->def.type = info.type;
3978         map->def.key_size = info.key_size;
3979         map->def.value_size = info.value_size;
3980         map->def.max_entries = info.max_entries;
3981         map->def.map_flags = info.map_flags;
3982         map->btf_key_type_id = info.btf_key_type_id;
3983         map->btf_value_type_id = info.btf_value_type_id;
3984         map->reused = true;
3985
3986         return 0;
3987
3988 err_close_new_fd:
3989         close(new_fd);
3990 err_free_new_name:
3991         free(new_name);
3992         return libbpf_err(err);
3993 }
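
/* Usage sketch (illustrative): reuse an already-created (e.g., pinned) map
 * instead of creating a fresh one; this must happen before the object is
 * loaded. The pin path and map name here are assumptions:
 *
 *	int pin_fd = bpf_obj_get("/sys/fs/bpf/my_map");
 *	struct bpf_map *map = bpf_object__find_map_by_name(obj, "my_map");
 *
 *	if (pin_fd >= 0 && map && !bpf_map__reuse_fd(map, pin_fd))
 *		... // map will reuse pin_fd's kernel object on load
 */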
3994
3995 __u32 bpf_map__max_entries(const struct bpf_map *map)
3996 {
3997         return map->def.max_entries;
3998 }
3999
4000 struct bpf_map *bpf_map__inner_map(struct bpf_map *map)
4001 {
4002         if (!bpf_map_type__is_map_in_map(map->def.type))
4003                 return errno = EINVAL, NULL;
4004
4005         return map->inner_map;
4006 }
4007
4008 int bpf_map__set_max_entries(struct bpf_map *map, __u32 max_entries)
4009 {
4010         if (map->fd >= 0)
4011                 return libbpf_err(-EBUSY);
4012         map->def.max_entries = max_entries;
4013         return 0;
4014 }
4015
4016 int bpf_map__resize(struct bpf_map *map, __u32 max_entries)
4017 {
4018         if (!map || !max_entries)
4019                 return libbpf_err(-EINVAL);
4020
4021         return bpf_map__set_max_entries(map, max_entries);
4022 }
4023
4024 static int
4025 bpf_object__probe_loading(struct bpf_object *obj)
4026 {
4027         struct bpf_load_program_attr attr;
4028         char *cp, errmsg[STRERR_BUFSIZE];
4029         struct bpf_insn insns[] = {
4030                 BPF_MOV64_IMM(BPF_REG_0, 0),
4031                 BPF_EXIT_INSN(),
4032         };
4033         int ret;
4034
4035         if (obj->gen_loader)
4036                 return 0;
4037
4038         /* make sure basic loading works */
4039
4040         memset(&attr, 0, sizeof(attr));
4041         attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
4042         attr.insns = insns;
4043         attr.insns_cnt = ARRAY_SIZE(insns);
4044         attr.license = "GPL";
4045
4046         ret = bpf_load_program_xattr(&attr, NULL, 0);
4047         if (ret < 0) {
4048                 attr.prog_type = BPF_PROG_TYPE_TRACEPOINT;
4049                 ret = bpf_load_program_xattr(&attr, NULL, 0);
4050         }
4051         if (ret < 0) {
4052                 ret = errno;
4053                 cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg));
4054                 pr_warn("Error in %s():%s(%d). Couldn't load trivial BPF "
4055                         "program. Make sure your kernel supports BPF "
4056                         "(CONFIG_BPF_SYSCALL=y) and/or that RLIMIT_MEMLOCK is "
4057                         "set to a big enough value.\n", __func__, cp, ret);
4058                 return -ret;
4059         }
4060         close(ret);
4061
4062         return 0;
4063 }
4064
4065 static int probe_fd(int fd)
4066 {
4067         if (fd >= 0)
4068                 close(fd);
4069         return fd >= 0;
4070 }
4071
4072 static int probe_kern_prog_name(void)
4073 {
4074         struct bpf_load_program_attr attr;
4075         struct bpf_insn insns[] = {
4076                 BPF_MOV64_IMM(BPF_REG_0, 0),
4077                 BPF_EXIT_INSN(),
4078         };
4079         int ret;
4080
4081         /* make sure loading with name works */
4082
4083         memset(&attr, 0, sizeof(attr));
4084         attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
4085         attr.insns = insns;
4086         attr.insns_cnt = ARRAY_SIZE(insns);
4087         attr.license = "GPL";
4088         attr.name = "test";
4089         ret = bpf_load_program_xattr(&attr, NULL, 0);
4090         return probe_fd(ret);
4091 }
4092
4093 static int probe_kern_global_data(void)
4094 {
4095         struct bpf_load_program_attr prg_attr;
4096         struct bpf_create_map_attr map_attr;
4097         char *cp, errmsg[STRERR_BUFSIZE];
4098         struct bpf_insn insns[] = {
4099                 BPF_LD_MAP_VALUE(BPF_REG_1, 0, 16),
4100                 BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 42),
4101                 BPF_MOV64_IMM(BPF_REG_0, 0),
4102                 BPF_EXIT_INSN(),
4103         };
4104         int ret, map;
4105
4106         memset(&map_attr, 0, sizeof(map_attr));
4107         map_attr.map_type = BPF_MAP_TYPE_ARRAY;
4108         map_attr.key_size = sizeof(int);
4109         map_attr.value_size = 32;
4110         map_attr.max_entries = 1;
4111
4112         map = bpf_create_map_xattr(&map_attr);
4113         if (map < 0) {
4114                 ret = -errno;
4115                 cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg));
4116                 pr_warn("Error in %s():%s(%d). Couldn't create simple array map.\n",
4117                         __func__, cp, -ret);
4118                 return ret;
4119         }
4120
4121         insns[0].imm = map;
4122
4123         memset(&prg_attr, 0, sizeof(prg_attr));
4124         prg_attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
4125         prg_attr.insns = insns;
4126         prg_attr.insns_cnt = ARRAY_SIZE(insns);
4127         prg_attr.license = "GPL";
4128
4129         ret = bpf_load_program_xattr(&prg_attr, NULL, 0);
4130         close(map);
4131         return probe_fd(ret);
4132 }
4133
4134 static int probe_kern_btf(void)
4135 {
4136         static const char strs[] = "\0int";
4137         __u32 types[] = {
4138                 /* int */
4139                 BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4),
4140         };
4141
4142         return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
4143                                              strs, sizeof(strs)));
4144 }
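
/* The BTF feature probes below all follow the same pattern as
 * probe_kern_btf(): hand-encode a minimal raw BTF blob that exercises one
 * BTF kind or flag and check whether the kernel accepts it.
 */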
4145
4146 static int probe_kern_btf_func(void)
4147 {
4148         static const char strs[] = "\0int\0x\0a";
4149         /* void x(int a) {} */
4150         __u32 types[] = {
4151                 /* int */
4152                 BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4),  /* [1] */
4153                 /* FUNC_PROTO */                                /* [2] */
4154                 BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 0),
4155                 BTF_PARAM_ENC(7, 1),
4156                 /* FUNC x */                                    /* [3] */
4157                 BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0), 2),
4158         };
4159
4160         return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
4161                                              strs, sizeof(strs)));
4162 }
4163
4164 static int probe_kern_btf_func_global(void)
4165 {
4166         static const char strs[] = "\0int\0x\0a";
4167         /* global function: void x(int a) {} */
4168         __u32 types[] = {
4169                 /* int */
4170                 BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4),  /* [1] */
4171                 /* FUNC_PROTO */                                /* [2] */
4172                 BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 0),
4173                 BTF_PARAM_ENC(7, 1),
4174                 /* FUNC x BTF_FUNC_GLOBAL */                    /* [3] */
4175                 BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, BTF_FUNC_GLOBAL), 2),
4176         };
4177
4178         return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
4179                                              strs, sizeof(strs)));
4180 }
4181
4182 static int probe_kern_btf_datasec(void)
4183 {
4184         static const char strs[] = "\0x\0.data";
4185         /* static int x; */
4186         __u32 types[] = {
4187                 /* int */
4188                 BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),  /* [1] */
4189                 /* VAR x */                                     /* [2] */
4190                 BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1),
4191                 BTF_VAR_STATIC,
4192         /* DATASEC .data */                             /* [3] */
4193                 BTF_TYPE_ENC(3, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4),
4194                 BTF_VAR_SECINFO_ENC(2, 0, 4),
4195         };
4196
4197         return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
4198                                              strs, sizeof(strs)));
4199 }
4200
4201 static int probe_kern_btf_float(void)
4202 {
4203         static const char strs[] = "\0float";
4204         __u32 types[] = {
4205                 /* float */
4206                 BTF_TYPE_FLOAT_ENC(1, 4),
4207         };
4208
4209         return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
4210                                              strs, sizeof(strs)));
4211 }
4212
4213 static int probe_kern_array_mmap(void)
4214 {
4215         struct bpf_create_map_attr attr = {
4216                 .map_type = BPF_MAP_TYPE_ARRAY,
4217                 .map_flags = BPF_F_MMAPABLE,
4218                 .key_size = sizeof(int),
4219                 .value_size = sizeof(int),
4220                 .max_entries = 1,
4221         };
4222
4223         return probe_fd(bpf_create_map_xattr(&attr));
4224 }
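
/* Usage sketch (not part of the probe): once BPF_F_MMAPABLE is known to work,
 * an array map's contents can be mapped straight into user memory, which is
 * how libbpf backs .data/.bss/.rodata global variables:
 *
 *	size_t sz = ...;	// value_size * max_entries, rounded up to page size
 *	void *mem = mmap(NULL, sz, PROT_READ | PROT_WRITE, MAP_SHARED,
 *			 map_fd, 0);
 */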
4225
4226 static int probe_kern_exp_attach_type(void)
4227 {
4228         struct bpf_load_program_attr attr;
4229         struct bpf_insn insns[] = {
4230                 BPF_MOV64_IMM(BPF_REG_0, 0),
4231                 BPF_EXIT_INSN(),
4232         };
4233
4234         memset(&attr, 0, sizeof(attr));
4235         /* use any valid combination of program type and (optional)
4236          * non-zero expected attach type (i.e., not BPF_CGROUP_INET_INGRESS,
4237          * which is zero) to see if the kernel supports the
4238          * expected_attach_type field for the BPF_PROG_LOAD command
4239          */
4240         attr.prog_type = BPF_PROG_TYPE_CGROUP_SOCK;
4241         attr.expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE;
4242         attr.insns = insns;
4243         attr.insns_cnt = ARRAY_SIZE(insns);
4244         attr.license = "GPL";
4245
4246         return probe_fd(bpf_load_program_xattr(&attr, NULL, 0));
4247 }
4248
4249 static int probe_kern_probe_read_kernel(void)
4250 {
4251         struct bpf_load_program_attr attr;
4252         struct bpf_insn insns[] = {
4253                 BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),   /* r1 = r10 (fp) */
4254                 BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),  /* r1 += -8 */
4255                 BPF_MOV64_IMM(BPF_REG_2, 8),            /* r2 = 8 */
4256                 BPF_MOV64_IMM(BPF_REG_3, 0),            /* r3 = 0 */
4257                 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_probe_read_kernel),
4258                 BPF_EXIT_INSN(),
4259         };
4260
4261         memset(&attr, 0, sizeof(attr));
4262         attr.prog_type = BPF_PROG_TYPE_KPROBE;
4263         attr.insns = insns;
4264         attr.insns_cnt = ARRAY_SIZE(insns);
4265         attr.license = "GPL";
4266
4267         return probe_fd(bpf_load_program_xattr(&attr, NULL, 0));
4268 }
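
/* The hand-assembled instructions above correspond roughly to this BPF C
 * snippet (illustrative only):
 *
 *	__u64 buf;
 *	bpf_probe_read_kernel(&buf, sizeof(buf), NULL);
 *
 * The program is only loaded, never attached or run, so the NULL source
 * pointer is harmless: the probe merely checks whether the verifier
 * recognizes the helper ID.
 */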
4269
4270 static int probe_prog_bind_map(void)
4271 {
4272         struct bpf_load_program_attr prg_attr;
4273         struct bpf_create_map_attr map_attr;
4274         char *cp, errmsg[STRERR_BUFSIZE];
4275         struct bpf_insn insns[] = {
4276                 BPF_MOV64_IMM(BPF_REG_0, 0),
4277                 BPF_EXIT_INSN(),
4278         };
4279         int ret, map, prog;
4280
4281         memset(&map_attr, 0, sizeof(map_attr));
4282         map_attr.map_type = BPF_MAP_TYPE_ARRAY;
4283         map_attr.key_size = sizeof(int);
4284         map_attr.value_size = 32;
4285         map_attr.max_entries = 1;
4286
4287         map = bpf_create_map_xattr(&map_attr);
4288         if (map < 0) {
4289                 ret = -errno;
4290                 cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg));
4291                 pr_warn("Error in %s():%s(%d). Couldn't create simple array map.\n",
4292                         __func__, cp, -ret);
4293                 return ret;
4294         }
4295
4296         memset(&prg_attr, 0, sizeof(prg_attr));
4297         prg_attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
4298         prg_attr.insns = insns;
4299         prg_attr.insns_cnt = ARRAY_SIZE(insns);
4300         prg_attr.license = "GPL";
4301
4302         prog = bpf_load_program_xattr(&prg_attr, NULL, 0);
4303         if (prog < 0) {
4304                 close(map);
4305                 return 0;
4306         }
4307
4308         ret = bpf_prog_bind_map(prog, map, NULL);
4309
4310         close(map);
4311         close(prog);
4312
4313         return ret >= 0;
4314 }
4315
4316 static int probe_module_btf(void)
4317 {
4318         static const char strs[] = "\0int";
4319         __u32 types[] = {
4320                 /* int */
4321                 BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4),
4322         };
4323         struct bpf_btf_info info;
4324         __u32 len = sizeof(info);
4325         char name[16];
4326         int fd, err;
4327
4328         fd = libbpf__load_raw_btf((char *)types, sizeof(types), strs, sizeof(strs));
4329         if (fd < 0)
4330                 return 0; /* BTF not supported at all */
4331
4332         memset(&info, 0, sizeof(info));
4333         info.name = ptr_to_u64(name);
4334         info.name_len = sizeof(name);
4335
4336         /* check that BPF_OBJ_GET_INFO_BY_FD supports specifying name pointer;
4337          * kernel's module BTF support coincides with support for
4338          * name/name_len fields in struct bpf_btf_info.
4339          */
4340         err = bpf_obj_get_info_by_fd(fd, &info, &len);
4341         close(fd);
4342         return !err;
4343 }
4344
4345 enum kern_feature_result {
4346         FEAT_UNKNOWN = 0,
4347         FEAT_SUPPORTED = 1,
4348         FEAT_MISSING = 2,
4349 };
4350
4351 typedef int (*feature_probe_fn)(void);
4352
4353 static struct kern_feature_desc {
4354         const char *desc;
4355         feature_probe_fn probe;
4356         enum kern_feature_result res;
4357 } feature_probes[__FEAT_CNT] = {
4358         [FEAT_PROG_NAME] = {
4359                 "BPF program name", probe_kern_prog_name,
4360         },
4361         [FEAT_GLOBAL_DATA] = {
4362                 "global variables", probe_kern_global_data,
4363         },
4364         [FEAT_BTF] = {
4365                 "minimal BTF", probe_kern_btf,
4366         },
4367         [FEAT_BTF_FUNC] = {
4368                 "BTF functions", probe_kern_btf_func,
4369         },
4370         [FEAT_BTF_GLOBAL_FUNC] = {
4371                 "BTF global function", probe_kern_btf_func_global,
4372         },
4373         [FEAT_BTF_DATASEC] = {
4374                 "BTF data section and variable", probe_kern_btf_datasec,
4375         },
4376         [FEAT_ARRAY_MMAP] = {
4377                 "ARRAY map mmap()", probe_kern_array_mmap,
4378         },
4379         [FEAT_EXP_ATTACH_TYPE] = {
4380                 "BPF_PROG_LOAD expected_attach_type attribute",
4381                 probe_kern_exp_attach_type,
4382         },
4383         [FEAT_PROBE_READ_KERN] = {
4384                 "bpf_probe_read_kernel() helper", probe_kern_probe_read_kernel,
4385         },
4386         [FEAT_PROG_BIND_MAP] = {
4387                 "BPF_PROG_BIND_MAP support", probe_prog_bind_map,
4388         },
4389         [FEAT_MODULE_BTF] = {
4390                 "module BTF support", probe_module_btf,
4391         },
4392         [FEAT_BTF_FLOAT] = {
4393                 "BTF_KIND_FLOAT support", probe_kern_btf_float,
4394         },
4395 };
4396
4397 static bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id)
4398 {
4399         struct kern_feature_desc *feat = &feature_probes[feat_id];
4400         int ret;
4401
4402         if (obj->gen_loader)
4403                 /* When generating a loader program, assume the latest
4404                  * kernel to avoid extra prog_load and map_create syscalls.
4405                  */
4406                 return true;
4407
4408         if (READ_ONCE(feat->res) == FEAT_UNKNOWN) {
4409                 ret = feat->probe();
4410                 if (ret > 0) {
4411                         WRITE_ONCE(feat->res, FEAT_SUPPORTED);
4412                 } else if (ret == 0) {
4413                         WRITE_ONCE(feat->res, FEAT_MISSING);
4414                 } else {
4415                         pr_warn("Detection of kernel %s support failed: %d\n", feat->desc, ret);
4416                         WRITE_ONCE(feat->res, FEAT_MISSING);
4417                 }
4418         }
4419
4420         return READ_ONCE(feat->res) == FEAT_SUPPORTED;
4421 }
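
/* Usage sketch: callers gate optional behavior on probe results, e.g.
 * map sanitization elsewhere in this file clears the mmap flag from
 * internal maps when the running kernel can't handle it:
 *
 *	if (!kernel_supports(obj, FEAT_ARRAY_MMAP))
 *		m->def.map_flags ^= BPF_F_MMAPABLE;
 */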
4422
4423 static bool map_is_reuse_compat(const struct bpf_map *map, int map_fd)
4424 {
4425         struct bpf_map_info map_info = {};
4426         char msg[STRERR_BUFSIZE];
4427         __u32 map_info_len;
4428         int err;
4429
4430         map_info_len = sizeof(map_info);
4431
4432         err = bpf_obj_get_info_by_fd(map_fd, &map_info, &map_info_len);
4433         if (err && errno == EINVAL)
4434                 err = bpf_get_map_info_from_fdinfo(map_fd, &map_info);
4435         if (err) {
4436                 pr_warn("failed to get map info for map FD %d: %s\n", map_fd,
4437                         libbpf_strerror_r(errno, msg, sizeof(msg)));
4438                 return false;
4439         }
4440
4441         return (map_info.type == map->def.type &&
4442                 map_info.key_size == map->def.key_size &&
4443                 map_info.value_size == map->def.value_size &&
4444                 map_info.max_entries == map->def.max_entries &&
4445                 map_info.map_flags == map->def.map_flags);
4446 }
4447
4448 static int
4449 bpf_object__reuse_map(struct bpf_map *map)
4450 {
4451         char *cp, errmsg[STRERR_BUFSIZE];
4452         int err, pin_fd;
4453
4454         pin_fd = bpf_obj_get(map->pin_path);
4455         if (pin_fd < 0) {
4456                 err = -errno;
4457                 if (err == -ENOENT) {
4458                         pr_debug("found no pinned map to reuse at '%s'\n",
4459                                  map->pin_path);
4460                         return 0;
4461                 }
4462
4463                 cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg));
4464                 pr_warn("couldn't retrieve pinned map '%s': %s\n",
4465                         map->pin_path, cp);
4466                 return err;
4467         }
4468
4469         if (!map_is_reuse_compat(map, pin_fd)) {
4470                 pr_warn("couldn't reuse pinned map at '%s': parameter mismatch\n",
4471                         map->pin_path);
4472                 close(pin_fd);
4473                 return -EINVAL;
4474         }
4475
4476         err = bpf_map__reuse_fd(map, pin_fd);
4477         if (err) {
4478                 close(pin_fd);
4479                 return err;
4480         }
4481         map->pinned = true;
4482         pr_debug("reused pinned map at '%s'\n", map->pin_path);
4483
4484         return 0;
4485 }
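
/* Usage sketch: reuse is driven entirely by the pin path, so a caller opts in
 * before load ("/sys/fs/bpf/my_map" is just an example path):
 *
 *	bpf_map__set_pin_path(map, "/sys/fs/bpf/my_map");
 *
 * On load, a compatible map already pinned there is reused; otherwise the
 * freshly created map is pinned at that path (see bpf_object__create_maps()).
 */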
4486
4487 static int
4488 bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map)
4489 {
4490         enum libbpf_map_type map_type = map->libbpf_type;
4491         char *cp, errmsg[STRERR_BUFSIZE];
4492         int err, zero = 0;
4493
4494         if (obj->gen_loader) {
4495                 bpf_gen__map_update_elem(obj->gen_loader, map - obj->maps,
4496                                          map->mmaped, map->def.value_size);
4497                 if (map_type == LIBBPF_MAP_RODATA || map_type == LIBBPF_MAP_KCONFIG)
4498                         bpf_gen__map_freeze(obj->gen_loader, map - obj->maps);
4499                 return 0;
4500         }
4501         err = bpf_map_update_elem(map->fd, &zero, map->mmaped, 0);
4502         if (err) {
4503                 err = -errno;
4504                 cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
4505                 pr_warn("Error setting initial map(%s) contents: %s\n",
4506                         map->name, cp);
4507                 return err;
4508         }
4509
4510         /* Freeze .rodata and .kconfig maps as read-only from syscall side. */
4511         if (map_type == LIBBPF_MAP_RODATA || map_type == LIBBPF_MAP_KCONFIG) {
4512                 err = bpf_map_freeze(map->fd);
4513                 if (err) {
4514                         err = -errno;
4515                         cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
4516                         pr_warn("Error freezing map(%s) as read-only: %s\n",
4517                                 map->name, cp);
4518                         return err;
4519                 }
4520         }
4521         return 0;
4522 }
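
/* Once frozen, a map rejects all syscall-side writes, so for a .rodata map
 * something like the following (illustrative) fails with -EPERM, while reads
 * from BPF programs keep working:
 *
 *	int zero = 0;
 *	bpf_map_update_elem(map_fd, &zero, new_value, 0);	// -EPERM
 */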
4523
4524 static void bpf_map__destroy(struct bpf_map *map);
4525
4526 static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, bool is_inner)
4527 {
4528         struct bpf_create_map_attr create_attr;
4529         struct bpf_map_def *def = &map->def;
4530         int err = 0;
4531
4532         memset(&create_attr, 0, sizeof(create_attr));
4533
4534         if (kernel_supports(obj, FEAT_PROG_NAME))
4535                 create_attr.name = map->name;
4536         create_attr.map_ifindex = map->map_ifindex;
4537         create_attr.map_type = def->type;
4538         create_attr.map_flags = def->map_flags;
4539         create_attr.key_size = def->key_size;
4540         create_attr.value_size = def->value_size;
4541         create_attr.numa_node = map->numa_node;
4542
4543         if (def->type == BPF_MAP_TYPE_PERF_EVENT_ARRAY && !def->max_entries) {
4544                 int nr_cpus;
4545
4546                 nr_cpus = libbpf_num_possible_cpus();
4547                 if (nr_cpus < 0) {
4548                         pr_warn("map '%s': failed to determine number of system CPUs: %d\n",
4549                                 map->name, nr_cpus);
4550                         return nr_cpus;
4551                 }
4552                 pr_debug("map '%s': setting size to %d\n", map->name, nr_cpus);
4553                 create_attr.max_entries = nr_cpus;
4554         } else {
4555                 create_attr.max_entries = def->max_entries;
4556         }
4557
4558         if (bpf_map__is_struct_ops(map))
4559                 create_attr.btf_vmlinux_value_type_id =
4560                         map->btf_vmlinux_value_type_id;
4561
4562         create_attr.btf_fd = 0;
4563         create_attr.btf_key_type_id = 0;
4564         create_attr.btf_value_type_id = 0;
4565         if (obj->btf && btf__fd(obj->btf) >= 0 && !bpf_map_find_btf_info(obj, map)) {
4566                 create_attr.btf_fd = btf__fd(obj->btf);
4567                 create_attr.btf_key_type_id = map->btf_key_type_id;
4568                 create_attr.btf_value_type_id = map->btf_value_type_id;
4569         }
4570
4571         if (bpf_map_type__is_map_in_map(def->type)) {
4572                 if (map->inner_map) {
4573                         err = bpf_object__create_map(obj, map->inner_map, true);
4574                         if (err) {
4575                                 pr_warn("map '%s': failed to create inner map: %d\n",
4576                                         map->name, err);
4577                                 return err;
4578                         }
4579                         map->inner_map_fd = bpf_map__fd(map->inner_map);
4580                 }
4581                 if (map->inner_map_fd >= 0)
4582                         create_attr.inner_map_fd = map->inner_map_fd;
4583         }
4584
4585         if (obj->gen_loader) {
4586                 bpf_gen__map_create(obj->gen_loader, &create_attr, is_inner ? -1 : map - obj->maps);
4587                 /* Pretend to have valid FD to pass various fd >= 0 checks.
4588                  * This fd == 0 will not be used with any syscall and will be reset to -1 eventually.
4589                  */
4590                 map->fd = 0;
4591         } else {
4592                 map->fd = bpf_create_map_xattr(&create_attr);
4593         }
4594         if (map->fd < 0 && (create_attr.btf_key_type_id ||
4595                             create_attr.btf_value_type_id)) {
4596                 char *cp, errmsg[STRERR_BUFSIZE];
4597
4598                 err = -errno;
4599                 cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
4600                 pr_warn("Error in bpf_create_map_xattr(%s):%s(%d). Retrying without BTF.\n",
4601                         map->name, cp, err);
4602                 create_attr.btf_fd = 0;
4603                 create_attr.btf_key_type_id = 0;
4604                 create_attr.btf_value_type_id = 0;
4605                 map->btf_key_type_id = 0;
4606                 map->btf_value_type_id = 0;
4607                 map->fd = bpf_create_map_xattr(&create_attr);
4608         }
4609
4610         err = map->fd < 0 ? -errno : 0;
4611
4612         if (bpf_map_type__is_map_in_map(def->type) && map->inner_map) {
4613                 if (obj->gen_loader)
4614                         map->inner_map->fd = -1;
4615                 bpf_map__destroy(map->inner_map);
4616                 zfree(&map->inner_map);
4617         }
4618
4619         return err;
4620 }
4621
4622 static int init_map_slots(struct bpf_object *obj, struct bpf_map *map)
4623 {
4624         const struct bpf_map *targ_map;
4625         unsigned int i;
4626         int fd, err = 0;
4627
4628         for (i = 0; i < map->init_slots_sz; i++) {
4629                 if (!map->init_slots[i])
4630                         continue;
4631
4632                 targ_map = map->init_slots[i];
4633                 fd = bpf_map__fd(targ_map);
4634                 if (obj->gen_loader) {
4635                         pr_warn("// TODO map_update_elem: idx %td key %d value==map_idx %td\n",
4636                                 map - obj->maps, i, targ_map - obj->maps);
4637                         return -ENOTSUP;
4638                 } else {
4639                         err = bpf_map_update_elem(map->fd, &i, &fd, 0);
4640                 }
4641                 if (err) {
4642                         err = -errno;
4643                         pr_warn("map '%s': failed to initialize slot [%d] to map '%s' fd=%d: %d\n",
4644                                 map->name, i, targ_map->name,
4645                                 fd, err);
4646                         return err;
4647                 }
4648                 pr_debug("map '%s': slot [%d] set to map '%s' fd=%d\n",
4649                          map->name, i, targ_map->name, fd);
4650         }
4651
4652         zfree(&map->init_slots);
4653         map->init_slots_sz = 0;
4654
4655         return 0;
4656 }
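
/* The init_slots filled in above come from declarative map-in-map definitions
 * in BPF C code, along these lines (illustrative sketch):
 *
 *	struct inner {
 *		__uint(type, BPF_MAP_TYPE_ARRAY);
 *		__uint(max_entries, 1);
 *		__type(key, int);
 *		__type(value, int);
 *	} inner_map SEC(".maps");
 *
 *	struct {
 *		__uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
 *		__uint(max_entries, 4);
 *		__type(key, int);
 *		__array(values, struct inner);
 *	} outer_map SEC(".maps") = {
 *		.values = { [0] = &inner_map },
 *	};
 */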
4657
4658 static int
4659 bpf_object__create_maps(struct bpf_object *obj)
4660 {
4661         struct bpf_map *map;
4662         char *cp, errmsg[STRERR_BUFSIZE];
4663         unsigned int i, j;
4664         int err;
4665
4666         for (i = 0; i < obj->nr_maps; i++) {
4667                 map = &obj->maps[i];
4668
4669                 if (map->pin_path) {
4670                         err = bpf_object__reuse_map(map);
4671                         if (err) {
4672                                 pr_warn("map '%s': error reusing pinned map\n",
4673                                         map->name);
4674                                 goto err_out;
4675                         }
4676                 }
4677
4678                 if (map->fd >= 0) {
4679                         pr_debug("map '%s': skipping creation (preset fd=%d)\n",
4680                                  map->name, map->fd);
4681                 } else {
4682                         err = bpf_object__create_map(obj, map, false);
4683                         if (err)
4684                                 goto err_out;
4685
4686                         pr_debug("map '%s': created successfully, fd=%d\n",
4687                                  map->name, map->fd);
4688
4689                         if (bpf_map__is_internal(map)) {
4690                                 err = bpf_object__populate_internal_map(obj, map);
4691                                 if (err < 0) {
4692                                         zclose(map->fd);
4693                                         goto err_out;
4694                                 }
4695                         }
4696
4697                         if (map->init_slots_sz) {
4698                                 err = init_map_slots(obj, map);
4699                                 if (err < 0) {
4700                                         zclose(map->fd);
4701                                         goto err_out;
4702                                 }
4703                         }
4704                 }
4705
4706                 if (map->pin_path && !map->pinned) {
4707                         err = bpf_map__pin(map, NULL);
4708                         if (err) {
4709                                 pr_warn("map '%s': failed to auto-pin at '%s': %d\n",
4710                                         map->name, map->pin_path, err);
4711                                 zclose(map->fd);
4712                                 goto err_out;
4713                         }
4714                 }
4715         }
4716
4717         return 0;
4718
4719 err_out:
4720         cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
4721         pr_warn("map '%s': failed to create: %s(%d)\n", map->name, cp, err);
4722         pr_perm_msg(err);
4723         for (j = 0; j < i; j++)
4724                 zclose(obj->maps[j].fd);
4725         return err;
4726 }
4727
4728 #define BPF_CORE_SPEC_MAX_LEN 64
4729
4730 /* represents BPF CO-RE field or array element accessor */
4731 struct bpf_core_accessor {
4732         __u32 type_id;          /* struct/union type or array element type */
4733         __u32 idx;              /* field index or array index */
4734         const char *name;       /* field name or NULL for array accessor */
4735 };
4736
4737 struct bpf_core_spec {
4738         const struct btf *btf;
4739         /* high-level spec: named fields and array indices only */
4740         struct bpf_core_accessor spec[BPF_CORE_SPEC_MAX_LEN];
4741         /* original unresolved (no skip_mods_or_typedefs) root type ID */
4742         __u32 root_type_id;
4743         /* CO-RE relocation kind */
4744         enum bpf_core_relo_kind relo_kind;
4745         /* high-level spec length */
4746         int len;
4747         /* raw, low-level spec: 1-to-1 with accessor spec string */
4748         int raw_spec[BPF_CORE_SPEC_MAX_LEN];
4749         /* raw spec length */
4750         int raw_len;
4751         /* field bit offset represented by spec */
4752         __u32 bit_offset;
4753 };
4754
4755 static bool str_is_empty(const char *s)
4756 {
4757         return !s || !s[0];
4758 }
4759
4760 static bool is_flex_arr(const struct btf *btf,
4761                         const struct bpf_core_accessor *acc,
4762                         const struct btf_array *arr)
4763 {
4764         const struct btf_type *t;
4765
4766         /* not a flexible array if not a named struct member or if it has non-zero size */
4767         if (!acc->name || arr->nelems > 0)
4768                 return false;
4769
4770         /* has to be the last member of enclosing struct */
4771         t = btf__type_by_id(btf, acc->type_id);
4772         return acc->idx == btf_vlen(t) - 1;
4773 }
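
/* Example of the flexible-array case recognized above (illustrative):
 *
 *	struct pkt {
 *		int len;
 *		char data[];	// nelems == 0 and last member => flexible
 *	};
 *
 * Accesses like pkt->data[100] must not be rejected by the nelems bound
 * check during spec parsing below.
 */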
4774
4775 static const char *core_relo_kind_str(enum bpf_core_relo_kind kind)
4776 {
4777         switch (kind) {
4778         case BPF_FIELD_BYTE_OFFSET: return "byte_off";
4779         case BPF_FIELD_BYTE_SIZE: return "byte_sz";
4780         case BPF_FIELD_EXISTS: return "field_exists";
4781         case BPF_FIELD_SIGNED: return "signed";
4782         case BPF_FIELD_LSHIFT_U64: return "lshift_u64";
4783         case BPF_FIELD_RSHIFT_U64: return "rshift_u64";
4784         case BPF_TYPE_ID_LOCAL: return "local_type_id";
4785         case BPF_TYPE_ID_TARGET: return "target_type_id";
4786         case BPF_TYPE_EXISTS: return "type_exists";
4787         case BPF_TYPE_SIZE: return "type_size";
4788         case BPF_ENUMVAL_EXISTS: return "enumval_exists";
4789         case BPF_ENUMVAL_VALUE: return "enumval_value";
4790         default: return "unknown";
4791         }
4792 }
4793
4794 static bool core_relo_is_field_based(enum bpf_core_relo_kind kind)
4795 {
4796         switch (kind) {
4797         case BPF_FIELD_BYTE_OFFSET:
4798         case BPF_FIELD_BYTE_SIZE:
4799         case BPF_FIELD_EXISTS:
4800         case BPF_FIELD_SIGNED:
4801         case BPF_FIELD_LSHIFT_U64:
4802         case BPF_FIELD_RSHIFT_U64:
4803                 return true;
4804         default:
4805                 return false;
4806         }
4807 }
4808
4809 static bool core_relo_is_type_based(enum bpf_core_relo_kind kind)
4810 {
4811         switch (kind) {
4812         case BPF_TYPE_ID_LOCAL:
4813         case BPF_TYPE_ID_TARGET:
4814         case BPF_TYPE_EXISTS:
4815         case BPF_TYPE_SIZE:
4816                 return true;
4817         default:
4818                 return false;
4819         }
4820 }
4821
4822 static bool core_relo_is_enumval_based(enum bpf_core_relo_kind kind)
4823 {
4824         switch (kind) {
4825         case BPF_ENUMVAL_EXISTS:
4826         case BPF_ENUMVAL_VALUE:
4827                 return true;
4828         default:
4829                 return false;
4830         }
4831 }
4832
4833 /*
4834  * Turn bpf_core_relo into a low- and high-level spec representation,
4835  * validating correctness along the way, as well as calculating resulting
4836  * field bit offset, specified by accessor string. Low-level spec captures
4837  * every single level of nestedness, including traversing anonymous
4838  * struct/union members. High-level one only captures semantically meaningful
4839  * "turning points": named fields and array indices.
4840  * E.g., for this case:
4841  *
4842  *   struct sample {
4843  *       int __unimportant;
4844  *       struct {
4845  *           int __1;
4846  *           int __2;
4847  *           int a[7];
4848  *       };
4849  *   };
4850  *
4851  *   struct sample *s = ...;
4852  *
4853  *   int *x = &s->a[3]; // access string = '0:1:2:3'
4854  *
4855  * Low-level spec has 1:1 mapping with each element of access string (it's
4856  * just a parsed access string representation): [0, 1, 2, 3].
4857  *
4858  * High-level spec will capture only 3 points:
4859  *   - initial zero-index access by pointer (&s->... is the same as &s[0]...);
4860  *   - field 'a' access (corresponds to '2' in low-level spec);
4861  *   - array element #3 access (corresponds to '3' in low-level spec).
4862  *
4863  * Type-based relocations (TYPE_EXISTS/TYPE_SIZE,
4864  * TYPE_ID_LOCAL/TYPE_ID_TARGET) don't capture any field information. Their
4865  * spec and raw_spec are kept empty.
4866  *
4867  * Enum value-based relocations (ENUMVAL_EXISTS/ENUMVAL_VALUE) use the access
4868  * string to specify the enumerator's value index that needs to be relocated.
4869  */
4870 static int bpf_core_parse_spec(const struct btf *btf,
4871                                __u32 type_id,
4872                                const char *spec_str,
4873                                enum bpf_core_relo_kind relo_kind,
4874                                struct bpf_core_spec *spec)
4875 {
4876         int access_idx, parsed_len, i;
4877         struct bpf_core_accessor *acc;
4878         const struct btf_type *t;
4879         const char *name;
4880         __u32 id;
4881         __s64 sz;
4882
4883         if (str_is_empty(spec_str) || *spec_str == ':')
4884                 return -EINVAL;
4885
4886         memset(spec, 0, sizeof(*spec));
4887         spec->btf = btf;
4888         spec->root_type_id = type_id;
4889         spec->relo_kind = relo_kind;
4890
4891         /* type-based relocations don't have a field access string */
4892         if (core_relo_is_type_based(relo_kind)) {
4893                 if (strcmp(spec_str, "0"))
4894                         return -EINVAL;
4895                 return 0;
4896         }
4897
4898         /* parse spec_str="0:1:2:3:4" into array raw_spec=[0, 1, 2, 3, 4] */
4899         while (*spec_str) {
4900                 if (*spec_str == ':')
4901                         ++spec_str;
4902                 if (sscanf(spec_str, "%d%n", &access_idx, &parsed_len) != 1)
4903                         return -EINVAL;
4904                 if (spec->raw_len == BPF_CORE_SPEC_MAX_LEN)
4905                         return -E2BIG;
4906                 spec_str += parsed_len;
4907                 spec->raw_spec[spec->raw_len++] = access_idx;
4908         }
4909
4910         if (spec->raw_len == 0)
4911                 return -EINVAL;
4912
4913         t = skip_mods_and_typedefs(btf, type_id, &id);
4914         if (!t)
4915                 return -EINVAL;
4916
4917         access_idx = spec->raw_spec[0];
4918         acc = &spec->spec[0];
4919         acc->type_id = id;
4920         acc->idx = access_idx;
4921         spec->len++;
4922
4923         if (core_relo_is_enumval_based(relo_kind)) {
4924                 if (!btf_is_enum(t) || spec->raw_len > 1 || access_idx >= btf_vlen(t))
4925                         return -EINVAL;
4926
4927                 /* record enumerator name in the first accessor */
4928                 acc->name = btf__name_by_offset(btf, btf_enum(t)[access_idx].name_off);
4929                 return 0;
4930         }
4931
4932         if (!core_relo_is_field_based(relo_kind))
4933                 return -EINVAL;
4934
4935         sz = btf__resolve_size(btf, id);
4936         if (sz < 0)
4937                 return sz;
4938         spec->bit_offset = access_idx * sz * 8;
4939
4940         for (i = 1; i < spec->raw_len; i++) {
4941                 t = skip_mods_and_typedefs(btf, id, &id);
4942                 if (!t)
4943                         return -EINVAL;
4944
4945                 access_idx = spec->raw_spec[i];
4946                 acc = &spec->spec[spec->len];
4947
4948                 if (btf_is_composite(t)) {
4949                         const struct btf_member *m;
4950                         __u32 bit_offset;
4951
4952                         if (access_idx >= btf_vlen(t))
4953                                 return -EINVAL;
4954
4955                         bit_offset = btf_member_bit_offset(t, access_idx);
4956                         spec->bit_offset += bit_offset;
4957
4958                         m = btf_members(t) + access_idx;
4959                         if (m->name_off) {
4960                                 name = btf__name_by_offset(btf, m->name_off);
4961                                 if (str_is_empty(name))
4962                                         return -EINVAL;
4963
4964                                 acc->type_id = id;
4965                                 acc->idx = access_idx;
4966                                 acc->name = name;
4967                                 spec->len++;
4968                         }
4969
4970                         id = m->type;
4971                 } else if (btf_is_array(t)) {
4972                         const struct btf_array *a = btf_array(t);
4973                         bool flex;
4974
4975                         t = skip_mods_and_typedefs(btf, a->type, &id);
4976                         if (!t)
4977                                 return -EINVAL;
4978
4979                         flex = is_flex_arr(btf, acc - 1, a);
4980                         if (!flex && access_idx >= a->nelems)
4981                                 return -EINVAL;
4982
4983                         spec->spec[spec->len].type_id = id;
4984                         spec->spec[spec->len].idx = access_idx;
4985                         spec->len++;
4986
4987                         sz = btf__resolve_size(btf, id);
4988                         if (sz < 0)
4989                                 return sz;
4990                         spec->bit_offset += access_idx * sz * 8;
4991                 } else {
4992                         pr_warn("relo for [%u] %s (at idx %d) captures type [%d] of unexpected kind %s\n",
4993                                 type_id, spec_str, i, id, btf_kind_str(t));
4994                         return -EINVAL;
4995                 }
4996         }
4997
4998         return 0;
4999 }
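
/* Worked example, following the struct sample case documented above: parsing
 * access string "0:1:2:3" yields
 *
 *	raw_spec   = [0, 1, 2, 3], raw_len = 4
 *	spec       = [initial deref (0), field 'a', array idx 3], len = 3
 *	bit_offset = 0 + 32 (anon struct) + 64 ('a') + 3 * 32 (a[3]) = 192
 *
 * i.e. byte offset 24, matching the layout of s->a[3] in struct sample.
 */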
5000
5001 static bool bpf_core_is_flavor_sep(const char *s)
5002 {
5003         /* check X___Y name pattern, where X and Y are not underscores */
5004         return s[0] != '_' &&                                 /* X */
5005                s[1] == '_' && s[2] == '_' && s[3] == '_' &&   /* ___ */
5006                s[4] != '_';                                   /* Y */
5007 }
5008
5009 /* Given 'some_struct_name___with_flavor', return the length of the name
5010  * prefix before the last triple underscore. The struct name part after the
5011  * last triple underscore is ignored during CO-RE relocation matching.
5012  */
5013 static size_t bpf_core_essential_name_len(const char *name)
5014 {
5015         size_t n = strlen(name);
5016         int i;
5017
5018         for (i = n - 5; i >= 0; i--) {
5019                 if (bpf_core_is_flavor_sep(name + i))
5020                         return i + 1;
5021         }
5022         return n;
5023 }
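
/* Example: bpf_core_essential_name_len("task_struct___2") returns 11, the
 * length of "task_struct", so a local flavor like task_struct___2 matches the
 * kernel's plain task_struct during candidate search.
 */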
5024
5025 struct core_cand {
5027         const struct btf *btf;
5028         const struct btf_type *t;
5029         const char *name;
5030         __u32 id;
5031 };
5032
5033 /* dynamically sized list of type IDs and its associated struct btf */
5034 struct core_cand_list {
5035         struct core_cand *cands;
5036         int len;
5037 };
5038
5039 static void bpf_core_free_cands(struct core_cand_list *cands)
5040 {
5041         free(cands->cands);
5042         free(cands);
5043 }
5044
5045 static int bpf_core_add_cands(struct core_cand *local_cand,
5046                               size_t local_essent_len,
5047                               const struct btf *targ_btf,
5048                               const char *targ_btf_name,
5049                               int targ_start_id,
5050                               struct core_cand_list *cands)
5051 {
5052         struct core_cand *new_cands, *cand;
5053         const struct btf_type *t;
5054         const char *targ_name;
5055         size_t targ_essent_len;
5056         int n, i;
5057
5058         n = btf__get_nr_types(targ_btf);
5059         for (i = targ_start_id; i <= n; i++) {
5060                 t = btf__type_by_id(targ_btf, i);
5061                 if (btf_kind(t) != btf_kind(local_cand->t))
5062                         continue;
5063
5064                 targ_name = btf__name_by_offset(targ_btf, t->name_off);
5065                 if (str_is_empty(targ_name))
5066                         continue;
5067
5068                 targ_essent_len = bpf_core_essential_name_len(targ_name);
5069                 if (targ_essent_len != local_essent_len)
5070                         continue;
5071
5072                 if (strncmp(local_cand->name, targ_name, local_essent_len) != 0)
5073                         continue;
5074
5075                 pr_debug("CO-RE relocating [%d] %s %s: found target candidate [%d] %s %s in [%s]\n",
5076                          local_cand->id, btf_kind_str(local_cand->t),
5077                          local_cand->name, i, btf_kind_str(t), targ_name,
5078                          targ_btf_name);
5079                 new_cands = libbpf_reallocarray(cands->cands, cands->len + 1,
5080                                               sizeof(*cands->cands));
5081                 if (!new_cands)
5082                         return -ENOMEM;
5083
5084                 cand = &new_cands[cands->len];
5085                 cand->btf = targ_btf;
5086                 cand->t = t;
5087                 cand->name = targ_name;
5088                 cand->id = i;
5089
5090                 cands->cands = new_cands;
5091                 cands->len++;
5092         }
5093         return 0;
5094 }
5095
5096 static int load_module_btfs(struct bpf_object *obj)
5097 {
5098         struct bpf_btf_info info;
5099         struct module_btf *mod_btf;
5100         struct btf *btf;
5101         char name[64];
5102         __u32 id = 0, len;
5103         int err, fd;
5104
5105         if (obj->btf_modules_loaded)
5106                 return 0;
5107
5108         if (obj->gen_loader)
5109                 return 0;
5110
5111         /* don't do this again, even if we find no module BTFs */
5112         obj->btf_modules_loaded = true;
5113
5114         /* kernel too old to support module BTFs */
5115         if (!kernel_supports(obj, FEAT_MODULE_BTF))
5116                 return 0;
5117
5118         while (true) {
5119                 err = bpf_btf_get_next_id(id, &id);
5120                 if (err && errno == ENOENT)
5121                         return 0;
5122                 if (err) {
5123                         err = -errno;
5124                         pr_warn("failed to iterate BTF objects: %d\n", err);
5125                         return err;
5126                 }
5127
5128                 fd = bpf_btf_get_fd_by_id(id);
5129                 if (fd < 0) {
5130                         if (errno == ENOENT)
5131                                 continue; /* expected race: BTF was unloaded */
5132                         err = -errno;
5133                         pr_warn("failed to get BTF object #%d FD: %d\n", id, err);
5134                         return err;
5135                 }
5136
5137                 len = sizeof(info);
5138                 memset(&info, 0, sizeof(info));
5139                 info.name = ptr_to_u64(name);
5140                 info.name_len = sizeof(name);
5141
5142                 err = bpf_obj_get_info_by_fd(fd, &info, &len);
5143                 if (err) {
5144                         err = -errno;
5145                         pr_warn("failed to get BTF object #%d info: %d\n", id, err);
5146                         goto err_out;
5147                 }
5148
5149                 /* ignore non-module BTFs */
5150                 if (!info.kernel_btf || strcmp(name, "vmlinux") == 0) {
5151                         close(fd);
5152                         continue;
5153                 }
5154
5155                 btf = btf_get_from_fd(fd, obj->btf_vmlinux);
5156                 err = libbpf_get_error(btf);
5157                 if (err) {
5158                         pr_warn("failed to load module [%s]'s BTF object #%d: %d\n",
5159                                 name, id, err);
5160                         goto err_out;
5161                 }
5162
5163                 err = libbpf_ensure_mem((void **)&obj->btf_modules, &obj->btf_module_cap,
5164                                         sizeof(*obj->btf_modules), obj->btf_module_cnt + 1);
5165                 if (err)
5166                         goto err_out;
5167
5168                 mod_btf = &obj->btf_modules[obj->btf_module_cnt++];
5169
5170                 mod_btf->btf = btf;
5171                 mod_btf->id = id;
5172                 mod_btf->fd = fd;
5173                 mod_btf->name = strdup(name);
5174                 if (!mod_btf->name) {
5175                         err = -ENOMEM;
5176                         goto err_out;
5177                 }
5178                 continue;
5179
5180 err_out:
5181                 close(fd);
5182                 return err;
5183         }
5184
5185         return 0;
5186 }
5187
5188 static struct core_cand_list *
5189 bpf_core_find_cands(struct bpf_object *obj, const struct btf *local_btf, __u32 local_type_id)
5190 {
5191         struct core_cand local_cand = {};
5192         struct core_cand_list *cands;
5193         const struct btf *main_btf;
5194         size_t local_essent_len;
5195         int err, i;
5196
5197         local_cand.btf = local_btf;
5198         local_cand.t = btf__type_by_id(local_btf, local_type_id);
5199         if (!local_cand.t)
5200                 return ERR_PTR(-EINVAL);
5201
5202         local_cand.name = btf__name_by_offset(local_btf, local_cand.t->name_off);
5203         if (str_is_empty(local_cand.name))
5204                 return ERR_PTR(-EINVAL);
5205         local_essent_len = bpf_core_essential_name_len(local_cand.name);
5206
5207         cands = calloc(1, sizeof(*cands));
5208         if (!cands)
5209                 return ERR_PTR(-ENOMEM);
5210
5211         /* Attempt to find target candidates in vmlinux BTF first */
5212         main_btf = obj->btf_vmlinux_override ?: obj->btf_vmlinux;
5213         err = bpf_core_add_cands(&local_cand, local_essent_len, main_btf, "vmlinux", 1, cands);
5214         if (err)
5215                 goto err_out;
5216
5217         /* if vmlinux BTF has any candidate, don't go for module BTFs */
5218         if (cands->len)
5219                 return cands;
5220
5221         /* if vmlinux BTF was overridden, don't attempt to load module BTFs */
5222         if (obj->btf_vmlinux_override)
5223                 return cands;
5224
5225         /* now look through module BTFs, trying to still find candidates */
5226         err = load_module_btfs(obj);
5227         if (err)
5228                 goto err_out;
5229
5230         for (i = 0; i < obj->btf_module_cnt; i++) {
5231                 err = bpf_core_add_cands(&local_cand, local_essent_len,
5232                                          obj->btf_modules[i].btf,
5233                                          obj->btf_modules[i].name,
5234                                          btf__get_nr_types(obj->btf_vmlinux) + 1,
5235                                          cands);
5236                 if (err)
5237                         goto err_out;
5238         }
5239
5240         return cands;
5241 err_out:
5242         bpf_core_free_cands(cands);
5243         return ERR_PTR(err);
5244 }
5245
5246 /* Check two types for compatibility for the purpose of field access
5247  * relocation. const/volatile/restrict and typedefs are skipped to ensure we
5248  * are relocating semantically compatible entities:
5249  *   - any two STRUCTs/UNIONs are compatible and can be mixed;
5250  *   - any two FWDs are compatible, if their names match (modulo flavor suffix);
5251  *   - any two PTRs are always compatible;
5252  *   - for ENUMs, names should be the same (ignoring flavor suffix) or at
5253  *     least one of the enums should be anonymous; only names matter here,
5254  *     sizes are ignored;
5255  *   - for INT, size and signedness are ignored;
5256  *   - any two FLOATs are always compatible;
5257  *   - for ARRAY, dimensionality is ignored, element types are checked for
5258  *     compatibility recursively;
5259  *   - everything else shouldn't be ever a target of relocation.
5260  * These rules are not set in stone and probably will be adjusted as we get
5261  * more experience with using BPF CO-RE relocations.
5262  */
5263 static int bpf_core_fields_are_compat(const struct btf *local_btf,
5264                                       __u32 local_id,
5265                                       const struct btf *targ_btf,
5266                                       __u32 targ_id)
5267 {
5268         const struct btf_type *local_type, *targ_type;
5269
5270 recur:
5271         local_type = skip_mods_and_typedefs(local_btf, local_id, &local_id);
5272         targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id);
5273         if (!local_type || !targ_type)
5274                 return -EINVAL;
5275
5276         if (btf_is_composite(local_type) && btf_is_composite(targ_type))
5277                 return 1;
5278         if (btf_kind(local_type) != btf_kind(targ_type))
5279                 return 0;
5280
5281         switch (btf_kind(local_type)) {
5282         case BTF_KIND_PTR:
5283         case BTF_KIND_FLOAT:
5284                 return 1;
5285         case BTF_KIND_FWD:
5286         case BTF_KIND_ENUM: {
5287                 const char *local_name, *targ_name;
5288                 size_t local_len, targ_len;
5289
5290                 local_name = btf__name_by_offset(local_btf,
5291                                                  local_type->name_off);
5292                 targ_name = btf__name_by_offset(targ_btf, targ_type->name_off);
5293                 local_len = bpf_core_essential_name_len(local_name);
5294                 targ_len = bpf_core_essential_name_len(targ_name);
5295                 /* one of them is anonymous or both w/ same flavor-less names */
5296                 return local_len == 0 || targ_len == 0 ||
5297                        (local_len == targ_len &&
5298                         strncmp(local_name, targ_name, local_len) == 0);
5299         }
5300         case BTF_KIND_INT:
5301                 /* just reject deprecated bitfield-like integers; all other
5302          * integers are by default compatible with each other
5303                  */
5304                 return btf_int_offset(local_type) == 0 &&
5305                        btf_int_offset(targ_type) == 0;
5306         case BTF_KIND_ARRAY:
5307                 local_id = btf_array(local_type)->type;
5308                 targ_id = btf_array(targ_type)->type;
5309                 goto recur;
5310         default:
5311                 pr_warn("unexpected kind %d relocated, local [%d], target [%d]\n",
5312                         btf_kind(local_type), local_id, targ_id);
5313                 return 0;
5314         }
5315 }
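
/* Example of the INT rule above (illustrative): a local 'int' field matches a
 * target 'long' field, since size and signedness are ignored and both have a
 * zero btf_int_offset(); only legacy bitfield-encoded integers, with a
 * non-zero bit offset inside the INT encoding, are rejected.
 */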
5316
5317 /*
5318  * Given single high-level named field accessor in local type, find
5319  * corresponding high-level accessor for a target type. Along the way,
5320  * maintain low-level spec for target as well. Also keep updating target
5321  * bit offset.
5322  *
5323  * Searching is performed through recursive exhaustive enumeration of all
5324  * fields of a struct/union. If there are any anonymous (embedded)
5325  * structs/unions, they are recursively searched as well. If field with
5326  * desired name is found, check compatibility between local and target types,
5327  * before returning result.
5328  *
5329  * 1 is returned, if field is found.
5330  * 0 is returned if no compatible field is found.
5331  * <0 is returned on error.
5332  */
5333 static int bpf_core_match_member(const struct btf *local_btf,
5334                                  const struct bpf_core_accessor *local_acc,
5335                                  const struct btf *targ_btf,
5336                                  __u32 targ_id,
5337                                  struct bpf_core_spec *spec,
5338                                  __u32 *next_targ_id)
5339 {
5340         const struct btf_type *local_type, *targ_type;
5341         const struct btf_member *local_member, *m;
5342         const char *local_name, *targ_name;
5343         __u32 local_id;
5344         int i, n, found;
5345
5346         targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id);
5347         if (!targ_type)
5348                 return -EINVAL;
5349         if (!btf_is_composite(targ_type))
5350                 return 0;
5351
5352         local_id = local_acc->type_id;
5353         local_type = btf__type_by_id(local_btf, local_id);
5354         local_member = btf_members(local_type) + local_acc->idx;
5355         local_name = btf__name_by_offset(local_btf, local_member->name_off);
5356
5357         n = btf_vlen(targ_type);
5358         m = btf_members(targ_type);
5359         for (i = 0; i < n; i++, m++) {
5360                 __u32 bit_offset;
5361
5362                 bit_offset = btf_member_bit_offset(targ_type, i);
5363
5364                 /* too deep struct/union/array nesting */
5365                 if (spec->raw_len == BPF_CORE_SPEC_MAX_LEN)
5366                         return -E2BIG;
5367
5368                 /* speculate this member will be the good one */
5369                 spec->bit_offset += bit_offset;
5370                 spec->raw_spec[spec->raw_len++] = i;
5371
5372                 targ_name = btf__name_by_offset(targ_btf, m->name_off);
5373                 if (str_is_empty(targ_name)) {
5374                         /* embedded struct/union, we need to go deeper */
5375                         found = bpf_core_match_member(local_btf, local_acc,
5376                                                       targ_btf, m->type,
5377                                                       spec, next_targ_id);
5378                         if (found) /* either found or error */
5379                                 return found;
5380                 } else if (strcmp(local_name, targ_name) == 0) {
5381                         /* matching named field */
5382                         struct bpf_core_accessor *targ_acc;
5383
5384                         targ_acc = &spec->spec[spec->len++];
5385                         targ_acc->type_id = targ_id;
5386                         targ_acc->idx = i;
5387                         targ_acc->name = targ_name;
5388
5389                         *next_targ_id = m->type;
5390                         found = bpf_core_fields_are_compat(local_btf,
5391                                                            local_member->type,
5392                                                            targ_btf, m->type);
5393                         if (!found)
5394                                 spec->len--; /* pop accessor */
5395                         return found;
5396                 }
5397                 /* member turned out not to be what we looked for */
5398                 spec->bit_offset -= bit_offset;
5399                 spec->raw_len--;
5400         }
5401
5402         return 0;
5403 }
5404
5405 /* Check local and target types for compatibility. This check is used for
5406  * type-based CO-RE relocations and follows slightly different rules than
5407  * field-based relocations. This function assumes that root types were already
5408  * checked for name match. Beyond that initial root-level name check, names
5409  * are completely ignored. Compatibility rules are as follows:
5410  *   - any two STRUCTs/UNIONs/FWDs/ENUMs/INTs are considered compatible, but
5411  *     kind should match for local and target types (i.e., STRUCT is not
5412  *     compatible with UNION);
5413  *   - for ENUMs, the size is ignored;
5414  *   - for INT, size and signedness are ignored;
5415  *   - for ARRAY, dimensionality is ignored, element types are checked for
5416  *     compatibility recursively;
5417  *   - CONST/VOLATILE/RESTRICT modifiers are ignored;
5418  *   - TYPEDEFs/PTRs are compatible if the types they point to are compatible;
5419  *   - FUNC_PROTOs are compatible if they have compatible signature: same
5420  *     number of input args and compatible return and argument types.
5421  * These rules are not set in stone and probably will be adjusted as we get
5422  * more experience with using BPF CO-RE relocations.
5423  */
5424 static int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id,
5425                                      const struct btf *targ_btf, __u32 targ_id)
5426 {
5427         const struct btf_type *local_type, *targ_type;
5428         int depth = 32; /* max recursion depth */
5429
5430         /* caller made sure that names match (ignoring flavor suffix) */
5431         local_type = btf__type_by_id(local_btf, local_id);
5432         targ_type = btf__type_by_id(targ_btf, targ_id);
5433         if (btf_kind(local_type) != btf_kind(targ_type))
5434                 return 0;
5435
5436 recur:
5437         depth--;
5438         if (depth < 0)
5439                 return -EINVAL;
5440
5441         local_type = skip_mods_and_typedefs(local_btf, local_id, &local_id);
5442         targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id);
5443         if (!local_type || !targ_type)
5444                 return -EINVAL;
5445
5446         if (btf_kind(local_type) != btf_kind(targ_type))
5447                 return 0;
5448
5449         switch (btf_kind(local_type)) {
5450         case BTF_KIND_UNKN:
5451         case BTF_KIND_STRUCT:
5452         case BTF_KIND_UNION:
5453         case BTF_KIND_ENUM:
5454         case BTF_KIND_FWD:
5455                 return 1;
5456         case BTF_KIND_INT:
5457                 /* just reject deprecated bitfield-like integers; all other
5458                  * integers are by default compatible between each other
5459                  * integers are by default compatible with each other
5460                 return btf_int_offset(local_type) == 0 && btf_int_offset(targ_type) == 0;
5461         case BTF_KIND_PTR:
5462                 local_id = local_type->type;
5463                 targ_id = targ_type->type;
5464                 goto recur;
5465         case BTF_KIND_ARRAY:
5466                 local_id = btf_array(local_type)->type;
5467                 targ_id = btf_array(targ_type)->type;
5468                 goto recur;
5469         case BTF_KIND_FUNC_PROTO: {
5470                 struct btf_param *local_p = btf_params(local_type);
5471                 struct btf_param *targ_p = btf_params(targ_type);
5472                 __u16 local_vlen = btf_vlen(local_type);
5473                 __u16 targ_vlen = btf_vlen(targ_type);
5474                 int i, err;
5475
5476                 if (local_vlen != targ_vlen)
5477                         return 0;
5478
5479                 for (i = 0; i < local_vlen; i++, local_p++, targ_p++) {
5480                         skip_mods_and_typedefs(local_btf, local_p->type, &local_id);
5481                         skip_mods_and_typedefs(targ_btf, targ_p->type, &targ_id);
5482                         err = bpf_core_types_are_compat(local_btf, local_id, targ_btf, targ_id);
5483                         if (err <= 0)
5484                                 return err;
5485                 }
5486
5487                 /* tail recurse for return type check */
5488                 skip_mods_and_typedefs(local_btf, local_type->type, &local_id);
5489                 skip_mods_and_typedefs(targ_btf, targ_type->type, &targ_id);
5490                 goto recur;
5491         }
5492         default:
5493                 pr_warn("unexpected kind %s relocated, local [%d], target [%d]\n",
5494                         btf_kind_str(local_type), local_id, targ_id);
5495                 return 0;
5496         }
5497 }
5498
5499 /*
5500  * Try to match local spec to a target type and, if successful, produce full
5501  * target spec (high-level, low-level + bit offset).
5502  */
5503 static int bpf_core_spec_match(struct bpf_core_spec *local_spec,
5504                                const struct btf *targ_btf, __u32 targ_id,
5505                                struct bpf_core_spec *targ_spec)
5506 {
5507         const struct btf_type *targ_type;
5508         const struct bpf_core_accessor *local_acc;
5509         struct bpf_core_accessor *targ_acc;
5510         int i, sz, matched;
5511
5512         memset(targ_spec, 0, sizeof(*targ_spec));
5513         targ_spec->btf = targ_btf;
5514         targ_spec->root_type_id = targ_id;
5515         targ_spec->relo_kind = local_spec->relo_kind;
5516
5517         if (core_relo_is_type_based(local_spec->relo_kind)) {
5518                 return bpf_core_types_are_compat(local_spec->btf,
5519                                                  local_spec->root_type_id,
5520                                                  targ_btf, targ_id);
5521         }
5522
5523         local_acc = &local_spec->spec[0];
5524         targ_acc = &targ_spec->spec[0];
5525
5526         if (core_relo_is_enumval_based(local_spec->relo_kind)) {
5527                 size_t local_essent_len, targ_essent_len;
5528                 const struct btf_enum *e;
5529                 const char *targ_name;
5530
5531                 /* has to resolve to an enum */
5532                 targ_type = skip_mods_and_typedefs(targ_spec->btf, targ_id, &targ_id);
5533                 if (!btf_is_enum(targ_type))
5534                         return 0;
5535
5536                 local_essent_len = bpf_core_essential_name_len(local_acc->name);
5537
5538                 for (i = 0, e = btf_enum(targ_type); i < btf_vlen(targ_type); i++, e++) {
5539                         targ_name = btf__name_by_offset(targ_spec->btf, e->name_off);
5540                         targ_essent_len = bpf_core_essential_name_len(targ_name);
5541                         if (targ_essent_len != local_essent_len)
5542                                 continue;
5543                         if (strncmp(local_acc->name, targ_name, local_essent_len) == 0) {
5544                                 targ_acc->type_id = targ_id;
5545                                 targ_acc->idx = i;
5546                                 targ_acc->name = targ_name;
5547                                 targ_spec->len++;
5548                                 targ_spec->raw_spec[targ_spec->raw_len] = targ_acc->idx;
5549                                 targ_spec->raw_len++;
5550                                 return 1;
5551                         }
5552                 }
5553                 return 0;
5554         }
5555
5556         if (!core_relo_is_field_based(local_spec->relo_kind))
5557                 return -EINVAL;
5558
5559         for (i = 0; i < local_spec->len; i++, local_acc++, targ_acc++) {
5560                 targ_type = skip_mods_and_typedefs(targ_spec->btf, targ_id,
5561                                                    &targ_id);
5562                 if (!targ_type)
5563                         return -EINVAL;
5564
5565                 if (local_acc->name) {
5566                         matched = bpf_core_match_member(local_spec->btf,
5567                                                         local_acc,
5568                                                         targ_btf, targ_id,
5569                                                         targ_spec, &targ_id);
5570                         if (matched <= 0)
5571                                 return matched;
5572                 } else {
5573                         /* for i=0, targ_id is already treated as array element
5574                          * type (because it's the original struct), for others
5575                          * we should find array element type first
5576                          */
5577                         if (i > 0) {
5578                                 const struct btf_array *a;
5579                                 bool flex;
5580
5581                                 if (!btf_is_array(targ_type))
5582                                         return 0;
5583
5584                                 a = btf_array(targ_type);
5585                                 flex = is_flex_arr(targ_btf, targ_acc - 1, a);
5586                                 if (!flex && local_acc->idx >= a->nelems)
5587                                         return 0;
5588                                 if (!skip_mods_and_typedefs(targ_btf, a->type,
5589                                                             &targ_id))
5590                                         return -EINVAL;
5591                         }
5592
5593                         /* too deep struct/union/array nesting */
5594                         if (targ_spec->raw_len == BPF_CORE_SPEC_MAX_LEN)
5595                                 return -E2BIG;
5596
5597                         targ_acc->type_id = targ_id;
5598                         targ_acc->idx = local_acc->idx;
5599                         targ_acc->name = NULL;
5600                         targ_spec->len++;
5601                         targ_spec->raw_spec[targ_spec->raw_len] = targ_acc->idx;
5602                         targ_spec->raw_len++;
5603
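			/* e.g., index 3 into an array of 4-byte elements will
			 * advance bit_offset below by 3 * 4 * 8 = 96 bits
			 * (illustrative numbers)
			 */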
5604                         sz = btf__resolve_size(targ_btf, targ_id);
5605                         if (sz < 0)
5606                                 return sz;
5607                         targ_spec->bit_offset += local_acc->idx * sz * 8;
5608                 }
5609         }
5610
5611         return 1;
5612 }
5613
5614 static int bpf_core_calc_field_relo(const struct bpf_program *prog,
5615                                     const struct bpf_core_relo *relo,
5616                                     const struct bpf_core_spec *spec,
5617                                     __u32 *val, __u32 *field_sz, __u32 *type_id,
5618                                     bool *validate)
5619 {
5620         const struct bpf_core_accessor *acc;
5621         const struct btf_type *t;
5622         __u32 byte_off, byte_sz, bit_off, bit_sz, field_type_id;
5623         const struct btf_member *m;
5624         const struct btf_type *mt;
5625         bool bitfield;
5626         __s64 sz;
5627
5628         *field_sz = 0;
5629
5630         if (relo->kind == BPF_FIELD_EXISTS) {
5631                 *val = spec ? 1 : 0;
5632                 return 0;
5633         }
5634
5635         if (!spec)
5636                 return -EUCLEAN; /* request instruction poisoning */
5637
5638         acc = &spec->spec[spec->len - 1];
5639         t = btf__type_by_id(spec->btf, acc->type_id);
5640
5641         /* a[n] accessor needs special handling */
5642         if (!acc->name) {
5643                 if (relo->kind == BPF_FIELD_BYTE_OFFSET) {
5644                         *val = spec->bit_offset / 8;
5645                         /* remember field size for load/store mem size */
5646                         sz = btf__resolve_size(spec->btf, acc->type_id);
5647                         if (sz < 0)
5648                                 return -EINVAL;
5649                         *field_sz = sz;
5650                         *type_id = acc->type_id;
5651                 } else if (relo->kind == BPF_FIELD_BYTE_SIZE) {
5652                         sz = btf__resolve_size(spec->btf, acc->type_id);
5653                         if (sz < 0)
5654                                 return -EINVAL;
5655                         *val = sz;
5656                 } else {
5657                         pr_warn("prog '%s': relo %d at insn #%d can't be applied to array access\n",
5658                                 prog->name, relo->kind, relo->insn_off / 8);
5659                         return -EINVAL;
5660                 }
5661                 if (validate)
5662                         *validate = true;
5663                 return 0;
5664         }
5665
5666         m = btf_members(t) + acc->idx;
5667         mt = skip_mods_and_typedefs(spec->btf, m->type, &field_type_id);
5668         bit_off = spec->bit_offset;
5669         bit_sz = btf_member_bitfield_size(t, acc->idx);
5670
5671         bitfield = bit_sz > 0;
5672         if (bitfield) {
5673                 byte_sz = mt->size;
5674                 byte_off = bit_off / 8 / byte_sz * byte_sz;
5675                 /* figure out smallest int size necessary for bitfield load */
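		/* e.g. (illustrative numbers): a 10-bit bitfield at bit_off 28
		 * within a 4-byte base type starts with byte_sz = 4, byte_off = 0;
		 * 28 + 10 - 0 > 32, so byte_sz doubles to 8 (byte_off stays 0)
		 * and the bitfield becomes readable with one 8-byte load
		 */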
5676                 while (bit_off + bit_sz - byte_off * 8 > byte_sz * 8) {
5677                         if (byte_sz >= 8) {
5678                                 /* bitfield can't be read with 64-bit read */
5679                                 pr_warn("prog '%s': relo %d at insn #%d can't be satisfied for bitfield\n",
5680                                         prog->name, relo->kind, relo->insn_off / 8);
5681                                 return -E2BIG;
5682                         }
5683                         byte_sz *= 2;
5684                         byte_off = bit_off / 8 / byte_sz * byte_sz;
5685                 }
5686         } else {
5687                 sz = btf__resolve_size(spec->btf, field_type_id);
5688                 if (sz < 0)
5689                         return -EINVAL;
5690                 byte_sz = sz;
5691                 byte_off = spec->bit_offset / 8;
5692                 bit_sz = byte_sz * 8;
5693         }
5694
5695         /* for bitfields, all the relocatable aspects are ambiguous and we
5696          * might disagree with the compiler, so turn off validation of expected
5697          * value, except for signedness
5698          */
5699         if (validate)
5700                 *validate = !bitfield;
5701
5702         switch (relo->kind) {
5703         case BPF_FIELD_BYTE_OFFSET:
5704                 *val = byte_off;
5705                 if (!bitfield) {
5706                         *field_sz = byte_sz;
5707                         *type_id = field_type_id;
5708                 }
5709                 break;
5710         case BPF_FIELD_BYTE_SIZE:
5711                 *val = byte_sz;
5712                 break;
5713         case BPF_FIELD_SIGNED:
5714                 /* enums will be assumed unsigned */
5715                 *val = btf_is_enum(mt) ||
5716                        (btf_int_encoding(mt) & BTF_INT_SIGNED);
5717                 if (validate)
5718                         *validate = true; /* signedness is never ambiguous */
5719                 break;
5720         case BPF_FIELD_LSHIFT_U64:
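		/* e.g. (illustrative, little-endian): for the bitfield above
		 * with bit_off = 28, bit_sz = 10, byte_off = 0, this yields
		 * 64 - 38 = 26: shifting the 8-byte load left by 26 places the
		 * bitfield's top bit at bit 63, ready for the right shift below
		 */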
5721 #if __BYTE_ORDER == __LITTLE_ENDIAN
5722                 *val = 64 - (bit_off + bit_sz - byte_off  * 8);
5723 #else
5724                 *val = (8 - byte_sz) * 8 + (bit_off - byte_off * 8);
5725 #endif
5726                 break;
5727         case BPF_FIELD_RSHIFT_U64:
5728                 *val = 64 - bit_sz;
5729                 if (validate)
5730                         *validate = true; /* right shift is never ambiguous */
5731                 break;
5732         case BPF_FIELD_EXISTS:
5733         default:
5734                 return -EOPNOTSUPP;
5735         }
5736
5737         return 0;
5738 }
5739
5740 static int bpf_core_calc_type_relo(const struct bpf_core_relo *relo,
5741                                    const struct bpf_core_spec *spec,
5742                                    __u32 *val)
5743 {
5744         __s64 sz;
5745
5746         /* type-based relos return zero when target type is not found */
5747         if (!spec) {
5748                 *val = 0;
5749                 return 0;
5750         }
5751
5752         switch (relo->kind) {
5753         case BPF_TYPE_ID_TARGET:
5754                 *val = spec->root_type_id;
5755                 break;
5756         case BPF_TYPE_EXISTS:
5757                 *val = 1;
5758                 break;
5759         case BPF_TYPE_SIZE:
5760                 sz = btf__resolve_size(spec->btf, spec->root_type_id);
5761                 if (sz < 0)
5762                         return -EINVAL;
5763                 *val = sz;
5764                 break;
5765         case BPF_TYPE_ID_LOCAL:
5766         /* BPF_TYPE_ID_LOCAL is handled specially and shouldn't get here */
5767         default:
5768                 return -EOPNOTSUPP;
5769         }
5770
5771         return 0;
5772 }
5773
5774 static int bpf_core_calc_enumval_relo(const struct bpf_core_relo *relo,
5775                                       const struct bpf_core_spec *spec,
5776                                       __u32 *val)
5777 {
5778         const struct btf_type *t;
5779         const struct btf_enum *e;
5780
5781         switch (relo->kind) {
5782         case BPF_ENUMVAL_EXISTS:
5783                 *val = spec ? 1 : 0;
5784                 break;
5785         case BPF_ENUMVAL_VALUE:
5786                 if (!spec)
5787                         return -EUCLEAN; /* request instruction poisoning */
5788                 t = btf__type_by_id(spec->btf, spec->spec[0].type_id);
5789                 e = btf_enum(t) + spec->spec[0].idx;
5790                 *val = e->val;
5791                 break;
5792         default:
5793                 return -EOPNOTSUPP;
5794         }
5795
5796         return 0;
5797 }
5798
5799 struct bpf_core_relo_res
5800 {
5801         /* expected value in the instruction, unless validate == false */
5802         __u32 orig_val;
5803         /* new value that needs to be patched up to */
5804         __u32 new_val;
5805         /* relocation unsuccessful, poison instruction, but don't fail load */
5806         bool poison;
5807         /* some relocations can't be validated against orig_val */
5808         bool validate;
5809         /* for field byte offset relocations of the forms:
5810          *     *(T *)(rX + <off>) = rY
5811          *     rX = *(T *)(rY + <off>),
5812          * we remember original and resolved field size to adjust direct
5813          * memory loads of pointers and integers; this is necessary for 32-bit
5814          * host kernel architectures, but also allows to automatically
5815          * relocate fields that were resized from, e.g., u32 to u64, etc.
5816          */
5817         bool fail_memsz_adjust;
5818         __u32 orig_sz;
5819         __u32 orig_type_id;
5820         __u32 new_sz;
5821         __u32 new_type_id;
5822 };
5823
5824 /* Calculate original and target relocation values, given local and target
5825  * specs and relocation kind. These values are calculated for each candidate.
5826  * If there are multiple candidates, resulting values should all be consistent
5827  * with each other. Otherwise, libbpf will refuse to proceed due to ambiguity.
5828  * If instruction has to be poisoned, *poison will be set to true.
5829  */
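/* For example (hypothetical): if two candidate flavors of a struct both
 * resolve a field to byte offset 16, relocation proceeds; if one resolves
 * to 16 and another to 24, libbpf reports an ambiguity error rather than
 * guessing.
 */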
5830 static int bpf_core_calc_relo(const struct bpf_program *prog,
5831                               const struct bpf_core_relo *relo,
5832                               int relo_idx,
5833                               const struct bpf_core_spec *local_spec,
5834                               const struct bpf_core_spec *targ_spec,
5835                               struct bpf_core_relo_res *res)
5836 {
5837         int err = -EOPNOTSUPP;
5838
5839         res->orig_val = 0;
5840         res->new_val = 0;
5841         res->poison = false;
5842         res->validate = true;
5843         res->fail_memsz_adjust = false;
5844         res->orig_sz = res->new_sz = 0;
5845         res->orig_type_id = res->new_type_id = 0;
5846
5847         if (core_relo_is_field_based(relo->kind)) {
5848                 err = bpf_core_calc_field_relo(prog, relo, local_spec,
5849                                                &res->orig_val, &res->orig_sz,
5850                                                &res->orig_type_id, &res->validate);
5851                 err = err ?: bpf_core_calc_field_relo(prog, relo, targ_spec,
5852                                                       &res->new_val, &res->new_sz,
5853                                                       &res->new_type_id, NULL);
5854                 if (err)
5855                         goto done;
5856                 /* Validate if it's safe to adjust load/store memory size.
5857                  * Adjustments are performed only if original and new memory
5858                  * sizes differ.
5859                  */
5860                 res->fail_memsz_adjust = false;
5861                 if (res->orig_sz != res->new_sz) {
5862                         const struct btf_type *orig_t, *new_t;
5863
5864                         orig_t = btf__type_by_id(local_spec->btf, res->orig_type_id);
5865                         new_t = btf__type_by_id(targ_spec->btf, res->new_type_id);
5866
5867                         /* There are two use cases in which it's safe to
5868                          * adjust load/store's mem size:
5869                          *   - reading a 32-bit kernel pointer, while on the
5870                          *   BPF side pointers are always 64-bit; in this case
5871                          *   it's safe to "downsize" the instruction size, as
5872                          *   the pointer is treated as an unsigned integer
5873                          *   with zero-extended upper 32 bits;
5874                          *   - reading unsigned integers, again because
5875                          *   zero-extension preserves the value correctly.
5876                          *
5877                          * In all other cases it's incorrect to attempt the
5878                          * load/store, because the read value would be wrong,
5879                          * so we poison the relocated instruction.
5880                          */
5881                         if (btf_is_ptr(orig_t) && btf_is_ptr(new_t))
5882                                 goto done;
5883                         if (btf_is_int(orig_t) && btf_is_int(new_t) &&
5884                             btf_int_encoding(orig_t) != BTF_INT_SIGNED &&
5885                             btf_int_encoding(new_t) != BTF_INT_SIGNED)
5886                                 goto done;
5887
5888                         /* mark as invalid mem size adjustment, but this will
5889                          * only be checked for LDX/STX/ST insns
5890                          */
5891                         res->fail_memsz_adjust = true;
5892                 }
5893         } else if (core_relo_is_type_based(relo->kind)) {
5894                 err = bpf_core_calc_type_relo(relo, local_spec, &res->orig_val);
5895                 err = err ?: bpf_core_calc_type_relo(relo, targ_spec, &res->new_val);
5896         } else if (core_relo_is_enumval_based(relo->kind)) {
5897                 err = bpf_core_calc_enumval_relo(relo, local_spec, &res->orig_val);
5898                 err = err ?: bpf_core_calc_enumval_relo(relo, targ_spec, &res->new_val);
5899         }
5900
5901 done:
5902         if (err == -EUCLEAN) {
5903                 /* EUCLEAN is used to signal instruction poisoning request */
5904                 res->poison = true;
5905                 err = 0;
5906         } else if (err == -EOPNOTSUPP) {
5907                 /* EOPNOTSUPP means unknown/unsupported relocation */
5908                 pr_warn("prog '%s': relo #%d: unrecognized CO-RE relocation %s (%d) at insn #%d\n",
5909                         prog->name, relo_idx, core_relo_kind_str(relo->kind),
5910                         relo->kind, relo->insn_off / 8);
5911         }
5912
5913         return err;
5914 }
5915
5916 /*
5917  * Turn an instruction for which CO-RE relocation failed into an invalid one
5918  * with a distinct signature.
5919  */
5920 static void bpf_core_poison_insn(struct bpf_program *prog, int relo_idx,
5921                                  int insn_idx, struct bpf_insn *insn)
5922 {
5923         pr_debug("prog '%s': relo #%d: substituting insn #%d w/ invalid insn\n",
5924                  prog->name, relo_idx, insn_idx);
5925         insn->code = BPF_JMP | BPF_CALL;
5926         insn->dst_reg = 0;
5927         insn->src_reg = 0;
5928         insn->off = 0;
5929         /* if this instruction is reachable (not dead code), the
5930          * verifier will complain with the following message:
5931          * invalid func unknown#195896080
5932          */
5933         insn->imm = 195896080; /* => 0xbad2310 => "bad relo" */
5934 }
5935
5936 static int insn_bpf_size_to_bytes(struct bpf_insn *insn)
5937 {
5938         switch (BPF_SIZE(insn->code)) {
5939         case BPF_DW: return 8;
5940         case BPF_W: return 4;
5941         case BPF_H: return 2;
5942         case BPF_B: return 1;
5943         default: return -1;
5944         }
5945 }
5946
5947 static int insn_bytes_to_bpf_size(__u32 sz)
5948 {
5949         switch (sz) {
5950         case 8: return BPF_DW;
5951         case 4: return BPF_W;
5952         case 2: return BPF_H;
5953         case 1: return BPF_B;
5954         default: return -1;
5955         }
5956 }
5957
5958 /*
5959  * Patch relocatable BPF instruction.
5960  *
5961  * Patched value is determined by relocation kind and target specification.
5962  * For existence relocations, target spec will be NULL if the field/type is not found.
5963  * Expected insn->imm value is determined using relocation kind and local
5964  * spec, and is checked before patching instruction. If actual insn->imm value
5965  * is wrong, bail out with error.
5966  *
5967  * Currently supported classes of BPF instruction are:
5968  * 1. rX = <imm> (assignment with immediate operand);
5969  * 2. rX += <imm> (arithmetic operations with immediate operand);
5970  * 3. rX = <imm64> (load with 64-bit immediate value);
5971  * 4. rX = *(T *)(rY + <off>), where T is one of {u8, u16, u32, u64};
5972  * 5. *(T *)(rX + <off>) = rY, where T is one of {u8, u16, u32, u64};
5973  * 6. *(T *)(rX + <off>) = <imm>, where T is one of {u8, u16, u32, u64}.
5974  */
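/* An illustrative sketch of the common case 4 above: a field byte offset
 * relocation that resolved 8 (local) -> 16 (target) rewrites
 *     r1 = *(u32 *)(r2 + 8)
 * into
 *     r1 = *(u32 *)(r2 + 16)
 * by checking insn->off against the expected local value and then patching
 * it with the target value (offsets here are hypothetical).
 */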
5975 static int bpf_core_patch_insn(struct bpf_program *prog,
5976                                const struct bpf_core_relo *relo,
5977                                int relo_idx,
5978                                const struct bpf_core_relo_res *res)
5979 {
5980         __u32 orig_val, new_val;
5981         struct bpf_insn *insn;
5982         int insn_idx;
5983         __u8 class;
5984
5985         if (relo->insn_off % BPF_INSN_SZ)
5986                 return -EINVAL;
5987         insn_idx = relo->insn_off / BPF_INSN_SZ;
5988         /* adjust insn_idx from section frame of reference to the local
5989          * program's frame of reference; (sub-)program code is not yet
5990          * relocated, so it's enough to just subtract in-section offset
5991          */
5992         insn_idx = insn_idx - prog->sec_insn_off;
5993         insn = &prog->insns[insn_idx];
5994         class = BPF_CLASS(insn->code);
5995
5996         if (res->poison) {
5997 poison:
5998                 /* poison the second part of ldimm64 to avoid a confusing
5999                  * "unknown opcode 00" error from the verifier
6000                  */
6001                 if (is_ldimm64_insn(insn))
6002                         bpf_core_poison_insn(prog, relo_idx, insn_idx + 1, insn + 1);
6003                 bpf_core_poison_insn(prog, relo_idx, insn_idx, insn);
6004                 return 0;
6005         }
6006
6007         orig_val = res->orig_val;
6008         new_val = res->new_val;
6009
6010         switch (class) {
6011         case BPF_ALU:
6012         case BPF_ALU64:
6013                 if (BPF_SRC(insn->code) != BPF_K)
6014                         return -EINVAL;
6015                 if (res->validate && insn->imm != orig_val) {
6016                         pr_warn("prog '%s': relo #%d: unexpected insn #%d (ALU/ALU64) value: got %u, exp %u -> %u\n",
6017                                 prog->name, relo_idx,
6018                                 insn_idx, insn->imm, orig_val, new_val);
6019                         return -EINVAL;
6020                 }
6021                 orig_val = insn->imm;
6022                 insn->imm = new_val;
6023                 pr_debug("prog '%s': relo #%d: patched insn #%d (ALU/ALU64) imm %u -> %u\n",
6024                          prog->name, relo_idx, insn_idx,
6025                          orig_val, new_val);
6026                 break;
6027         case BPF_LDX:
6028         case BPF_ST:
6029         case BPF_STX:
6030                 if (res->validate && insn->off != orig_val) {
6031                         pr_warn("prog '%s': relo #%d: unexpected insn #%d (LDX/ST/STX) value: got %u, exp %u -> %u\n",
6032                                 prog->name, relo_idx, insn_idx, insn->off, orig_val, new_val);
6033                         return -EINVAL;
6034                 }
6035                 if (new_val > SHRT_MAX) {
6036                         pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) value too big: %u\n",
6037                                 prog->name, relo_idx, insn_idx, new_val);
6038                         return -ERANGE;
6039                 }
6040                 if (res->fail_memsz_adjust) {
6041                         pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) accesses field incorrectly. "
6042                                 "Make sure you are accessing pointers, unsigned integers, or fields of matching type and size.\n",
6043                                 prog->name, relo_idx, insn_idx);
6044                         goto poison;
6045                 }
6046
6047                 orig_val = insn->off;
6048                 insn->off = new_val;
6049                 pr_debug("prog '%s': relo #%d: patched insn #%d (LDX/ST/STX) off %u -> %u\n",
6050                          prog->name, relo_idx, insn_idx, orig_val, new_val);
6051
6052                 if (res->new_sz != res->orig_sz) {
6053                         int insn_bytes_sz, insn_bpf_sz;
6054
6055                         insn_bytes_sz = insn_bpf_size_to_bytes(insn);
6056                         if (insn_bytes_sz != res->orig_sz) {
6057                                 pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) unexpected mem size: got %d, exp %u\n",
6058                                         prog->name, relo_idx, insn_idx, insn_bytes_sz, res->orig_sz);
6059                                 return -EINVAL;
6060                         }
6061
6062                         insn_bpf_sz = insn_bytes_to_bpf_size(res->new_sz);
6063                         if (insn_bpf_sz < 0) {
6064                                 pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) invalid new mem size: %u\n",
6065                                         prog->name, relo_idx, insn_idx, res->new_sz);
6066                                 return -EINVAL;
6067                         }
6068
6069                         insn->code = BPF_MODE(insn->code) | insn_bpf_sz | BPF_CLASS(insn->code);
6070                         pr_debug("prog '%s': relo #%d: patched insn #%d (LDX/ST/STX) mem_sz %u -> %u\n",
6071                                  prog->name, relo_idx, insn_idx, res->orig_sz, res->new_sz);
6072                 }
6073                 break;
6074         case BPF_LD: {
6075                 __u64 imm;
6076
6077                 if (!is_ldimm64_insn(insn) ||
6078                     insn[0].src_reg != 0 || insn[0].off != 0 ||
6079                     insn_idx + 1 >= prog->insns_cnt ||
6080                     insn[1].code != 0 || insn[1].dst_reg != 0 ||
6081                     insn[1].src_reg != 0 || insn[1].off != 0) {
6082                         pr_warn("prog '%s': relo #%d: insn #%d (LDIMM64) has unexpected form\n",
6083                                 prog->name, relo_idx, insn_idx);
6084                         return -EINVAL;
6085                 }
6086
6087                 imm = (__u32)insn[0].imm | ((__u64)insn[1].imm << 32); /* avoid sign extension of insn[0].imm */
6088                 if (res->validate && imm != orig_val) {
6089                         pr_warn("prog '%s': relo #%d: unexpected insn #%d (LDIMM64) value: got %llu, exp %u -> %u\n",
6090                                 prog->name, relo_idx,
6091                                 insn_idx, (unsigned long long)imm,
6092                                 orig_val, new_val);
6093                         return -EINVAL;
6094                 }
6095
6096                 insn[0].imm = new_val;
6097                 insn[1].imm = 0; /* currently only 32-bit values are supported */
6098                 pr_debug("prog '%s': relo #%d: patched insn #%d (LDIMM64) imm64 %llu -> %u\n",
6099                          prog->name, relo_idx, insn_idx,
6100                          (unsigned long long)imm, new_val);
6101                 break;
6102         }
6103         default:
6104                 pr_warn("prog '%s': relo #%d: trying to relocate unrecognized insn #%d, code:0x%x, src:0x%x, dst:0x%x, off:0x%x, imm:0x%x\n",
6105                         prog->name, relo_idx, insn_idx, insn->code,
6106                         insn->src_reg, insn->dst_reg, insn->off, insn->imm);
6107                 return -EINVAL;
6108         }
6109
6110         return 0;
6111 }
6112
6113 /* Output spec definition in the format:
6114  * [<type-id>] <kind> <type-name><spec> (<raw-spec> @ offset <byte-off>[.<bit-off>]),
6115  * where <spec> is a C-syntax view of recorded field access, e.g.: x.a[3].b
6116  */
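/* A hypothetical field-based spec for a 'pid' member could thus print as:
 *     [78] struct task_struct.pid (0:24 @ offset 2344)
 * (type ID, raw spec, and offset are illustrative, not taken from real BTF)
 */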
6117 static void bpf_core_dump_spec(int level, const struct bpf_core_spec *spec)
6118 {
6119         const struct btf_type *t;
6120         const struct btf_enum *e;
6121         const char *s;
6122         __u32 type_id;
6123         int i;
6124
6125         type_id = spec->root_type_id;
6126         t = btf__type_by_id(spec->btf, type_id);
6127         s = btf__name_by_offset(spec->btf, t->name_off);
6128
6129         libbpf_print(level, "[%u] %s %s", type_id, btf_kind_str(t), str_is_empty(s) ? "<anon>" : s);
6130
6131         if (core_relo_is_type_based(spec->relo_kind))
6132                 return;
6133
6134         if (core_relo_is_enumval_based(spec->relo_kind)) {
6135                 t = skip_mods_and_typedefs(spec->btf, type_id, NULL);
6136                 e = btf_enum(t) + spec->raw_spec[0];
6137                 s = btf__name_by_offset(spec->btf, e->name_off);
6138
6139                 libbpf_print(level, "::%s = %u", s, e->val);
6140                 return;
6141         }
6142
6143         if (core_relo_is_field_based(spec->relo_kind)) {
6144                 for (i = 0; i < spec->len; i++) {
6145                         if (spec->spec[i].name)
6146                                 libbpf_print(level, ".%s", spec->spec[i].name);
6147                         else if (i > 0 || spec->spec[i].idx > 0)
6148                                 libbpf_print(level, "[%u]", spec->spec[i].idx);
6149                 }
6150
6151                 libbpf_print(level, " (");
6152                 for (i = 0; i < spec->raw_len; i++)
6153                         libbpf_print(level, "%s%d", i == 0 ? "" : ":", spec->raw_spec[i]);
6154
6155                 if (spec->bit_offset % 8)
6156                         libbpf_print(level, " @ offset %u.%u)",
6157                                      spec->bit_offset / 8, spec->bit_offset % 8);
6158                 else
6159                         libbpf_print(level, " @ offset %u)", spec->bit_offset / 8);
6160                 return;
6161         }
6162 }
6163
6164 static size_t bpf_core_hash_fn(const void *key, void *ctx)
6165 {
6166         return (size_t)key;
6167 }
6168
6169 static bool bpf_core_equal_fn(const void *k1, const void *k2, void *ctx)
6170 {
6171         return k1 == k2;
6172 }
6173
6174 static void *u32_as_hash_key(__u32 x)
6175 {
6176         return (void *)(uintptr_t)x;
6177 }
6178
6179 /*
6180  * CO-RE relocate single instruction.
6181  *
6182  * The outline and important points of the algorithm:
6183  * 1. For given local type, find corresponding candidate target types.
6184  *    Candidate type is a type with the same "essential" name, ignoring
6185  *    everything after last triple underscore (___). E.g., `sample`,
6186  *    `sample___flavor_one`, `sample___flavor_another_one`, are all candidates
6187  *    for each other. Names with triple underscore are referred to as
6188  *    "flavors" and are useful, among other things, to allow specifying or
6189  *    supporting incompatible variations of the same kernel struct, which
6190  *    might differ between different kernel versions and/or build
6191  *    configurations.
6192  *
6193  *    N.B. Struct "flavors" could be generated by bpftool's BTF-to-C
6194  *    converter, when deduplicated BTF of a kernel still contains more than
6195  *    one distinct type with the same name. In that case, ___2, ___3, etc.
6196  *    are appended starting from the second name conflict. But struct flavors
6197  *    are also useful when defined "locally", in a BPF program, to extract the
6198  *    same data despite incompatible changes between different kernel
6199  *    versions/configurations. For instance, to handle field renames between
6200  *    kernel versions, one can use two flavors of the struct name with the
6201  *    same common name and use conditional relocations to extract that field,
6202  *    depending on target kernel version.
6203  * 2. For each candidate type, try to match local specification to this
6204  *    candidate target type. Matching involves finding corresponding
6205  *    high-level spec accessors, meaning that all named fields should match,
6206  *    and all array accesses should be within the actual bounds. Also,
6207  *    types should be compatible (see bpf_core_fields_are_compat for details).
6208  * 3. It is supported and expected that there might be multiple flavors
6209  *    matching the spec. As long as all the specs resolve to the same set of
6210  *    offsets across all candidates, there is no error. If there is any
6211  *    ambiguity, CO-RE relocation will fail. This is necessary to accommodate
6212  *    imperfections of BTF deduplication, which can cause slight duplication of
6213  *    the same BTF type, if some directly or indirectly referenced (by
6214  *    pointer) type gets resolved to different actual types in different
6215  *    object files. If such a situation occurs, the deduplicated BTF will end up
6216  *    with two (or more) structurally identical types, which differ only in
6217  *    types they refer to through pointer. This should be OK in most cases and
6218  *    is not an error.
6219  * 4. Candidate types search is performed by linearly scanning through all
6220  *    types in target BTF. It is anticipated that this is overall more
6221  *    efficient memory-wise and not significantly worse (if not better)
6222  *    CPU-wise compared to prebuilding a map from all local type names to
6223  *    a list of candidate type names. It's also sped up by caching resolved
6224  *    list of matching candidates for each local "root" type ID that has at
6225  *    least one bpf_core_relo associated with it. This list is shared
6226  *    between multiple relocations for the same type ID and is updated as some
6227  *    of the candidates are pruned due to structural incompatibility.
6228  */
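/* A hedged BPF-side illustration of point 1 (all type/field names are
 * hypothetical): to cope with a field rename across kernel versions, a BPF
 * program can define two flavors of the same struct and pick one at load
 * time:
 *
 *     struct sample___v1 { int old_field; } __attribute__((preserve_access_index));
 *     struct sample___v2 { int new_field; } __attribute__((preserve_access_index));
 *
 *     if (bpf_core_field_exists(((struct sample___v1 *)s)->old_field))
 *             val = BPF_CORE_READ((struct sample___v1 *)s, old_field);
 *     else
 *             val = BPF_CORE_READ((struct sample___v2 *)s, new_field);
 *
 * Both flavors are candidates for kernel type 'sample'; the non-matching
 * branch's relocations get poisoned and are eliminated as dead code when
 * guarded as above.
 */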
6229 static int bpf_core_apply_relo(struct bpf_program *prog,
6230                                const struct bpf_core_relo *relo,
6231                                int relo_idx,
6232                                const struct btf *local_btf,
6233                                struct hashmap *cand_cache)
6234 {
6235         struct bpf_core_spec local_spec, cand_spec, targ_spec = {};
6236         const void *type_key = u32_as_hash_key(relo->type_id);
6237         struct bpf_core_relo_res cand_res, targ_res;
6238         const struct btf_type *local_type;
6239         const char *local_name;
6240         struct core_cand_list *cands = NULL;
6241         __u32 local_id;
6242         const char *spec_str;
6243         int i, j, err;
6244
6245         local_id = relo->type_id;
6246         local_type = btf__type_by_id(local_btf, local_id);
6247         if (!local_type)
6248                 return -EINVAL;
6249
6250         local_name = btf__name_by_offset(local_btf, local_type->name_off);
6251         if (!local_name)
6252                 return -EINVAL;
6253
6254         spec_str = btf__name_by_offset(local_btf, relo->access_str_off);
6255         if (str_is_empty(spec_str))
6256                 return -EINVAL;
6257
6258         if (prog->obj->gen_loader) {
6259                 pr_warn("// TODO core_relo: prog %td insn[%d] %s %s kind %d\n",
6260                         prog - prog->obj->programs, relo->insn_off / 8,
6261                         local_name, spec_str, relo->kind);
6262                 return -ENOTSUP;
6263         }
6264         err = bpf_core_parse_spec(local_btf, local_id, spec_str, relo->kind, &local_spec);
6265         if (err) {
6266                 pr_warn("prog '%s': relo #%d: parsing [%d] %s %s + %s failed: %d\n",
6267                         prog->name, relo_idx, local_id, btf_kind_str(local_type),
6268                         str_is_empty(local_name) ? "<anon>" : local_name,
6269                         spec_str, err);
6270                 return -EINVAL;
6271         }
6272
6273         pr_debug("prog '%s': relo #%d: kind <%s> (%d), spec is ", prog->name,
6274                  relo_idx, core_relo_kind_str(relo->kind), relo->kind);
6275         bpf_core_dump_spec(LIBBPF_DEBUG, &local_spec);
6276         libbpf_print(LIBBPF_DEBUG, "\n");
6277
6278         /* TYPE_ID_LOCAL relo is special and doesn't need candidate search */
6279         if (relo->kind == BPF_TYPE_ID_LOCAL) {
6280                 targ_res.validate = true;
6281                 targ_res.poison = false;
6282                 targ_res.orig_val = local_spec.root_type_id;
6283                 targ_res.new_val = local_spec.root_type_id;
6284                 goto patch_insn;
6285         }
6286
6287         /* libbpf doesn't support candidate search for anonymous types */
6288         if (str_is_empty(local_name)) {
6289                 pr_warn("prog '%s': relo #%d: <%s> (%d) relocation doesn't support anonymous types\n",
6290                         prog->name, relo_idx, core_relo_kind_str(relo->kind), relo->kind);
6291                 return -EOPNOTSUPP;
6292         }
6293
6294         if (!hashmap__find(cand_cache, type_key, (void **)&cands)) {
6295                 cands = bpf_core_find_cands(prog->obj, local_btf, local_id);
6296                 if (IS_ERR(cands)) {
6297                         pr_warn("prog '%s': relo #%d: target candidate search failed for [%d] %s %s: %ld\n",
6298                                 prog->name, relo_idx, local_id, btf_kind_str(local_type),
6299                                 local_name, PTR_ERR(cands));
6300                         return PTR_ERR(cands);
6301                 }
6302                 err = hashmap__set(cand_cache, type_key, cands, NULL, NULL);
6303                 if (err) {
6304                         bpf_core_free_cands(cands);
6305                         return err;
6306                 }
6307         }
6308
6309         for (i = 0, j = 0; i < cands->len; i++) {
6310                 err = bpf_core_spec_match(&local_spec, cands->cands[i].btf,
6311                                           cands->cands[i].id, &cand_spec);
6312                 if (err < 0) {
6313                         pr_warn("prog '%s': relo #%d: error matching candidate #%d ",
6314                                 prog->name, relo_idx, i);
6315                         bpf_core_dump_spec(LIBBPF_WARN, &cand_spec);
6316                         libbpf_print(LIBBPF_WARN, ": %d\n", err);
6317                         return err;
6318                 }
6319
6320                 pr_debug("prog '%s': relo #%d: %s candidate #%d ", prog->name,
6321                          relo_idx, err == 0 ? "non-matching" : "matching", i);
6322                 bpf_core_dump_spec(LIBBPF_DEBUG, &cand_spec);
6323                 libbpf_print(LIBBPF_DEBUG, "\n");
6324
6325                 if (err == 0)
6326                         continue;
6327
6328                 err = bpf_core_calc_relo(prog, relo, relo_idx, &local_spec, &cand_spec, &cand_res);
6329                 if (err)
6330                         return err;
6331
6332                 if (j == 0) {
6333                         targ_res = cand_res;
6334                         targ_spec = cand_spec;
6335                 } else if (cand_spec.bit_offset != targ_spec.bit_offset) {
6336                         /* if there are many field relo candidates, they
6337                          * should all resolve to the same bit offset
6338                          */
6339                         pr_warn("prog '%s': relo #%d: field offset ambiguity: %u != %u\n",
6340                                 prog->name, relo_idx, cand_spec.bit_offset,
6341                                 targ_spec.bit_offset);
6342                         return -EINVAL;
6343                 } else if (cand_res.poison != targ_res.poison || cand_res.new_val != targ_res.new_val) {
6344                         /* all candidates should result in the same relocation
6345                          * decision and value, otherwise it's dangerous to
6346                          * proceed due to ambiguity
6347                          */
6348                         pr_warn("prog '%s': relo #%d: relocation decision ambiguity: %s %u != %s %u\n",
6349                                 prog->name, relo_idx,
6350                                 cand_res.poison ? "failure" : "success", cand_res.new_val,
6351                                 targ_res.poison ? "failure" : "success", targ_res.new_val);
6352                         return -EINVAL;
6353                 }
6354
6355                 cands->cands[j++] = cands->cands[i];
6356         }
6357
6358         /*
6359          * For a BPF_FIELD_EXISTS relo, or when the BPF program in use has
6360          * field existence checks or kernel version/config checks, it's
6361          * expected that we might not find any candidates. In this case, if
6362          * the field wasn't found in any candidate, the list of candidates
6363          * shouldn't change at all; we'll just handle the relocation
6364          * appropriately, depending on the relo's kind.
6365          */
6366         if (j > 0)
6367                 cands->len = j;
6368
6369         /*
6370          * If no candidates were found, it might be either a programmer error
6371          * or an expected case, depending on whether the instruction with the
6372          * relocation is guarded in some way that makes it unreachable (dead
6373          * code) if the relocation can't be resolved. This is handled in
6374          * bpf_core_patch_insn() uniformly by replacing that instruction with
6375          * a BPF helper call insn (using an invalid helper ID). If that
6376          * instruction is indeed unreachable, it will be ignored and eliminated
6377          * by the verifier. If it was an error, the verifier will complain and
6378          * point to a specific instruction number in its log.
6379          */
6380         if (j == 0) {
6381                 pr_debug("prog '%s': relo #%d: no matching targets found\n",
6382                          prog->name, relo_idx);
6383
6384                 /* calculate single target relo result explicitly */
6385                 err = bpf_core_calc_relo(prog, relo, relo_idx, &local_spec, NULL, &targ_res);
6386                 if (err)
6387                         return err;
6388         }
6389
6390 patch_insn:
6391         /* bpf_core_patch_insn() should know how to handle missing targ_spec */
6392         err = bpf_core_patch_insn(prog, relo, relo_idx, &targ_res);
6393         if (err) {
6394                 pr_warn("prog '%s': relo #%d: failed to patch insn #%zu: %d\n",
6395                         prog->name, relo_idx, relo->insn_off / BPF_INSN_SZ, err);
6396                 return -EINVAL;
6397         }
6398
6399         return 0;
6400 }
6401
6402 static int
6403 bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path)
6404 {
6405         const struct btf_ext_info_sec *sec;
6406         const struct bpf_core_relo *rec;
6407         const struct btf_ext_info *seg;
6408         struct hashmap_entry *entry;
6409         struct hashmap *cand_cache = NULL;
6410         struct bpf_program *prog;
6411         const char *sec_name;
6412         int i, err = 0, insn_idx, sec_idx;
6413
6414         if (obj->btf_ext->core_relo_info.len == 0)
6415                 return 0;
6416
6417         if (targ_btf_path) {
6418                 obj->btf_vmlinux_override = btf__parse(targ_btf_path, NULL);
6419                 err = libbpf_get_error(obj->btf_vmlinux_override);
6420                 if (err) {
6421                         pr_warn("failed to parse target BTF: %d\n", err);
6422                         return err;
6423                 }
6424         }
6425
6426         cand_cache = hashmap__new(bpf_core_hash_fn, bpf_core_equal_fn, NULL);
6427         if (IS_ERR(cand_cache)) {
6428                 err = PTR_ERR(cand_cache);
6429                 goto out;
6430         }
6431
6432         seg = &obj->btf_ext->core_relo_info;
6433         for_each_btf_ext_sec(seg, sec) {
6434                 sec_name = btf__name_by_offset(obj->btf, sec->sec_name_off);
6435                 if (str_is_empty(sec_name)) {
6436                         err = -EINVAL;
6437                         goto out;
6438                 }
6439                 /* the bpf_object's ELF is gone by now, so it's not easy to
6440                  * find a section index by section name, but we can find *any*
6441                  * bpf_program within the desired section and use its
6442                  * prog->sec_idx to do a proper search by section index and
6443                  * instruction offset
6444                  */
6445                 prog = NULL;
6446                 for (i = 0; i < obj->nr_programs; i++)
6447                         if (strcmp(obj->programs[i].sec_name, sec_name) == 0) {
6448                                 prog = &obj->programs[i];
6449                                 break;
6450                         }
6451                 if (!prog) {
6452                         pr_warn("sec '%s': failed to find a BPF program\n", sec_name);
6453                         err = -ENOENT;
                        goto out; /* don't leak cand_cache */
6454                 }
6455                 sec_idx = prog->sec_idx;
6456
6457                 pr_debug("sec '%s': found %d CO-RE relocations\n",
6458                          sec_name, sec->num_info);
6459
6460                 for_each_btf_ext_rec(seg, sec, i, rec) {
6461                         insn_idx = rec->insn_off / BPF_INSN_SZ;
6462                         prog = find_prog_by_sec_insn(obj, sec_idx, insn_idx);
6463                         if (!prog) {
6464                                 pr_warn("sec '%s': failed to find program at insn #%d for CO-RE offset relocation #%d\n",
6465                                         sec_name, insn_idx, i);
6466                                 err = -EINVAL;
6467                                 goto out;
6468                         }
6469                         /* no need to apply CO-RE relocation if the program is
6470                          * not going to be loaded
6471                          */
6472                         if (!prog->load)
6473                                 continue;
6474
6475                         err = bpf_core_apply_relo(prog, rec, i, obj->btf, cand_cache);
6476                         if (err) {
6477                                 pr_warn("prog '%s': relo #%d: failed to relocate: %d\n",
6478                                         prog->name, i, err);
6479                                 goto out;
6480                         }
6481                 }
6482         }
6483
6484 out:
6485         /* obj->btf_vmlinux and module BTFs are freed after object load */
6486         btf__free(obj->btf_vmlinux_override);
6487         obj->btf_vmlinux_override = NULL;
6488
6489         if (!IS_ERR_OR_NULL(cand_cache)) {
6490                 hashmap__for_each_entry(cand_cache, entry, i) {
6491                         bpf_core_free_cands(entry->value);
6492                 }
6493                 hashmap__free(cand_cache);
6494         }
6495         return err;
6496 }
6497
6498 /* Relocate data references within program code:
6499  *  - map references;
6500  *  - global variable references;
6501  *  - extern references.
6502  */
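/* A hedged sketch of the RELO_LD64 case below: an ld_imm64 instruction that
 * referenced a map symbol in the ELF, conceptually
 *     r1 = <map>           (BPF_LD | BPF_DW | BPF_IMM, imm = 0)
 * is rewritten to carry src_reg = BPF_PSEUDO_MAP_FD and imm = <map fd>, so
 * the kernel can resolve the actual map at program load time.
 */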
6503 static int
6504 bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog)
6505 {
6506         int i;
6507
6508         for (i = 0; i < prog->nr_reloc; i++) {
6509                 struct reloc_desc *relo = &prog->reloc_desc[i];
6510                 struct bpf_insn *insn = &prog->insns[relo->insn_idx];
6511                 struct extern_desc *ext;
6512
6513                 switch (relo->type) {
6514                 case RELO_LD64:
6515                         if (obj->gen_loader) {
6516                                 insn[0].src_reg = BPF_PSEUDO_MAP_IDX;
6517                                 insn[0].imm = relo->map_idx;
6518                         } else {
6519                                 insn[0].src_reg = BPF_PSEUDO_MAP_FD;
6520                                 insn[0].imm = obj->maps[relo->map_idx].fd;
6521                         }
6522                         break;
6523                 case RELO_DATA:
6524                         insn[1].imm = insn[0].imm + relo->sym_off;
6525                         if (obj->gen_loader) {
6526                                 insn[0].src_reg = BPF_PSEUDO_MAP_IDX_VALUE;
6527                                 insn[0].imm = relo->map_idx;
6528                         } else {
6529                                 insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
6530                                 insn[0].imm = obj->maps[relo->map_idx].fd;
6531                         }
6532                         break;
6533                 case RELO_EXTERN_VAR:
6534                         ext = &obj->externs[relo->sym_off];
6535                         if (ext->type == EXT_KCFG) {
6536                                 if (obj->gen_loader) {
6537                                         insn[0].src_reg = BPF_PSEUDO_MAP_IDX_VALUE;
6538                                         insn[0].imm = obj->kconfig_map_idx;
6539                                 } else {
6540                                         insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
6541                                         insn[0].imm = obj->maps[obj->kconfig_map_idx].fd;
6542                                 }
6543                                 insn[1].imm = ext->kcfg.data_off;
6544                         } else /* EXT_KSYM */ {
6545                                 if (ext->ksym.type_id) { /* typed ksyms */
6546                                         insn[0].src_reg = BPF_PSEUDO_BTF_ID;
6547                                         insn[0].imm = ext->ksym.kernel_btf_id;
6548                                         insn[1].imm = ext->ksym.kernel_btf_obj_fd;
6549                                 } else { /* typeless ksyms */
6550                                         insn[0].imm = (__u32)ext->ksym.addr;
6551                                         insn[1].imm = ext->ksym.addr >> 32;
6552                                 }
6553                         }
6554                         break;
6555                 case RELO_EXTERN_FUNC:
6556                         ext = &obj->externs[relo->sym_off];
6557                         insn[0].src_reg = BPF_PSEUDO_KFUNC_CALL;
6558                         insn[0].imm = ext->ksym.kernel_btf_id;
6559                         break;
6560                 case RELO_SUBPROG_ADDR:
6561                         if (insn[0].src_reg != BPF_PSEUDO_FUNC) {
6562                                 pr_warn("prog '%s': relo #%d: bad insn\n",
6563                                         prog->name, i);
6564                                 return -EINVAL;
6565                         }
6566                         /* handled already */
6567                         break;
6568                 case RELO_CALL:
6569                         /* handled already */
6570                         break;
6571                 default:
6572                         pr_warn("prog '%s': relo #%d: bad relo type %d\n",
6573                                 prog->name, i, relo->type);
6574                         return -EINVAL;
6575                 }
6576         }
6577
6578         return 0;
6579 }
6580
6581 static int adjust_prog_btf_ext_info(const struct bpf_object *obj,
6582                                     const struct bpf_program *prog,
6583                                     const struct btf_ext_info *ext_info,
6584                                     void **prog_info, __u32 *prog_rec_cnt,
6585                                     __u32 *prog_rec_sz)
6586 {
6587         void *copy_start = NULL, *copy_end = NULL;
6588         void *rec, *rec_end, *new_prog_info;
6589         const struct btf_ext_info_sec *sec;
6590         size_t old_sz, new_sz;
6591         const char *sec_name;
6592         int i, off_adj;
6593
6594         for_each_btf_ext_sec(ext_info, sec) {
6595                 sec_name = btf__name_by_offset(obj->btf, sec->sec_name_off);
6596                 if (!sec_name)
6597                         return -EINVAL;
6598                 if (strcmp(sec_name, prog->sec_name) != 0)
6599                         continue;
6600
6601                 for_each_btf_ext_rec(ext_info, sec, i, rec) {
6602                         __u32 insn_off = *(__u32 *)rec / BPF_INSN_SZ;
6603
6604                         if (insn_off < prog->sec_insn_off)
6605                                 continue;
6606                         if (insn_off >= prog->sec_insn_off + prog->sec_insn_cnt)
6607                                 break;
6608
6609                         if (!copy_start)
6610                                 copy_start = rec;
6611                         copy_end = rec + ext_info->rec_size;
6612                 }
6613
6614                 if (!copy_start)
6615                         return -ENOENT;
6616
6617                 /* append func/line info of a given (sub-)program to the main
6618                  * program's func/line info
6619                  */
6620                 old_sz = (size_t)(*prog_rec_cnt) * ext_info->rec_size;
6621                 new_sz = old_sz + (copy_end - copy_start);
6622                 new_prog_info = realloc(*prog_info, new_sz);
6623                 if (!new_prog_info)
6624                         return -ENOMEM;
6625                 *prog_info = new_prog_info;
6626                 *prog_rec_cnt = new_sz / ext_info->rec_size;
6627                 memcpy(new_prog_info + old_sz, copy_start, copy_end - copy_start);
6628
6629                 /* Kernel instruction offsets are in units of 8-byte
6630                  * instructions, while .BTF.ext instruction offsets generated
6631                  * by Clang are in units of bytes. So convert Clang offsets
6632                  * into kernel offsets and adjust offset according to program
6633                  * relocated position.
6634                  */
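		/* e.g. (hypothetical numbers): a record at byte offset 336 is
		 * insn #42 of its section; for a subprog with sec_insn_off = 40
		 * appended at sub_insn_off = 100, off_adj = 60 and the final
		 * offset becomes 42 + 60 = 102
		 */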
6635                 off_adj = prog->sub_insn_off - prog->sec_insn_off;
6636                 rec = new_prog_info + old_sz;
6637                 rec_end = new_prog_info + new_sz;
6638                 for (; rec < rec_end; rec += ext_info->rec_size) {
6639                         __u32 *insn_off = rec;
6640
6641                         *insn_off = *insn_off / BPF_INSN_SZ + off_adj;
6642                 }
6643                 *prog_rec_sz = ext_info->rec_size;
6644                 return 0;
6645         }
6646
6647         return -ENOENT;
6648 }
6649
6650 static int
6651 reloc_prog_func_and_line_info(const struct bpf_object *obj,
6652                               struct bpf_program *main_prog,
6653                               const struct bpf_program *prog)
6654 {
6655         int err;
6656
6657         /* no .BTF.ext relocation if .BTF.ext is missing or kernel doesn't
6658          * support func/line info
6659          */
6660         if (!obj->btf_ext || !kernel_supports(obj, FEAT_BTF_FUNC))
6661                 return 0;
6662
6663         /* only attempt func info relocation if main program's func_info
6664          * relocation was successful
6665          */
6666         if (main_prog != prog && !main_prog->func_info)
6667                 goto line_info;
6668
6669         err = adjust_prog_btf_ext_info(obj, prog, &obj->btf_ext->func_info,
6670                                        &main_prog->func_info,
6671                                        &main_prog->func_info_cnt,
6672                                        &main_prog->func_info_rec_size);
6673         if (err) {
6674                 if (err != -ENOENT) {
6675                         pr_warn("prog '%s': error relocating .BTF.ext function info: %d\n",
6676                                 prog->name, err);
6677                         return err;
6678                 }
6679                 if (main_prog->func_info) {
6680                         /*
6681                          * Some info has already been found, but there was a
6682                          * problem with the last btf_ext reloc, so error out.
6683                          */
6684                         pr_warn("prog '%s': missing .BTF.ext function info.\n", prog->name);
6685                         return err;
6686                 }
6687                 /* There was a problem loading the very first info. Ignore the rest. */
6688                 pr_warn("prog '%s': missing .BTF.ext function info for the main program, skipping all of .BTF.ext func info.\n",
6689                         prog->name);
6690         }
6691
6692 line_info:
6693         /* don't relocate line info if main program's relocation failed */
6694         if (main_prog != prog && !main_prog->line_info)
6695                 return 0;
6696
6697         err = adjust_prog_btf_ext_info(obj, prog, &obj->btf_ext->line_info,
6698                                        &main_prog->line_info,
6699                                        &main_prog->line_info_cnt,
6700                                        &main_prog->line_info_rec_size);
6701         if (err) {
6702                 if (err != -ENOENT) {
6703                         pr_warn("prog '%s': error relocating .BTF.ext line info: %d\n",
6704                                 prog->name, err);
6705                         return err;
6706                 }
6707                 if (main_prog->line_info) {
6708                         /*
6709                          * Some info has already been found, but there was a
6710                          * problem with the last btf_ext reloc, so error out.
6711                          */
6712                         pr_warn("prog '%s': missing .BTF.ext line info.\n", prog->name);
6713                         return err;
6714                 }
6715                 /* There was a problem loading the very first info. Ignore the rest. */
6716                 pr_warn("prog '%s': missing .BTF.ext line info for the main program, skipping all of .BTF.ext line info.\n",
6717                         prog->name);
6718         }
6719         return 0;
6720 }
6721
6722 static int cmp_relo_by_insn_idx(const void *key, const void *elem)
6723 {
6724         size_t insn_idx = *(const size_t *)key;
6725         const struct reloc_desc *relo = elem;
6726
6727         if (insn_idx == relo->insn_idx)
6728                 return 0;
6729         return insn_idx < relo->insn_idx ? -1 : 1;
6730 }
6731
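/* Note: prog->reloc_desc is sorted by insn_idx (see the qsort() with
 * cmp_relocs() in bpf_object__collect_relos()), which is what makes the
 * bsearch() below valid.
 */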
6732 static struct reloc_desc *find_prog_insn_relo(const struct bpf_program *prog, size_t insn_idx)
6733 {
6734         return bsearch(&insn_idx, prog->reloc_desc, prog->nr_reloc,
6735                        sizeof(*prog->reloc_desc), cmp_relo_by_insn_idx);
6736 }
6737
6738 static int append_subprog_relos(struct bpf_program *main_prog, struct bpf_program *subprog)
6739 {
6740         int new_cnt = main_prog->nr_reloc + subprog->nr_reloc;
6741         struct reloc_desc *relos;
6742         int i;
6743
6744         if (main_prog == subprog)
6745                 return 0;
6746         relos = libbpf_reallocarray(main_prog->reloc_desc, new_cnt, sizeof(*relos));
6747         if (!relos)
6748                 return -ENOMEM;
6749         memcpy(relos + main_prog->nr_reloc, subprog->reloc_desc,
6750                sizeof(*relos) * subprog->nr_reloc);
6751
6752         for (i = main_prog->nr_reloc; i < new_cnt; i++)
6753                 relos[i].insn_idx += subprog->sub_insn_off;
6754         /* After insn_idx adjustment the 'relos' array is still sorted
6755          * by insn_idx and doesn't break bsearch.
6756          */
6757         main_prog->reloc_desc = relos;
6758         main_prog->nr_reloc = new_cnt;
6759         return 0;
6760 }
6761
6762 static int
6763 bpf_object__reloc_code(struct bpf_object *obj, struct bpf_program *main_prog,
6764                        struct bpf_program *prog)
6765 {
6766         size_t sub_insn_idx, insn_idx, new_cnt;
6767         struct bpf_program *subprog;
6768         struct bpf_insn *insns, *insn;
6769         struct reloc_desc *relo;
6770         int err;
6771
6772         err = reloc_prog_func_and_line_info(obj, main_prog, prog);
6773         if (err)
6774                 return err;
6775
6776         for (insn_idx = 0; insn_idx < prog->sec_insn_cnt; insn_idx++) {
6777                 insn = &main_prog->insns[prog->sub_insn_off + insn_idx];
6778                 if (!insn_is_subprog_call(insn) && !insn_is_pseudo_func(insn))
6779                         continue;
6780
6781                 relo = find_prog_insn_relo(prog, insn_idx);
6782                 if (relo && relo->type == RELO_EXTERN_FUNC)
6783                         /* kfunc relocations will be handled later
6784                          * in bpf_object__relocate_data()
6785                          */
6786                         continue;
6787                 if (relo && relo->type != RELO_CALL && relo->type != RELO_SUBPROG_ADDR) {
6788                         pr_warn("prog '%s': unexpected relo for insn #%zu, type %d\n",
6789                                 prog->name, insn_idx, relo->type);
6790                         return -LIBBPF_ERRNO__RELOC;
6791                 }
6792                 if (relo) {
6793                         /* sub-program instruction index is a combination of
6794                          * an offset of a symbol pointed to by relocation and
6795                          * call instruction's imm field; for global functions,
6796                          * call always has imm = -1, but for static functions
6797                          * relocation is against STT_SECTION and insn->imm
6798                          * points to a start of a static function
6799                          *
6800                          * for subprog addr relocation, the relo->sym_off + insn->imm is
6801                          * the byte offset in the corresponding section.
6802                          */
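                        /* Worked example (numbers purely illustrative): with
                         * BPF_INSN_SZ == 8, a RELO_CALL against a global
                         * function at sym_off = 64 with imm = -1 gives
                         * sub_insn_idx = 64 / 8 + (-1) + 1 = 8; a RELO_CALL
                         * against a section symbol (sym_off = 0) with
                         * imm = 7 also gives 0 / 8 + 7 + 1 = 8; and a
                         * RELO_SUBPROG_ADDR with sym_off = 0, imm = 64
                         * gives (0 + 64) / 8 = 8.
                         */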
6803                         if (relo->type == RELO_CALL)
6804                                 sub_insn_idx = relo->sym_off / BPF_INSN_SZ + insn->imm + 1;
6805                         else
6806                                 sub_insn_idx = (relo->sym_off + insn->imm) / BPF_INSN_SZ;
6807                 } else if (insn_is_pseudo_func(insn)) {
6808                         /*
6809                          * RELO_SUBPROG_ADDR relo is always emitted even if both
6810                          * functions are in the same section, so it shouldn't reach here.
6811                          */
6812                         pr_warn("prog '%s': missing subprog addr relo for insn #%zu\n",
6813                                 prog->name, insn_idx);
6814                         return -LIBBPF_ERRNO__RELOC;
6815                 } else {
6816                         /* if subprogram call is to a static function within
6817                          * the same ELF section, there won't be any relocation
6818                          * emitted, but it also means there is no additional
6819                          * offset necessary, insns->imm is relative to
6820                          * instruction's original position within the section
6821                          */
6822                         sub_insn_idx = prog->sec_insn_off + insn_idx + insn->imm + 1;
6823                 }
6824
6825                 /* we enforce that sub-programs should be in .text section */
6826                 subprog = find_prog_by_sec_insn(obj, obj->efile.text_shndx, sub_insn_idx);
6827                 if (!subprog) {
6828                         pr_warn("prog '%s': no .text section found yet sub-program call exists\n",
6829                                 prog->name);
6830                         return -LIBBPF_ERRNO__RELOC;
6831                 }
6832
6833                 /* if it's the first call instruction calling into this
6834                  * subprogram (meaning this subprog hasn't been processed
6835                  * yet) within the context of current main program:
6836                  *   - append it at the end of main program's instruction block;
6837                  *   - process it recursively, while current program is put on hold;
6838                  *   - if that subprogram calls some other not yet processed
6839                  *   subprogram, the same thing happens recursively until
6840                  *   there are no more unprocessed subprograms left to append
6841                  *   and relocate.
6842                  */
6843                 if (subprog->sub_insn_off == 0) {
6844                         subprog->sub_insn_off = main_prog->insns_cnt;
6845
6846                         new_cnt = main_prog->insns_cnt + subprog->insns_cnt;
6847                         insns = libbpf_reallocarray(main_prog->insns, new_cnt, sizeof(*insns));
6848                         if (!insns) {
6849                                 pr_warn("prog '%s': failed to realloc prog code\n", main_prog->name);
6850                                 return -ENOMEM;
6851                         }
6852                         main_prog->insns = insns;
6853                         main_prog->insns_cnt = new_cnt;
6854
6855                         memcpy(main_prog->insns + subprog->sub_insn_off, subprog->insns,
6856                                subprog->insns_cnt * sizeof(*insns));
6857
6858                         pr_debug("prog '%s': added %zu insns from sub-prog '%s'\n",
6859                                  main_prog->name, subprog->insns_cnt, subprog->name);
6860
6861                         /* The subprog insns are now appended. Append its relos too. */
6862                         err = append_subprog_relos(main_prog, subprog);
6863                         if (err)
6864                                 return err;
6865                         err = bpf_object__reloc_code(obj, main_prog, subprog);
6866                         if (err)
6867                                 return err;
6868                 }
6869
6870                 /* main_prog->insns memory could have been re-allocated, so
6871                  * calculate pointer again
6872                  */
6873                 insn = &main_prog->insns[prog->sub_insn_off + insn_idx];
6874                 /* calculate correct instruction position within current main
6875                  * prog; each main prog can have a different set of
6876                  * subprograms appended (potentially in different order as
6877                  * well), so position of any subprog can be different for
6878                  * different main programs */
6879                 insn->imm = subprog->sub_insn_off - (prog->sub_insn_off + insn_idx) - 1;
6880
6881                 pr_debug("prog '%s': insn #%zu relocated, imm %d points to subprog '%s' (now at %zu offset)\n",
6882                          prog->name, insn_idx, insn->imm, subprog->name, subprog->sub_insn_off);
6883         }
6884
6885         return 0;
6886 }
6887
6888 /*
6889  * Relocate sub-program calls.
6890  *
6891  * The algorithm operates as follows. Each entry-point BPF program (referred
6892  * to as a main prog) is processed separately. Each subprog (a non-entry
6893  * function, which can be called from either entry progs or other subprogs)
6894  * gets its sub_insn_off reset to zero. This serves as an indicator that this
6895  * subprogram hasn't yet been appended and relocated within the current main
6896  * prog. Once relocated, sub_insn_off points at the position within the main prog
6897  * where given subprog was appended. This will further be used to relocate all
6898  * the call instructions jumping into this subprog.
6899  *
6900  * We start with main program and process all call instructions. If the call
6901  * is into a subprog that hasn't been processed (i.e., subprog->sub_insn_off
6902  * is zero), subprog instructions are appended at the end of main program's
6903  * instruction array. Then main program is "put on hold" while we recursively
6904  * process newly appended subprogram. If that subprogram calls into another
6905  * subprogram that hasn't been appended, new subprogram is appended again to
6906  * the *main* prog's instructions (subprog's instructions are always left
6907  * untouched, as they need to be in unmodified state for subsequent main progs
6908  * and subprog instructions are always sent only as part of a main prog) and
6909  * and subprog instructions are always loaded only as part of a main prog) and
6910  * prog or any of its subprogs are appended (and relocated), all their
6911  * positions within finalized instructions array are known, so it's easy to
6912  * rewrite call instructions with correct relative offsets, corresponding to
6913  * desired target subprog.
6914  *
6915  * It's important to realize that some subprogs might not be called from a
6916  * given main prog or any of its called/used subprogs. Those will keep their
6917  * subprog->sub_insn_off as zero at all times and won't be appended to current
6918  * main prog and won't be relocated within the context of current main prog.
6919  * They might still be used from other main progs later.
6920  *
6921  * Visually this process can be shown as below. Suppose we have two main
6922  * programs mainA and mainB and BPF object contains three subprogs: subA,
6923  * subB, and subC. mainA calls only subA, mainB calls only subC, but subA and
6924  * subC both call subB:
6925  *
6926  *        +--------+ +-------+
6927  *        |        v v       |
6928  *     +--+---+ +--+-+-+ +---+--+
6929  *     | subA | | subB | | subC |
6930  *     +--+---+ +------+ +---+--+
6931  *        ^                  ^
6932  *        |                  |
6933  *    +---+-------+   +------+----+
6934  *    |   mainA   |   |   mainB   |
6935  *    +-----------+   +-----------+
6936  *
6937  * We'll start relocating mainA, find subA, append it, and start
6938  * processing subA recursively:
6939  *
6940  *    +-----------+------+
6941  *    |   mainA   | subA |
6942  *    +-----------+------+
6943  *
6944  * At this point we notice that subB is used from subA, so we append it and
6945  * relocate (there are no further subcalls from subB):
6946  *
6947  *    +-----------+------+------+
6948  *    |   mainA   | subA | subB |
6949  *    +-----------+------+------+
6950  *
6951  * At this point, we relocate subA calls, then go one level up and finish with
6952  * relocating mainA calls. mainA is done.
6953  *
6954  * For mainB, the process is similar but results in a different order. We start with
6955  * mainB and skip subA and subB, as mainB never calls them (at least
6956  * directly), but we see subC is needed, so we append and start processing it:
6957  *
6958  *    +-----------+------+
6959  *    |   mainB   | subC |
6960  *    +-----------+------+
6961  * Now we see subC needs subB, so we append and relocate it as well:
6962  *
6963  *    +-----------+------+------+
6964  *    |   mainB   | subC | subB |
6965  *    +-----------+------+------+
6966  *
6967  * At this point we unwind recursion, relocate calls in subC, then in mainB.
6968  */
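/* For illustration only, a minimal BPF-side C sketch (all names hypothetical)
 * that would produce the mainA/mainB call graph above:
 *
 *    static __noinline int subB(int x) { return x + 1; }
 *    static __noinline int subA(int x) { return subB(x) * 2; }
 *    static __noinline int subC(int x) { return subB(x) - 1; }
 *
 *    SEC("tracepoint/sched/sched_switch")
 *    int mainA(void *ctx) { return subA(1); }
 *
 *    SEC("tracepoint/sched/sched_process_exec")
 *    int mainB(void *ctx) { return subC(2); }
 *
 * After relocation, mainA's final instruction array is mainA+subA+subB and
 * mainB's is mainB+subC+subB, matching the diagrams above.
 */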
6969 static int
6970 bpf_object__relocate_calls(struct bpf_object *obj, struct bpf_program *prog)
6971 {
6972         struct bpf_program *subprog;
6973         int i, err;
6974
6975         /* mark all subprogs as not relocated (yet) within the context of
6976          * current main program
6977          */
6978         for (i = 0; i < obj->nr_programs; i++) {
6979                 subprog = &obj->programs[i];
6980                 if (!prog_is_subprog(obj, subprog))
6981                         continue;
6982
6983                 subprog->sub_insn_off = 0;
6984         }
6985
6986         err = bpf_object__reloc_code(obj, prog, prog);
6987         if (err)
6988                 return err;
6989
6990
6991         return 0;
6992 }
6993
6994 static void
6995 bpf_object__free_relocs(struct bpf_object *obj)
6996 {
6997         struct bpf_program *prog;
6998         int i;
6999
7000         /* free up relocation descriptors */
7001         for (i = 0; i < obj->nr_programs; i++) {
7002                 prog = &obj->programs[i];
7003                 zfree(&prog->reloc_desc);
7004                 prog->nr_reloc = 0;
7005         }
7006 }
7007
7008 static int
7009 bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path)
7010 {
7011         struct bpf_program *prog;
7012         size_t i, j;
7013         int err;
7014
7015         if (obj->btf_ext) {
7016                 err = bpf_object__relocate_core(obj, targ_btf_path);
7017                 if (err) {
7018                         pr_warn("failed to perform CO-RE relocations: %d\n",
7019                                 err);
7020                         return err;
7021                 }
7022         }
7023
7024         /* Before relocating calls, pre-process relocations and mark
7025          * the few ld_imm64 instructions that point to subprogs.
7026          * Otherwise bpf_object__reloc_code() later would have to consider
7027          * all ld_imm64 insns as relocation candidates. That would
7028          * slow relocation down, since the number of find_prog_insn_relo()
7029          * calls would increase and most of them would fail to find a relo.
7030          */
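        /* As a hypothetical example, BPF C code that takes a subprog's
         * address, e.g. passing a callback to bpf_for_each_map_elem(), is
         * compiled to a two-slot ld_imm64 instruction; after the marking
         * below it satisfies insn_is_pseudo_func():
         *
         *    insn->code == (BPF_LD | BPF_IMM | BPF_DW) &&
         *    insn->src_reg == BPF_PSEUDO_FUNC
         */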
7031         for (i = 0; i < obj->nr_programs; i++) {
7032                 prog = &obj->programs[i];
7033                 for (j = 0; j < prog->nr_reloc; j++) {
7034                         struct reloc_desc *relo = &prog->reloc_desc[j];
7035                         struct bpf_insn *insn = &prog->insns[relo->insn_idx];
7036
7037                         /* mark the insn, so it's recognized by insn_is_pseudo_func() */
7038                         if (relo->type == RELO_SUBPROG_ADDR)
7039                                 insn[0].src_reg = BPF_PSEUDO_FUNC;
7040                 }
7041         }
7042
7043         /* relocate subprogram calls and append used subprograms to main
7044          * programs; each copy of subprogram code needs to be relocated
7045          * differently for each main program, because its code location might
7046          * have changed.
7047          * Append subprog relos to main programs to allow data relos to be
7048          * processed after text is completely relocated.
7049          */
7050         for (i = 0; i < obj->nr_programs; i++) {
7051                 prog = &obj->programs[i];
7052                 /* sub-program's sub-calls are relocated within the context of
7053                  * its main program only
7054                  */
7055                 if (prog_is_subprog(obj, prog))
7056                         continue;
7057
7058                 err = bpf_object__relocate_calls(obj, prog);
7059                 if (err) {
7060                         pr_warn("prog '%s': failed to relocate calls: %d\n",
7061                                 prog->name, err);
7062                         return err;
7063                 }
7064         }
7065         /* Process data relos for main programs */
7066         for (i = 0; i < obj->nr_programs; i++) {
7067                 prog = &obj->programs[i];
7068                 if (prog_is_subprog(obj, prog))
7069                         continue;
7070                 err = bpf_object__relocate_data(obj, prog);
7071                 if (err) {
7072                         pr_warn("prog '%s': failed to relocate data references: %d\n",
7073                                 prog->name, err);
7074                         return err;
7075                 }
7076         }
7077         if (!obj->gen_loader)
7078                 bpf_object__free_relocs(obj);
7079         return 0;
7080 }
7081
7082 static int bpf_object__collect_st_ops_relos(struct bpf_object *obj,
7083                                             GElf_Shdr *shdr, Elf_Data *data);
7084
7085 static int bpf_object__collect_map_relos(struct bpf_object *obj,
7086                                          GElf_Shdr *shdr, Elf_Data *data)
7087 {
7088         const int bpf_ptr_sz = 8, host_ptr_sz = sizeof(void *);
7089         int i, j, nrels, new_sz;
7090         const struct btf_var_secinfo *vi = NULL;
7091         const struct btf_type *sec, *var, *def;
7092         struct bpf_map *map = NULL, *targ_map;
7093         const struct btf_member *member;
7094         const char *name, *mname;
7095         Elf_Data *symbols;
7096         unsigned int moff;
7097         GElf_Sym sym;
7098         GElf_Rel rel;
7099         void *tmp;
7100
7101         if (!obj->efile.btf_maps_sec_btf_id || !obj->btf)
7102                 return -EINVAL;
7103         sec = btf__type_by_id(obj->btf, obj->efile.btf_maps_sec_btf_id);
7104         if (!sec)
7105                 return -EINVAL;
7106
7107         symbols = obj->efile.symbols;
7108         nrels = shdr->sh_size / shdr->sh_entsize;
7109         for (i = 0; i < nrels; i++) {
7110                 if (!gelf_getrel(data, i, &rel)) {
7111                         pr_warn(".maps relo #%d: failed to get ELF relo\n", i);
7112                         return -LIBBPF_ERRNO__FORMAT;
7113                 }
7114                 if (!gelf_getsym(symbols, GELF_R_SYM(rel.r_info), &sym)) {
7115                         pr_warn(".maps relo #%d: symbol %zx not found\n",
7116                                 i, (size_t)GELF_R_SYM(rel.r_info));
7117                         return -LIBBPF_ERRNO__FORMAT;
7118                 }
7119                 name = elf_sym_str(obj, sym.st_name) ?: "<?>";
7120                 if (sym.st_shndx != obj->efile.btf_maps_shndx) {
7121                         pr_warn(".maps relo #%d: '%s' isn't a BTF-defined map\n",
7122                                 i, name);
7123                         return -LIBBPF_ERRNO__RELOC;
7124                 }
7125
7126                 pr_debug(".maps relo #%d: for %zd value %zd rel.r_offset %zu name %d ('%s')\n",
7127                          i, (ssize_t)(rel.r_info >> 32), (size_t)sym.st_value,
7128                          (size_t)rel.r_offset, sym.st_name, name);
7129
7130                 for (j = 0; j < obj->nr_maps; j++) {
7131                         map = &obj->maps[j];
7132                         if (map->sec_idx != obj->efile.btf_maps_shndx)
7133                                 continue;
7134
7135                         vi = btf_var_secinfos(sec) + map->btf_var_idx;
7136                         if (vi->offset <= rel.r_offset &&
7137                             rel.r_offset + bpf_ptr_sz <= vi->offset + vi->size)
7138                                 break;
7139                 }
7140                 if (j == obj->nr_maps) {
7141                         pr_warn(".maps relo #%d: cannot find map '%s' at rel.r_offset %zu\n",
7142                                 i, name, (size_t)rel.r_offset);
7143                         return -EINVAL;
7144                 }
7145
7146                 if (!bpf_map_type__is_map_in_map(map->def.type))
7147                         return -EINVAL;
7148                 if (map->def.type == BPF_MAP_TYPE_HASH_OF_MAPS &&
7149                     map->def.key_size != sizeof(int)) {
7150                         pr_warn(".maps relo #%d: hash-of-maps '%s' should have key size %zu.\n",
7151                                 i, map->name, sizeof(int));
7152                         return -EINVAL;
7153                 }
7154
7155                 targ_map = bpf_object__find_map_by_name(obj, name);
7156                 if (!targ_map)
7157                         return -ESRCH;
7158
7159                 var = btf__type_by_id(obj->btf, vi->type);
7160                 def = skip_mods_and_typedefs(obj->btf, var->type, NULL);
7161                 if (btf_vlen(def) == 0)
7162                         return -EINVAL;
7163                 member = btf_members(def) + btf_vlen(def) - 1;
7164                 mname = btf__name_by_offset(obj->btf, member->name_off);
7165                 if (strcmp(mname, "values"))
7166                         return -EINVAL;
7167
7168                 moff = btf_member_bit_offset(def, btf_vlen(def) - 1) / 8;
7169                 if (rel.r_offset - vi->offset < moff)
7170                         return -EINVAL;
7171
7172                 moff = rel.r_offset - vi->offset - moff;
7173                 /* here we use BPF pointer size, which is always 64 bit, as we
7174                  * are parsing ELF that was built for BPF target
7175                  */
7176                 if (moff % bpf_ptr_sz)
7177                         return -EINVAL;
7178                 moff /= bpf_ptr_sz;
7179                 if (moff >= map->init_slots_sz) {
7180                         new_sz = moff + 1;
7181                         tmp = libbpf_reallocarray(map->init_slots, new_sz, host_ptr_sz);
7182                         if (!tmp)
7183                                 return -ENOMEM;
7184                         map->init_slots = tmp;
7185                         memset(map->init_slots + map->init_slots_sz, 0,
7186                                (new_sz - map->init_slots_sz) * host_ptr_sz);
7187                         map->init_slots_sz = new_sz;
7188                 }
7189                 map->init_slots[moff] = targ_map;
7190
7191                 pr_debug(".maps relo #%d: map '%s' slot [%d] points to map '%s'\n",
7192                          i, map->name, moff, name);
7193         }
7194
7195         return 0;
7196 }
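/* For reference, a hedged sketch (names hypothetical) of the BTF-defined
 * map-in-map declaration whose .maps relocations the function above parses:
 *
 *    struct inner_map {
 *            __uint(type, BPF_MAP_TYPE_ARRAY);
 *            __uint(max_entries, 1);
 *            __type(key, int);
 *            __type(value, int);
 *    } inner SEC(".maps");
 *
 *    struct {
 *            __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
 *            __uint(max_entries, 4);
 *            __type(key, int);
 *            __array(values, struct inner_map);
 *    } outer SEC(".maps") = {
 *            .values = { [0] = &inner },
 *    };
 *
 * Each &inner initializer emits one ELF relocation against the last
 * 'values' member; the loop above turns it into map->init_slots[0].
 */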
7197
7198 static int cmp_relocs(const void *_a, const void *_b)
7199 {
7200         const struct reloc_desc *a = _a;
7201         const struct reloc_desc *b = _b;
7202
7203         if (a->insn_idx != b->insn_idx)
7204                 return a->insn_idx < b->insn_idx ? -1 : 1;
7205
7206         /* no two relocations should have the same insn_idx, but ... */
7207         if (a->type != b->type)
7208                 return a->type < b->type ? -1 : 1;
7209
7210         return 0;
7211 }
7212
7213 static int bpf_object__collect_relos(struct bpf_object *obj)
7214 {
7215         int i, err;
7216
7217         for (i = 0; i < obj->efile.nr_reloc_sects; i++) {
7218                 GElf_Shdr *shdr = &obj->efile.reloc_sects[i].shdr;
7219                 Elf_Data *data = obj->efile.reloc_sects[i].data;
7220                 int idx = shdr->sh_info;
7221
7222                 if (shdr->sh_type != SHT_REL) {
7223                         pr_warn("internal error at %d\n", __LINE__);
7224                         return -LIBBPF_ERRNO__INTERNAL;
7225                 }
7226
7227                 if (idx == obj->efile.st_ops_shndx)
7228                         err = bpf_object__collect_st_ops_relos(obj, shdr, data);
7229                 else if (idx == obj->efile.btf_maps_shndx)
7230                         err = bpf_object__collect_map_relos(obj, shdr, data);
7231                 else
7232                         err = bpf_object__collect_prog_relos(obj, shdr, data);
7233                 if (err)
7234                         return err;
7235         }
7236
7237         for (i = 0; i < obj->nr_programs; i++) {
7238                 struct bpf_program *p = &obj->programs[i];
7239
7240                 if (!p->nr_reloc)
7241                         continue;
7242
7243                 qsort(p->reloc_desc, p->nr_reloc, sizeof(*p->reloc_desc), cmp_relocs);
7244         }
7245         return 0;
7246 }
7247
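/* A helper call is a BPF_JMP|BPF_CALL instruction with BPF_K source and both
 * src_reg and dst_reg zeroed; insn->imm then holds the helper ID. Pseudo
 * calls into subprogs use a non-zero src_reg and so don't match.
 */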
7248 static bool insn_is_helper_call(struct bpf_insn *insn, enum bpf_func_id *func_id)
7249 {
7250         if (BPF_CLASS(insn->code) == BPF_JMP &&
7251             BPF_OP(insn->code) == BPF_CALL &&
7252             BPF_SRC(insn->code) == BPF_K &&
7253             insn->src_reg == 0 &&
7254             insn->dst_reg == 0) {
7255                     *func_id = insn->imm;
7256                     return true;
7257         }
7258         return false;
7259 }
7260
7261 static int bpf_object__sanitize_prog(struct bpf_object *obj, struct bpf_program *prog)
7262 {
7263         struct bpf_insn *insn = prog->insns;
7264         enum bpf_func_id func_id;
7265         int i;
7266
7267         if (obj->gen_loader)
7268                 return 0;
7269
7270         for (i = 0; i < prog->insns_cnt; i++, insn++) {
7271                 if (!insn_is_helper_call(insn, &func_id))
7272                         continue;
7273
7274                 /* on kernels that don't yet support
7275                  * bpf_probe_read_{kernel,user}[_str] helpers, fall back
7276                  * to bpf_probe_read() which works well for old kernels
7277                  */
7278                 switch (func_id) {
7279                 case BPF_FUNC_probe_read_kernel:
7280                 case BPF_FUNC_probe_read_user:
7281                         if (!kernel_supports(obj, FEAT_PROBE_READ_KERN))
7282                                 insn->imm = BPF_FUNC_probe_read;
7283                         break;
7284                 case BPF_FUNC_probe_read_kernel_str:
7285                 case BPF_FUNC_probe_read_user_str:
7286                         if (!kernel_supports(obj, FEAT_PROBE_READ_KERN))
7287                                 insn->imm = BPF_FUNC_probe_read_str;
7288                         break;
7289                 default:
7290                         break;
7291                 }
7292         }
7293         return 0;
7294 }
7295
7296 static int
7297 load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt,
7298              char *license, __u32 kern_version, int *pfd)
7299 {
7300         struct bpf_prog_load_params load_attr = {};
7301         char *cp, errmsg[STRERR_BUFSIZE];
7302         size_t log_buf_size = 0;
7303         char *log_buf = NULL;
7304         int btf_fd, ret;
7305
7306         if (prog->type == BPF_PROG_TYPE_UNSPEC) {
7307                 /*
7308                  * The program type must be set.  Most likely we couldn't find a proper
7309                  * section definition at load time, and thus we didn't infer the type.
7310                  */
7311                 pr_warn("prog '%s': missing BPF prog type, check ELF section name '%s'\n",
7312                         prog->name, prog->sec_name);
7313                 return -EINVAL;
7314         }
7315
7316         if (!insns || !insns_cnt)
7317                 return -EINVAL;
7318
7319         load_attr.prog_type = prog->type;
7320         /* old kernels might not support specifying expected_attach_type */
7321         if (!kernel_supports(prog->obj, FEAT_EXP_ATTACH_TYPE) && prog->sec_def &&
7322             prog->sec_def->is_exp_attach_type_optional)
7323                 load_attr.expected_attach_type = 0;
7324         else
7325                 load_attr.expected_attach_type = prog->expected_attach_type;
7326         if (kernel_supports(prog->obj, FEAT_PROG_NAME))
7327                 load_attr.name = prog->name;
7328         load_attr.insns = insns;
7329         load_attr.insn_cnt = insns_cnt;
7330         load_attr.license = license;
7331         load_attr.attach_btf_id = prog->attach_btf_id;
7332         if (prog->attach_prog_fd)
7333                 load_attr.attach_prog_fd = prog->attach_prog_fd;
7334         else
7335                 load_attr.attach_btf_obj_fd = prog->attach_btf_obj_fd;
7336         load_attr.attach_btf_id = prog->attach_btf_id;
7337         load_attr.kern_version = kern_version;
7338         load_attr.prog_ifindex = prog->prog_ifindex;
7339
7340         /* specify func_info/line_info only if kernel supports them */
7341         btf_fd = bpf_object__btf_fd(prog->obj);
7342         if (btf_fd >= 0 && kernel_supports(prog->obj, FEAT_BTF_FUNC)) {
7343                 load_attr.prog_btf_fd = btf_fd;
7344                 load_attr.func_info = prog->func_info;
7345                 load_attr.func_info_rec_size = prog->func_info_rec_size;
7346                 load_attr.func_info_cnt = prog->func_info_cnt;
7347                 load_attr.line_info = prog->line_info;
7348                 load_attr.line_info_rec_size = prog->line_info_rec_size;
7349                 load_attr.line_info_cnt = prog->line_info_cnt;
7350         }
7351         load_attr.log_level = prog->log_level;
7352         load_attr.prog_flags = prog->prog_flags;
7353
7354         if (prog->obj->gen_loader) {
7355                 bpf_gen__prog_load(prog->obj->gen_loader, &load_attr,
7356                                    prog - prog->obj->programs);
7357                 *pfd = -1;
7358                 return 0;
7359         }
7360 retry_load:
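        /* on the first pass log_buf_size is 0 and no log buffer is passed to
         * the kernel; if the load fails we retry with BPF_LOG_BUF_SIZE and
         * keep doubling the buffer until the verifier log fits (see the
         * ENOSPC handling below)
         */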
7361         if (log_buf_size) {
7362                 log_buf = malloc(log_buf_size);
7363                 if (!log_buf)
7364                         return -ENOMEM;
7365
7366                 *log_buf = 0;
7367         }
7368
7369         load_attr.log_buf = log_buf;
7370         load_attr.log_buf_sz = log_buf_size;
7371         ret = libbpf__bpf_prog_load(&load_attr);
7372
7373         if (ret >= 0) {
7374                 if (log_buf && load_attr.log_level)
7375                         pr_debug("verifier log:\n%s", log_buf);
7376
7377                 if (prog->obj->rodata_map_idx >= 0 &&
7378                     kernel_supports(prog->obj, FEAT_PROG_BIND_MAP)) {
7379                         struct bpf_map *rodata_map =
7380                                 &prog->obj->maps[prog->obj->rodata_map_idx];
7381
7382                         if (bpf_prog_bind_map(ret, bpf_map__fd(rodata_map), NULL)) {
7383                                 cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
7384                                 pr_warn("prog '%s': failed to bind .rodata map: %s\n",
7385                                         prog->name, cp);
7386                                 /* Don't fail hard if can't bind rodata. */
7387                         }
7388                 }
7389
7390                 *pfd = ret;
7391                 ret = 0;
7392                 goto out;
7393         }
7394
7395         if (!log_buf || errno == ENOSPC) {
7396                 log_buf_size = max((size_t)BPF_LOG_BUF_SIZE,
7397                                    log_buf_size << 1);
7398
7399                 free(log_buf);
7400                 goto retry_load;
7401         }
7402         ret = errno ? -errno : -LIBBPF_ERRNO__LOAD;
7403         cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
7404         pr_warn("load bpf program failed: %s\n", cp);
7405         pr_perm_msg(ret);
7406
7407         if (log_buf && log_buf[0] != '\0') {
7408                 ret = -LIBBPF_ERRNO__VERIFY;
7409                 pr_warn("-- BEGIN DUMP LOG ---\n");
7410                 pr_warn("\n%s\n", log_buf);
7411                 pr_warn("-- END LOG --\n");
7412         } else if (load_attr.insn_cnt >= BPF_MAXINSNS) {
7413                 pr_warn("Program too large (%zu insns), at most %d insns\n",
7414                         load_attr.insn_cnt, BPF_MAXINSNS);
7415                 ret = -LIBBPF_ERRNO__PROG2BIG;
7416         } else if (load_attr.prog_type != BPF_PROG_TYPE_KPROBE) {
7417                 /* Wrong program type? */
7418                 int fd;
7419
7420                 load_attr.prog_type = BPF_PROG_TYPE_KPROBE;
7421                 load_attr.expected_attach_type = 0;
7422                 load_attr.log_buf = NULL;
7423                 load_attr.log_buf_sz = 0;
7424                 fd = libbpf__bpf_prog_load(&load_attr);
7425                 if (fd >= 0) {
7426                         close(fd);
7427                         ret = -LIBBPF_ERRNO__PROGTYPE;
7428                         goto out;
7429                 }
7430         }
7431
7432 out:
7433         free(log_buf);
7434         return ret;
7435 }
7436
7437 static int bpf_program__record_externs(struct bpf_program *prog)
7438 {
7439         struct bpf_object *obj = prog->obj;
7440         int i;
7441
7442         for (i = 0; i < prog->nr_reloc; i++) {
7443                 struct reloc_desc *relo = &prog->reloc_desc[i];
7444                 struct extern_desc *ext = &obj->externs[relo->sym_off];
7445
7446                 switch (relo->type) {
7447                 case RELO_EXTERN_VAR:
7448                         if (ext->type != EXT_KSYM)
7449                                 continue;
7450                         if (!ext->ksym.type_id) {
7451                                 pr_warn("typeless ksym %s is not supported yet\n",
7452                                         ext->name);
7453                                 return -ENOTSUP;
7454                         }
7455                         bpf_gen__record_extern(obj->gen_loader, ext->name, BTF_KIND_VAR,
7456                                                relo->insn_idx);
7457                         break;
7458                 case RELO_EXTERN_FUNC:
7459                         bpf_gen__record_extern(obj->gen_loader, ext->name, BTF_KIND_FUNC,
7460                                                relo->insn_idx);
7461                         break;
7462                 default:
7463                         continue;
7464                 }
7465         }
7466         return 0;
7467 }
7468
7469 static int libbpf_find_attach_btf_id(struct bpf_program *prog, int *btf_obj_fd, int *btf_type_id);
7470
7471 int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_ver)
7472 {
7473         int err = 0, fd, i;
7474
7475         if (prog->obj->loaded) {
7476                 pr_warn("prog '%s': can't load after object was loaded\n", prog->name);
7477                 return libbpf_err(-EINVAL);
7478         }
7479
7480         if ((prog->type == BPF_PROG_TYPE_TRACING ||
7481              prog->type == BPF_PROG_TYPE_LSM ||
7482              prog->type == BPF_PROG_TYPE_EXT) && !prog->attach_btf_id) {
7483                 int btf_obj_fd = 0, btf_type_id = 0;
7484
7485                 err = libbpf_find_attach_btf_id(prog, &btf_obj_fd, &btf_type_id);
7486                 if (err)
7487                         return libbpf_err(err);
7488
7489                 prog->attach_btf_obj_fd = btf_obj_fd;
7490                 prog->attach_btf_id = btf_type_id;
7491         }
7492
7493         if (prog->instances.nr < 0 || !prog->instances.fds) {
7494                 if (prog->preprocessor) {
7495                         pr_warn("Internal error: can't load program '%s'\n",
7496                                 prog->name);
7497                         return libbpf_err(-LIBBPF_ERRNO__INTERNAL);
7498                 }
7499
7500                 prog->instances.fds = malloc(sizeof(int));
7501                 if (!prog->instances.fds) {
7502                         pr_warn("Not enough memory for BPF fds\n");
7503                         return libbpf_err(-ENOMEM);
7504                 }
7505                 prog->instances.nr = 1;
7506                 prog->instances.fds[0] = -1;
7507         }
7508
7509         if (!prog->preprocessor) {
7510                 if (prog->instances.nr != 1) {
7511                         pr_warn("prog '%s': inconsistent nr(%d) != 1\n",
7512                                 prog->name, prog->instances.nr);
7513                 }
7514                 if (prog->obj->gen_loader)
7515                         bpf_program__record_externs(prog);
7516                 err = load_program(prog, prog->insns, prog->insns_cnt,
7517                                    license, kern_ver, &fd);
7518                 if (!err)
7519                         prog->instances.fds[0] = fd;
7520                 goto out;
7521         }
7522
7523         for (i = 0; i < prog->instances.nr; i++) {
7524                 struct bpf_prog_prep_result result;
7525                 bpf_program_prep_t preprocessor = prog->preprocessor;
7526
7527                 memset(&result, 0, sizeof(result));
7528                 err = preprocessor(prog, i, prog->insns,
7529                                    prog->insns_cnt, &result);
7530                 if (err) {
7531                         pr_warn("Preprocessing the %dth instance of program '%s' failed\n",
7532                                 i, prog->name);
7533                         goto out;
7534                 }
7535
7536                 if (!result.new_insn_ptr || !result.new_insn_cnt) {
7537                         pr_debug("Skip loading the %dth instance of program '%s'\n",
7538                                  i, prog->name);
7539                         prog->instances.fds[i] = -1;
7540                         if (result.pfd)
7541                                 *result.pfd = -1;
7542                         continue;
7543                 }
7544
7545                 err = load_program(prog, result.new_insn_ptr,
7546                                    result.new_insn_cnt, license, kern_ver, &fd);
7547                 if (err) {
7548                         pr_warn("Loading the %dth instance of program '%s' failed\n",
7549                                 i, prog->name);
7550                         goto out;
7551                 }
7552
7553                 if (result.pfd)
7554                         *result.pfd = fd;
7555                 prog->instances.fds[i] = fd;
7556         }
7557 out:
7558         if (err)
7559                 pr_warn("failed to load program '%s'\n", prog->name);
7560         zfree(&prog->insns);
7561         prog->insns_cnt = 0;
7562         return libbpf_err(err);
7563 }
7564
7565 static int
7566 bpf_object__load_progs(struct bpf_object *obj, int log_level)
7567 {
7568         struct bpf_program *prog;
7569         size_t i;
7570         int err;
7571
7572         for (i = 0; i < obj->nr_programs; i++) {
7573                 prog = &obj->programs[i];
7574                 err = bpf_object__sanitize_prog(obj, prog);
7575                 if (err)
7576                         return err;
7577         }
7578
7579         for (i = 0; i < obj->nr_programs; i++) {
7580                 prog = &obj->programs[i];
7581                 if (prog_is_subprog(obj, prog))
7582                         continue;
7583                 if (!prog->load) {
7584                         pr_debug("prog '%s': skipped loading\n", prog->name);
7585                         continue;
7586                 }
7587                 prog->log_level |= log_level;
7588                 err = bpf_program__load(prog, obj->license, obj->kern_version);
7589                 if (err)
7590                         return err;
7591         }
7592         if (obj->gen_loader)
7593                 bpf_object__free_relocs(obj);
7594         return 0;
7595 }
7596
7597 static const struct bpf_sec_def *find_sec_def(const char *sec_name);
7598
7599 static struct bpf_object *
7600 __bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz,
7601                    const struct bpf_object_open_opts *opts)
7602 {
7603         const char *obj_name, *kconfig, *btf_tmp_path;
7604         struct bpf_program *prog;
7605         struct bpf_object *obj;
7606         char tmp_name[64];
7607         int err;
7608
7609         if (elf_version(EV_CURRENT) == EV_NONE) {
7610                 pr_warn("failed to init libelf for %s\n",
7611                         path ? : "(mem buf)");
7612                 return ERR_PTR(-LIBBPF_ERRNO__LIBELF);
7613         }
7614
7615         if (!OPTS_VALID(opts, bpf_object_open_opts))
7616                 return ERR_PTR(-EINVAL);
7617
7618         obj_name = OPTS_GET(opts, object_name, NULL);
7619         if (obj_buf) {
7620                 if (!obj_name) {
7621                         snprintf(tmp_name, sizeof(tmp_name), "%lx-%lx",
7622                                  (unsigned long)obj_buf,
7623                                  (unsigned long)obj_buf_sz);
7624                         obj_name = tmp_name;
7625                 }
7626                 path = obj_name;
7627                 pr_debug("loading object '%s' from buffer\n", obj_name);
7628         }
7629
7630         obj = bpf_object__new(path, obj_buf, obj_buf_sz, obj_name);
7631         if (IS_ERR(obj))
7632                 return obj;
7633
7634         btf_tmp_path = OPTS_GET(opts, btf_custom_path, NULL);
7635         if (btf_tmp_path) {
7636                 if (strlen(btf_tmp_path) >= PATH_MAX) {
7637                         err = -ENAMETOOLONG;
7638                         goto out;
7639                 }
7640                 obj->btf_custom_path = strdup(btf_tmp_path);
7641                 if (!obj->btf_custom_path) {
7642                         err = -ENOMEM;
7643                         goto out;
7644                 }
7645         }
7646
7647         kconfig = OPTS_GET(opts, kconfig, NULL);
7648         if (kconfig) {
7649                 obj->kconfig = strdup(kconfig);
7650                 if (!obj->kconfig) {
7651                         err = -ENOMEM;
7652                         goto out;
7653                 }
7654         }
7655
7656         err = bpf_object__elf_init(obj);
7657         err = err ? : bpf_object__check_endianness(obj);
7658         err = err ? : bpf_object__elf_collect(obj);
7659         err = err ? : bpf_object__collect_externs(obj);
7660         err = err ? : bpf_object__finalize_btf(obj);
7661         err = err ? : bpf_object__init_maps(obj, opts);
7662         err = err ? : bpf_object__collect_relos(obj);
7663         if (err)
7664                 goto out;
7665         bpf_object__elf_finish(obj);
7666
7667         bpf_object__for_each_program(prog, obj) {
7668                 prog->sec_def = find_sec_def(prog->sec_name);
7669                 if (!prog->sec_def) {
7670                         /* couldn't guess, but user might manually specify */
7671                         pr_debug("prog '%s': unrecognized ELF section name '%s'\n",
7672                                 prog->name, prog->sec_name);
7673                         continue;
7674                 }
7675
7676                 if (prog->sec_def->is_sleepable)
7677                         prog->prog_flags |= BPF_F_SLEEPABLE;
7678                 bpf_program__set_type(prog, prog->sec_def->prog_type);
7679                 bpf_program__set_expected_attach_type(prog,
7680                                 prog->sec_def->expected_attach_type);
7681
7682                 if (prog->sec_def->prog_type == BPF_PROG_TYPE_TRACING ||
7683                     prog->sec_def->prog_type == BPF_PROG_TYPE_EXT)
7684                         prog->attach_prog_fd = OPTS_GET(opts, attach_prog_fd, 0);
7685         }
7686
7687         return obj;
7688 out:
7689         bpf_object__close(obj);
7690         return ERR_PTR(err);
7691 }
7692
7693 static struct bpf_object *
7694 __bpf_object__open_xattr(struct bpf_object_open_attr *attr, int flags)
7695 {
7696         DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts,
7697                 .relaxed_maps = flags & MAPS_RELAX_COMPAT,
7698         );
7699
7700         /* param validation */
7701         if (!attr->file)
7702                 return NULL;
7703
7704         pr_debug("loading %s\n", attr->file);
7705         return __bpf_object__open(attr->file, NULL, 0, &opts);
7706 }
7707
7708 struct bpf_object *bpf_object__open_xattr(struct bpf_object_open_attr *attr)
7709 {
7710         return libbpf_ptr(__bpf_object__open_xattr(attr, 0));
7711 }
7712
7713 struct bpf_object *bpf_object__open(const char *path)
7714 {
7715         struct bpf_object_open_attr attr = {
7716                 .file           = path,
7717                 .prog_type      = BPF_PROG_TYPE_UNSPEC,
7718         };
7719
7720         return libbpf_ptr(__bpf_object__open_xattr(&attr, 0));
7721 }
7722
7723 struct bpf_object *
7724 bpf_object__open_file(const char *path, const struct bpf_object_open_opts *opts)
7725 {
7726         if (!path)
7727                 return libbpf_err_ptr(-EINVAL);
7728
7729         pr_debug("loading %s\n", path);
7730
7731         return libbpf_ptr(__bpf_object__open(path, NULL, 0, opts));
7732 }
7733
7734 struct bpf_object *
7735 bpf_object__open_mem(const void *obj_buf, size_t obj_buf_sz,
7736                      const struct bpf_object_open_opts *opts)
7737 {
7738         if (!obj_buf || obj_buf_sz == 0)
7739                 return libbpf_err_ptr(-EINVAL);
7740
7741         return libbpf_ptr(__bpf_object__open(NULL, obj_buf, obj_buf_sz, opts));
7742 }
7743
7744 struct bpf_object *
7745 bpf_object__open_buffer(const void *obj_buf, size_t obj_buf_sz,
7746                         const char *name)
7747 {
7748         DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts,
7749                 .object_name = name,
7750                 /* wrong default, but backwards-compatible */
7751                 .relaxed_maps = true,
7752         );
7753
7754         /* returning NULL is wrong, but backwards-compatible */
7755         if (!obj_buf || obj_buf_sz == 0)
7756                 return errno = EINVAL, NULL;
7757
7758         return libbpf_ptr(__bpf_object__open(NULL, obj_buf, obj_buf_sz, &opts));
7759 }
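/* A hedged usage sketch of the open APIs above (error handling abbreviated,
 * file name hypothetical):
 *
 *    struct bpf_object *obj;
 *
 *    obj = bpf_object__open_file("prog.bpf.o", NULL);
 *    if (libbpf_get_error(obj))
 *            return -1;
 *    ... optionally override prog types if section-name autodetection fails ...
 *    if (bpf_object__load(obj))
 *            return -1;
 */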
7760
7761 int bpf_object__unload(struct bpf_object *obj)
7762 {
7763         size_t i;
7764
7765         if (!obj)
7766                 return libbpf_err(-EINVAL);
7767
7768         for (i = 0; i < obj->nr_maps; i++) {
7769                 zclose(obj->maps[i].fd);
7770                 if (obj->maps[i].st_ops)
7771                         zfree(&obj->maps[i].st_ops->kern_vdata);
7772         }
7773
7774         for (i = 0; i < obj->nr_programs; i++)
7775                 bpf_program__unload(&obj->programs[i]);
7776
7777         return 0;
7778 }
7779
7780 static int bpf_object__sanitize_maps(struct bpf_object *obj)
7781 {
7782         struct bpf_map *m;
7783
7784         bpf_object__for_each_map(m, obj) {
7785                 if (!bpf_map__is_internal(m))
7786                         continue;
7787                 if (!kernel_supports(obj, FEAT_GLOBAL_DATA)) {
7788                         pr_warn("kernel doesn't support global data\n");
7789                         return -ENOTSUP;
7790                 }
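                /* internal maps are created with BPF_F_MMAPABLE set, so the
                 * XOR below clears the flag on kernels without mmap-able
                 * array support
                 */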
7791                 if (!kernel_supports(obj, FEAT_ARRAY_MMAP))
7792                         m->def.map_flags ^= BPF_F_MMAPABLE;
7793         }
7794
7795         return 0;
7796 }
7797
7798 static int bpf_object__read_kallsyms_file(struct bpf_object *obj)
7799 {
7800         char sym_type, sym_name[500];
7801         unsigned long long sym_addr;
7802         const struct btf_type *t;
7803         struct extern_desc *ext;
7804         int ret, err = 0;
7805         FILE *f;
7806
7807         f = fopen("/proc/kallsyms", "r");
7808         if (!f) {
7809                 err = -errno;
7810                 pr_warn("failed to open /proc/kallsyms: %d\n", err);
7811                 return err;
7812         }
7813
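        /* each /proc/kallsyms line has the form "<addr> <type> <name> ...",
         * e.g. (address illustrative):
         *
         *    ffffffff81000000 T _stext
         *
         * the fscanf() format below pulls out all three fields and discards
         * anything trailing the symbol name (such as "[module]" annotations)
         */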
7814         while (true) {
7815                 ret = fscanf(f, "%llx %c %499s%*[^\n]\n",
7816                              &sym_addr, &sym_type, sym_name);
7817                 if (ret == EOF && feof(f))
7818                         break;
7819                 if (ret != 3) {
7820                         pr_warn("failed to read kallsyms entry: %d\n", ret);
7821                         err = -EINVAL;
7822                         goto out;
7823                 }
7824
7825                 ext = find_extern_by_name(obj, sym_name);
7826                 if (!ext || ext->type != EXT_KSYM)
7827                         continue;
7828
7829                 t = btf__type_by_id(obj->btf, ext->btf_id);
7830                 if (!btf_is_var(t))
7831                         continue;
7832
7833                 if (ext->is_set && ext->ksym.addr != sym_addr) {
7834                         pr_warn("extern (ksym) '%s' resolution is ambiguous: 0x%llx or 0x%llx\n",
7835                                 sym_name, ext->ksym.addr, sym_addr);
7836                         err = -EINVAL;
7837                         goto out;
7838                 }
7839                 if (!ext->is_set) {
7840                         ext->is_set = true;
7841                         ext->ksym.addr = sym_addr;
7842                         pr_debug("extern (ksym) %s=0x%llx\n", sym_name, sym_addr);
7843                 }
7844         }
7845
7846 out:
7847         fclose(f);
7848         return err;
7849 }
7850
7851 static int find_ksym_btf_id(struct bpf_object *obj, const char *ksym_name,
7852                             __u16 kind, struct btf **res_btf,
7853                             int *res_btf_fd)
7854 {
7855         int i, id, btf_fd, err;
7856         struct btf *btf;
7857
7858         btf = obj->btf_vmlinux;
7859         btf_fd = 0;
7860         id = btf__find_by_name_kind(btf, ksym_name, kind);
7861
7862         if (id == -ENOENT) {
7863                 err = load_module_btfs(obj);
7864                 if (err)
7865                         return err;
7866
7867                 for (i = 0; i < obj->btf_module_cnt; i++) {
7868                         btf = obj->btf_modules[i].btf;
7869                         /* we assume module BTF FD is always >0 */
7870                         btf_fd = obj->btf_modules[i].fd;
7871                         id = btf__find_by_name_kind(btf, ksym_name, kind);
7872                         if (id != -ENOENT)
7873                                 break;
7874                 }
7875         }
7876         if (id <= 0) {
7877                 pr_warn("extern (%s ksym) '%s': failed to find BTF ID in kernel BTF(s).\n",
7878                         __btf_kind_str(kind), ksym_name);
7879                 return -ESRCH;
7880         }
7881
7882         *res_btf = btf;
7883         *res_btf_fd = btf_fd;
7884         return id;
7885 }
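/* The ksym externs resolved below are declared on the BPF side roughly as
 * (hypothetical names, using the __ksym attribute from bpf_helpers.h):
 *
 *    extern const struct rq runqueues __ksym;        typed variable ksym
 *    extern void my_kfunc(void) __ksym;              kfunc (BTF_KIND_FUNC)
 */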
7886
7887 static int bpf_object__resolve_ksym_var_btf_id(struct bpf_object *obj,
7888                                                struct extern_desc *ext)
7889 {
7890         const struct btf_type *targ_var, *targ_type;
7891         __u32 targ_type_id, local_type_id;
7892         const char *targ_var_name;
7893         int id, btf_fd = 0, err;
7894         struct btf *btf = NULL;
7895
7896         id = find_ksym_btf_id(obj, ext->name, BTF_KIND_VAR, &btf, &btf_fd);
7897         if (id < 0)
7898                 return id;
7899
7900         /* find local type_id */
7901         local_type_id = ext->ksym.type_id;
7902
7903         /* find target type_id */
7904         targ_var = btf__type_by_id(btf, id);
7905         targ_var_name = btf__name_by_offset(btf, targ_var->name_off);
7906         targ_type = skip_mods_and_typedefs(btf, targ_var->type, &targ_type_id);
7907
7908         err = bpf_core_types_are_compat(obj->btf, local_type_id,
7909                                         btf, targ_type_id);
7910         if (err <= 0) {
7911                 const struct btf_type *local_type;
7912                 const char *targ_name, *local_name;
7913
7914                 local_type = btf__type_by_id(obj->btf, local_type_id);
7915                 local_name = btf__name_by_offset(obj->btf, local_type->name_off);
7916                 targ_name = btf__name_by_offset(btf, targ_type->name_off);
7917
7918                 pr_warn("extern (var ksym) '%s': incompatible types, expected [%d] %s %s, but kernel has [%d] %s %s\n",
7919                         ext->name, local_type_id,
7920                         btf_kind_str(local_type), local_name, targ_type_id,
7921                         btf_kind_str(targ_type), targ_name);
7922                 return -EINVAL;
7923         }
7924
7925         ext->is_set = true;
7926         ext->ksym.kernel_btf_obj_fd = btf_fd;
7927         ext->ksym.kernel_btf_id = id;
7928         pr_debug("extern (var ksym) '%s': resolved to [%d] %s %s\n",
7929                  ext->name, id, btf_kind_str(targ_var), targ_var_name);
7930
7931         return 0;
7932 }
7933
7934 static int bpf_object__resolve_ksym_func_btf_id(struct bpf_object *obj,
7935                                                 struct extern_desc *ext)
7936 {
7937         int local_func_proto_id, kfunc_proto_id, kfunc_id;
7938         const struct btf_type *kern_func;
7939         struct btf *kern_btf = NULL;
7940         int ret, kern_btf_fd = 0;
7941
7942         local_func_proto_id = ext->ksym.type_id;
7943
7944         kfunc_id = find_ksym_btf_id(obj, ext->name, BTF_KIND_FUNC,
7945                                     &kern_btf, &kern_btf_fd);
7946         if (kfunc_id < 0) {
7947                 pr_warn("extern (func ksym) '%s': not found in kernel BTF\n",
7948                         ext->name);
7949                 return kfunc_id;
7950         }
7951
7952         if (kern_btf != obj->btf_vmlinux) {
7953                 pr_warn("extern (func ksym) '%s': function in kernel module is not supported\n",
7954                         ext->name);
7955                 return -ENOTSUP;
7956         }
7957
7958         kern_func = btf__type_by_id(kern_btf, kfunc_id);
7959         kfunc_proto_id = kern_func->type;
7960
7961         ret = bpf_core_types_are_compat(obj->btf, local_func_proto_id,
7962                                         kern_btf, kfunc_proto_id);
7963         if (ret <= 0) {
7964                 pr_warn("extern (func ksym) '%s': func_proto [%d] incompatible with kernel [%d]\n",
7965                         ext->name, local_func_proto_id, kfunc_proto_id);
7966                 return -EINVAL;
7967         }
7968
7969         ext->is_set = true;
7970         ext->ksym.kernel_btf_obj_fd = kern_btf_fd;
7971         ext->ksym.kernel_btf_id = kfunc_id;
7972         pr_debug("extern (func ksym) '%s': resolved to kernel [%d]\n",
7973                  ext->name, kfunc_id);
7974
7975         return 0;
7976 }
7977
7978 static int bpf_object__resolve_ksyms_btf_id(struct bpf_object *obj)
7979 {
7980         const struct btf_type *t;
7981         struct extern_desc *ext;
7982         int i, err;
7983
7984         for (i = 0; i < obj->nr_extern; i++) {
7985                 ext = &obj->externs[i];
7986                 if (ext->type != EXT_KSYM || !ext->ksym.type_id)
7987                         continue;
7988
7989                 if (obj->gen_loader) {
7990                         ext->is_set = true;
7991                         ext->ksym.kernel_btf_obj_fd = 0;
7992                         ext->ksym.kernel_btf_id = 0;
7993                         continue;
7994                 }
7995                 t = btf__type_by_id(obj->btf, ext->btf_id);
7996                 if (btf_is_var(t))
7997                         err = bpf_object__resolve_ksym_var_btf_id(obj, ext);
7998                 else
7999                         err = bpf_object__resolve_ksym_func_btf_id(obj, ext);
8000                 if (err)
8001                         return err;
8002         }
8003         return 0;
8004 }
8005
8006 static int bpf_object__resolve_externs(struct bpf_object *obj,
8007                                        const char *extra_kconfig)
8008 {
8009         bool need_config = false, need_kallsyms = false;
8010         bool need_vmlinux_btf = false;
8011         struct extern_desc *ext;
8012         void *kcfg_data = NULL;
8013         int err, i;
8014
8015         if (obj->nr_extern == 0)
8016                 return 0;
8017
8018         if (obj->kconfig_map_idx >= 0)
8019                 kcfg_data = obj->maps[obj->kconfig_map_idx].mmaped;
8020
8021         for (i = 0; i < obj->nr_extern; i++) {
8022                 ext = &obj->externs[i];
8023
8024                 if (ext->type == EXT_KCFG &&
8025                     strcmp(ext->name, "LINUX_KERNEL_VERSION") == 0) {
8026                         void *ext_val = kcfg_data + ext->kcfg.data_off;
8027                         __u32 kver = get_kernel_version();
8028
8029                         if (!kver) {
8030                                 pr_warn("failed to get kernel version\n");
8031                                 return -EINVAL;
8032                         }
8033                         err = set_kcfg_value_num(ext, ext_val, kver);
8034                         if (err)
8035                                 return err;
8036                         pr_debug("extern (kcfg) %s=0x%x\n", ext->name, kver);
8037                 } else if (ext->type == EXT_KCFG &&
8038                            strncmp(ext->name, "CONFIG_", 7) == 0) {
8039                         need_config = true;
8040                 } else if (ext->type == EXT_KSYM) {
8041                         if (ext->ksym.type_id)
8042                                 need_vmlinux_btf = true;
8043                         else
8044                                 need_kallsyms = true;
8045                 } else {
8046                         pr_warn("unrecognized extern '%s'\n", ext->name);
8047                         return -EINVAL;
8048                 }
8049         }
8050         if (need_config && extra_kconfig) {
8051                 err = bpf_object__read_kconfig_mem(obj, extra_kconfig, kcfg_data);
8052                 if (err)
8053                         return -EINVAL;
8054                 need_config = false;
8055                 for (i = 0; i < obj->nr_extern; i++) {
8056                         ext = &obj->externs[i];
8057                         if (ext->type == EXT_KCFG && !ext->is_set) {
8058                                 need_config = true;
8059                                 break;
8060                         }
8061                 }
8062         }
8063         if (need_config) {
8064                 err = bpf_object__read_kconfig_file(obj, kcfg_data);
8065                 if (err)
8066                         return -EINVAL;
8067         }
8068         if (need_kallsyms) {
8069                 err = bpf_object__read_kallsyms_file(obj);
8070                 if (err)
8071                         return -EINVAL;
8072         }
8073         if (need_vmlinux_btf) {
8074                 err = bpf_object__resolve_ksyms_btf_id(obj);
8075                 if (err)
8076                         return -EINVAL;
8077         }
8078         for (i = 0; i < obj->nr_extern; i++) {
8079                 ext = &obj->externs[i];
8080
8081                 if (!ext->is_set && !ext->is_weak) {
8082                         pr_warn("extern %s (strong) not resolved\n", ext->name);
8083                         return -ESRCH;
8084                 } else if (!ext->is_set) {
8085                         pr_debug("extern %s (weak) not resolved, defaulting to zero\n",
8086                                  ext->name);
8087                 }
8088         }
8089
8090         return 0;
8091 }
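
/* Usage sketch (illustrative, not part of libbpf): the externs resolved
 * above are declared on the BPF side with the __kconfig/__ksym conventions
 * from bpf_helpers.h, e.g.:
 *
 *        extern int LINUX_KERNEL_VERSION __kconfig;
 *        extern _Bool CONFIG_BPF_SYSCALL __kconfig __weak;
 *        extern const void bpf_prog_active __ksym;
 *
 * Strong externs must resolve or loading fails with -ESRCH; weak ones are
 * left defaulted to zero, as the final loop above shows.
 */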
8092
8093 int bpf_object__load_xattr(struct bpf_object_load_attr *attr)
8094 {
8095         struct bpf_object *obj;
8096         int err, i;
8097
8098         if (!attr)
8099                 return libbpf_err(-EINVAL);
8100         obj = attr->obj;
8101         if (!obj)
8102                 return libbpf_err(-EINVAL);
8103
8104         if (obj->loaded) {
8105                 pr_warn("object '%s': load can't be attempted twice\n", obj->name);
8106                 return libbpf_err(-EINVAL);
8107         }
8108
8109         if (obj->gen_loader)
8110                 bpf_gen__init(obj->gen_loader, attr->log_level);
8111
8112         err = bpf_object__probe_loading(obj);
8113         err = err ? : bpf_object__load_vmlinux_btf(obj, false);
8114         err = err ? : bpf_object__resolve_externs(obj, obj->kconfig);
8115         err = err ? : bpf_object__sanitize_and_load_btf(obj);
8116         err = err ? : bpf_object__sanitize_maps(obj);
8117         err = err ? : bpf_object__init_kern_struct_ops_maps(obj);
8118         err = err ? : bpf_object__create_maps(obj);
8119         err = err ? : bpf_object__relocate(obj, obj->btf_custom_path ? : attr->target_btf_path);
8120         err = err ? : bpf_object__load_progs(obj, attr->log_level);
8121
8122         if (obj->gen_loader) {
8123                 /* reset FDs */
8124                 btf__set_fd(obj->btf, -1);
8125                 for (i = 0; i < obj->nr_maps; i++)
8126                         obj->maps[i].fd = -1;
8127                 if (!err)
8128                         err = bpf_gen__finish(obj->gen_loader);
8129         }
8130
8131         /* clean up module BTFs */
8132         for (i = 0; i < obj->btf_module_cnt; i++) {
8133                 close(obj->btf_modules[i].fd);
8134                 btf__free(obj->btf_modules[i].btf);
8135                 free(obj->btf_modules[i].name);
8136         }
8137         free(obj->btf_modules);
8138
8139         /* clean up vmlinux BTF */
8140         btf__free(obj->btf_vmlinux);
8141         obj->btf_vmlinux = NULL;
8142
8143         obj->loaded = true; /* mark as loaded regardless of success or failure */
8144
8145         if (err)
8146                 goto out;
8147
8148         return 0;
8149 out:
8150         /* unpin any maps that were auto-pinned during load */
8151         for (i = 0; i < obj->nr_maps; i++)
8152                 if (obj->maps[i].pinned && !obj->maps[i].reused)
8153                         bpf_map__unpin(&obj->maps[i], NULL);
8154
8155         bpf_object__unload(obj);
8156         pr_warn("failed to load object '%s'\n", obj->path);
8157         return libbpf_err(err);
8158 }
8159
8160 int bpf_object__load(struct bpf_object *obj)
8161 {
8162         struct bpf_object_load_attr attr = {
8163                 .obj = obj,
8164         };
8165
8166         return bpf_object__load_xattr(&attr);
8167 }
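
/* Usage sketch (illustrative; the object file name is hypothetical): the
 * typical open/load/close lifecycle around bpf_object__load():
 *
 *        struct bpf_object *obj;
 *        int err;
 *
 *        obj = bpf_object__open("prog.o");
 *        err = libbpf_get_error(obj);
 *        if (err)
 *                return err;
 *        err = bpf_object__load(obj);
 *        if (err)
 *                goto cleanup;
 *        ... attach programs, use maps ...
 * cleanup:
 *        bpf_object__close(obj);
 */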
8168
8169 static int make_parent_dir(const char *path)
8170 {
8171         char *cp, errmsg[STRERR_BUFSIZE];
8172         char *dname, *dir;
8173         int err = 0;
8174
8175         dname = strdup(path);
8176         if (dname == NULL)
8177                 return -ENOMEM;
8178
8179         dir = dirname(dname);
8180         if (mkdir(dir, 0700) && errno != EEXIST)
8181                 err = -errno;
8182
8183         free(dname);
8184         if (err) {
8185                 cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg));
8186                 pr_warn("failed to mkdir %s: %s\n", path, cp);
8187         }
8188         return err;
8189 }
8190
8191 static int check_path(const char *path)
8192 {
8193         char *cp, errmsg[STRERR_BUFSIZE];
8194         struct statfs st_fs;
8195         char *dname, *dir;
8196         int err = 0;
8197
8198         if (path == NULL)
8199                 return -EINVAL;
8200
8201         dname = strdup(path);
8202         if (dname == NULL)
8203                 return -ENOMEM;
8204
8205         dir = dirname(dname);
8206         if (statfs(dir, &st_fs)) {
8207                 cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
8208                 pr_warn("failed to statfs %s: %s\n", dir, cp);
8209                 err = -errno;
8210         }
8211         free(dname);
8212
8213         if (!err && st_fs.f_type != BPF_FS_MAGIC) {
8214                 pr_warn("specified path %s is not on BPF FS\n", path);
8215                 err = -EINVAL;
8216         }
8217
8218         return err;
8219 }
8220
8221 int bpf_program__pin_instance(struct bpf_program *prog, const char *path,
8222                               int instance)
8223 {
8224         char *cp, errmsg[STRERR_BUFSIZE];
8225         int err;
8226
8227         err = make_parent_dir(path);
8228         if (err)
8229                 return libbpf_err(err);
8230
8231         err = check_path(path);
8232         if (err)
8233                 return libbpf_err(err);
8234
8235         if (prog == NULL) {
8236                 pr_warn("invalid program pointer\n");
8237                 return libbpf_err(-EINVAL);
8238         }
8239
8240         if (instance < 0 || instance >= prog->instances.nr) {
8241                 pr_warn("invalid prog instance %d of prog %s (max %d)\n",
8242                         instance, prog->name, prog->instances.nr);
8243                 return libbpf_err(-EINVAL);
8244         }
8245
8246         if (bpf_obj_pin(prog->instances.fds[instance], path)) {
8247                 err = -errno;
8248                 cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
8249                 pr_warn("failed to pin program: %s\n", cp);
8250                 return libbpf_err(err);
8251         }
8252         pr_debug("pinned program '%s'\n", path);
8253
8254         return 0;
8255 }
8256
8257 int bpf_program__unpin_instance(struct bpf_program *prog, const char *path,
8258                                 int instance)
8259 {
8260         int err;
8261
8262         err = check_path(path);
8263         if (err)
8264                 return libbpf_err(err);
8265
8266         if (prog == NULL) {
8267                 pr_warn("invalid program pointer\n");
8268                 return libbpf_err(-EINVAL);
8269         }
8270
8271         if (instance < 0 || instance >= prog->instances.nr) {
8272                 pr_warn("invalid prog instance %d of prog %s (max %d)\n",
8273                         instance, prog->name, prog->instances.nr);
8274                 return libbpf_err(-EINVAL);
8275         }
8276
8277         err = unlink(path);
8278         if (err != 0)
8279                 return libbpf_err(-errno);
8280
8281         pr_debug("unpinned program '%s'\n", path);
8282
8283         return 0;
8284 }
8285
8286 int bpf_program__pin(struct bpf_program *prog, const char *path)
8287 {
8288         int i, err;
8289
8290         err = make_parent_dir(path);
8291         if (err)
8292                 return libbpf_err(err);
8293
8294         err = check_path(path);
8295         if (err)
8296                 return libbpf_err(err);
8297
8298         if (prog == NULL) {
8299                 pr_warn("invalid program pointer\n");
8300                 return libbpf_err(-EINVAL);
8301         }
8302
8303         if (prog->instances.nr <= 0) {
8304                 pr_warn("no instances of prog %s to pin\n", prog->name);
8305                 return libbpf_err(-EINVAL);
8306         }
8307
8308         if (prog->instances.nr == 1) {
8309                 /* don't create subdirs when pinning single instance */
8310                 return bpf_program__pin_instance(prog, path, 0);
8311         }
8312
8313         for (i = 0; i < prog->instances.nr; i++) {
8314                 char buf[PATH_MAX];
8315                 int len;
8316
8317                 len = snprintf(buf, PATH_MAX, "%s/%d", path, i);
8318                 if (len < 0) {
8319                         err = -EINVAL;
8320                         goto err_unpin;
8321                 } else if (len >= PATH_MAX) {
8322                         err = -ENAMETOOLONG;
8323                         goto err_unpin;
8324                 }
8325
8326                 err = bpf_program__pin_instance(prog, buf, i);
8327                 if (err)
8328                         goto err_unpin;
8329         }
8330
8331         return 0;
8332
8333 err_unpin:
8334         for (i = i - 1; i >= 0; i--) {
8335                 char buf[PATH_MAX];
8336                 int len;
8337
8338                 len = snprintf(buf, PATH_MAX, "%s/%d", path, i);
8339                 if (len < 0)
8340                         continue;
8341                 else if (len >= PATH_MAX)
8342                         continue;
8343
8344                 bpf_program__unpin_instance(prog, buf, i);
8345         }
8346
8347         rmdir(path);
8348
8349         return libbpf_err(err);
8350 }
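
/* Usage sketch (illustrative; program name and bpffs path are
 * hypothetical): pinning one program of a loaded object:
 *
 *        struct bpf_program *prog;
 *
 *        prog = bpf_object__find_program_by_name(obj, "handle_exec");
 *        if (prog)
 *                err = bpf_program__pin(prog, "/sys/fs/bpf/handle_exec");
 *
 * The path must be on a bpffs mount; check_path() above rejects anything
 * whose f_type is not BPF_FS_MAGIC.
 */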
8351
8352 int bpf_program__unpin(struct bpf_program *prog, const char *path)
8353 {
8354         int i, err;
8355
8356         err = check_path(path);
8357         if (err)
8358                 return libbpf_err(err);
8359
8360         if (prog == NULL) {
8361                 pr_warn("invalid program pointer\n");
8362                 return libbpf_err(-EINVAL);
8363         }
8364
8365         if (prog->instances.nr <= 0) {
8366                 pr_warn("no instances of prog %s to unpin\n", prog->name);
8367                 return libbpf_err(-EINVAL);
8368         }
8369
8370         if (prog->instances.nr == 1) {
8371                 /* don't create subdirs when pinning single instance */
8372                 /* a single instance was pinned without subdirs; unpin it directly */
8373         }
8374
8375         for (i = 0; i < prog->instances.nr; i++) {
8376                 char buf[PATH_MAX];
8377                 int len;
8378
8379                 len = snprintf(buf, PATH_MAX, "%s/%d", path, i);
8380                 if (len < 0)
8381                         return libbpf_err(-EINVAL);
8382                 else if (len >= PATH_MAX)
8383                         return libbpf_err(-ENAMETOOLONG);
8384
8385                 err = bpf_program__unpin_instance(prog, buf, i);
8386                 if (err)
8387                         return err;
8388         }
8389
8390         err = rmdir(path);
8391         if (err)
8392                 return libbpf_err(-errno);
8393
8394         return 0;
8395 }
8396
8397 int bpf_map__pin(struct bpf_map *map, const char *path)
8398 {
8399         char *cp, errmsg[STRERR_BUFSIZE];
8400         int err;
8401
8402         if (map == NULL) {
8403                 pr_warn("invalid map pointer\n");
8404                 return libbpf_err(-EINVAL);
8405         }
8406
8407         if (map->pin_path) {
8408                 if (path && strcmp(path, map->pin_path)) {
8409                         pr_warn("map '%s' already has pin path '%s' different from '%s'\n",
8410                                 bpf_map__name(map), map->pin_path, path);
8411                         return libbpf_err(-EINVAL);
8412                 } else if (map->pinned) {
8413                         pr_debug("map '%s' already pinned at '%s'; not re-pinning\n",
8414                                  bpf_map__name(map), map->pin_path);
8415                         return 0;
8416                 }
8417         } else {
8418                 if (!path) {
8419                         pr_warn("missing a path to pin map '%s' at\n",
8420                                 bpf_map__name(map));
8421                         return libbpf_err(-EINVAL);
8422                 } else if (map->pinned) {
8423                         pr_warn("map '%s' already pinned\n", bpf_map__name(map));
8424                         return libbpf_err(-EEXIST);
8425                 }
8426
8427                 map->pin_path = strdup(path);
8428                 if (!map->pin_path) {
8429                         err = -errno;
8430                         goto out_err;
8431                 }
8432         }
8433
8434         err = make_parent_dir(map->pin_path);
8435         if (err)
8436                 return libbpf_err(err);
8437
8438         err = check_path(map->pin_path);
8439         if (err)
8440                 return libbpf_err(err);
8441
8442         if (bpf_obj_pin(map->fd, map->pin_path)) {
8443                 err = -errno;
8444                 goto out_err;
8445         }
8446
8447         map->pinned = true;
8448         pr_debug("pinned map '%s'\n", map->pin_path);
8449
8450         return 0;
8451
8452 out_err:
8453         cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg));
8454         pr_warn("failed to pin map: %s\n", cp);
8455         return libbpf_err(err);
8456 }
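
/* Usage sketch (illustrative; paths are hypothetical): a map can be pinned
 * explicitly after load, or given a pin path up front so that map creation
 * auto-pins it:
 *
 *        err = bpf_map__pin(map, "/sys/fs/bpf/my_map");
 *
 * or, before bpf_object__load():
 *
 *        err = bpf_map__set_pin_path(map, "/sys/fs/bpf/my_map");
 */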
8457
8458 int bpf_map__unpin(struct bpf_map *map, const char *path)
8459 {
8460         int err;
8461
8462         if (map == NULL) {
8463                 pr_warn("invalid map pointer\n");
8464                 return libbpf_err(-EINVAL);
8465         }
8466
8467         if (map->pin_path) {
8468                 if (path && strcmp(path, map->pin_path)) {
8469                         pr_warn("map '%s' already has pin path '%s' different from '%s'\n",
8470                                 bpf_map__name(map), map->pin_path, path);
8471                         return libbpf_err(-EINVAL);
8472                 }
8473                 path = map->pin_path;
8474         } else if (!path) {
8475                 pr_warn("no path to unpin map '%s' from\n",
8476                         bpf_map__name(map));
8477                 return libbpf_err(-EINVAL);
8478         }
8479
8480         err = check_path(path);
8481         if (err)
8482                 return libbpf_err(err);
8483
8484         err = unlink(path);
8485         if (err != 0)
8486                 return libbpf_err(-errno);
8487
8488         map->pinned = false;
8489         pr_debug("unpinned map '%s' from '%s'\n", bpf_map__name(map), path);
8490
8491         return 0;
8492 }
8493
8494 int bpf_map__set_pin_path(struct bpf_map *map, const char *path)
8495 {
8496         char *new = NULL;
8497
8498         if (path) {
8499                 new = strdup(path);
8500                 if (!new)
8501                         return libbpf_err(-errno);
8502         }
8503
8504         free(map->pin_path);
8505         map->pin_path = new;
8506         return 0;
8507 }
8508
8509 const char *bpf_map__get_pin_path(const struct bpf_map *map)
8510 {
8511         return map->pin_path;
8512 }
8513
8514 bool bpf_map__is_pinned(const struct bpf_map *map)
8515 {
8516         return map->pinned;
8517 }
8518
8519 static void sanitize_pin_path(char *s)
8520 {
8521         /* bpffs disallows periods in path names */
8522         while (*s) {
8523                 if (*s == '.')
8524                         *s = '_';
8525                 s++;
8526         }
8527 }
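
/* E.g. an internal map named "prog_b.rodata" (hypothetical) is pinned as
 * "prog_b_rodata" when its pin path is derived from the map name below.
 */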
8528
8529 int bpf_object__pin_maps(struct bpf_object *obj, const char *path)
8530 {
8531         struct bpf_map *map;
8532         int err;
8533
8534         if (!obj)
8535                 return libbpf_err(-ENOENT);
8536
8537         if (!obj->loaded) {
8538                 pr_warn("object not yet loaded; load it first\n");
8539                 return libbpf_err(-ENOENT);
8540         }
8541
8542         bpf_object__for_each_map(map, obj) {
8543                 char *pin_path = NULL;
8544                 char buf[PATH_MAX];
8545
8546                 if (path) {
8547                         int len;
8548
8549                         len = snprintf(buf, PATH_MAX, "%s/%s", path,
8550                                        bpf_map__name(map));
8551                         if (len < 0) {
8552                                 err = -EINVAL;
8553                                 goto err_unpin_maps;
8554                         } else if (len >= PATH_MAX) {
8555                                 err = -ENAMETOOLONG;
8556                                 goto err_unpin_maps;
8557                         }
8558                         sanitize_pin_path(buf);
8559                         pin_path = buf;
8560                 } else if (!map->pin_path) {
8561                         continue;
8562                 }
8563
8564                 err = bpf_map__pin(map, pin_path);
8565                 if (err)
8566                         goto err_unpin_maps;
8567         }
8568
8569         return 0;
8570
8571 err_unpin_maps:
8572         while ((map = bpf_map__prev(map, obj))) {
8573                 if (!map->pin_path)
8574                         continue;
8575
8576                 bpf_map__unpin(map, NULL);
8577         }
8578
8579         return libbpf_err(err);
8580 }
8581
8582 int bpf_object__unpin_maps(struct bpf_object *obj, const char *path)
8583 {
8584         struct bpf_map *map;
8585         int err;
8586
8587         if (!obj)
8588                 return libbpf_err(-ENOENT);
8589
8590         bpf_object__for_each_map(map, obj) {
8591                 char *pin_path = NULL;
8592                 char buf[PATH_MAX];
8593
8594                 if (path) {
8595                         int len;
8596
8597                         len = snprintf(buf, PATH_MAX, "%s/%s", path,
8598                                        bpf_map__name(map));
8599                         if (len < 0)
8600                                 return libbpf_err(-EINVAL);
8601                         else if (len >= PATH_MAX)
8602                                 return libbpf_err(-ENAMETOOLONG);
8603                         sanitize_pin_path(buf);
8604                         pin_path = buf;
8605                 } else if (!map->pin_path) {
8606                         continue;
8607                 }
8608
8609                 err = bpf_map__unpin(map, pin_path);
8610                 if (err)
8611                         return libbpf_err(err);
8612         }
8613
8614         return 0;
8615 }
8616
8617 int bpf_object__pin_programs(struct bpf_object *obj, const char *path)
8618 {
8619         struct bpf_program *prog;
8620         int err;
8621
8622         if (!obj)
8623                 return libbpf_err(-ENOENT);
8624
8625         if (!obj->loaded) {
8626                 pr_warn("object not yet loaded; load it first\n");
8627                 return libbpf_err(-ENOENT);
8628         }
8629
8630         bpf_object__for_each_program(prog, obj) {
8631                 char buf[PATH_MAX];
8632                 int len;
8633
8634                 len = snprintf(buf, PATH_MAX, "%s/%s", path,
8635                                prog->pin_name);
8636                 if (len < 0) {
8637                         err = -EINVAL;
8638                         goto err_unpin_programs;
8639                 } else if (len >= PATH_MAX) {
8640                         err = -ENAMETOOLONG;
8641                         goto err_unpin_programs;
8642                 }
8643
8644                 err = bpf_program__pin(prog, buf);
8645                 if (err)
8646                         goto err_unpin_programs;
8647         }
8648
8649         return 0;
8650
8651 err_unpin_programs:
8652         while ((prog = bpf_program__prev(prog, obj))) {
8653                 char buf[PATH_MAX];
8654                 int len;
8655
8656                 len = snprintf(buf, PATH_MAX, "%s/%s", path,
8657                                prog->pin_name);
8658                 if (len < 0)
8659                         continue;
8660                 else if (len >= PATH_MAX)
8661                         continue;
8662
8663                 bpf_program__unpin(prog, buf);
8664         }
8665
8666         return libbpf_err(err);
8667 }
8668
8669 int bpf_object__unpin_programs(struct bpf_object *obj, const char *path)
8670 {
8671         struct bpf_program *prog;
8672         int err;
8673
8674         if (!obj)
8675                 return libbpf_err(-ENOENT);
8676
8677         bpf_object__for_each_program(prog, obj) {
8678                 char buf[PATH_MAX];
8679                 int len;
8680
8681                 len = snprintf(buf, PATH_MAX, "%s/%s", path,
8682                                prog->pin_name);
8683                 if (len < 0)
8684                         return libbpf_err(-EINVAL);
8685                 else if (len >= PATH_MAX)
8686                         return libbpf_err(-ENAMETOOLONG);
8687
8688                 err = bpf_program__unpin(prog, buf);
8689                 if (err)
8690                         return libbpf_err(err);
8691         }
8692
8693         return 0;
8694 }
8695
8696 int bpf_object__pin(struct bpf_object *obj, const char *path)
8697 {
8698         int err;
8699
8700         err = bpf_object__pin_maps(obj, path);
8701         if (err)
8702                 return libbpf_err(err);
8703
8704         err = bpf_object__pin_programs(obj, path);
8705         if (err) {
8706                 bpf_object__unpin_maps(obj, path);
8707                 return libbpf_err(err);
8708         }
8709
8710         return 0;
8711 }
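
/* Usage sketch (illustrative; the directory is hypothetical): pin a whole
 * loaded object in one call; maps are pinned under their names, programs
 * under their pin_name:
 *
 *        err = bpf_object__pin(obj, "/sys/fs/bpf/myobj");
 *
 * On failure nothing stays pinned: a failed program pin rolls back the
 * map pins as well.
 */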
8712
8713 static void bpf_map__destroy(struct bpf_map *map)
8714 {
8715         if (map->clear_priv)
8716                 map->clear_priv(map, map->priv);
8717         map->priv = NULL;
8718         map->clear_priv = NULL;
8719
8720         if (map->inner_map) {
8721                 bpf_map__destroy(map->inner_map);
8722                 zfree(&map->inner_map);
8723         }
8724
8725         zfree(&map->init_slots);
8726         map->init_slots_sz = 0;
8727
8728         if (map->mmaped) {
8729                 munmap(map->mmaped, bpf_map_mmap_sz(map));
8730                 map->mmaped = NULL;
8731         }
8732
8733         if (map->st_ops) {
8734                 zfree(&map->st_ops->data);
8735                 zfree(&map->st_ops->progs);
8736                 zfree(&map->st_ops->kern_func_off);
8737                 zfree(&map->st_ops);
8738         }
8739
8740         zfree(&map->name);
8741         zfree(&map->pin_path);
8742
8743         if (map->fd >= 0)
8744                 zclose(map->fd);
8745 }
8746
8747 void bpf_object__close(struct bpf_object *obj)
8748 {
8749         size_t i;
8750
8751         if (IS_ERR_OR_NULL(obj))
8752                 return;
8753
8754         if (obj->clear_priv)
8755                 obj->clear_priv(obj, obj->priv);
8756
8757         bpf_gen__free(obj->gen_loader);
8758         bpf_object__elf_finish(obj);
8759         bpf_object__unload(obj);
8760         btf__free(obj->btf);
8761         btf_ext__free(obj->btf_ext);
8762
8763         for (i = 0; i < obj->nr_maps; i++)
8764                 bpf_map__destroy(&obj->maps[i]);
8765
8766         zfree(&obj->btf_custom_path);
8767         zfree(&obj->kconfig);
8768         zfree(&obj->externs);
8769         obj->nr_extern = 0;
8770
8771         zfree(&obj->maps);
8772         obj->nr_maps = 0;
8773
8774         if (obj->programs && obj->nr_programs) {
8775                 for (i = 0; i < obj->nr_programs; i++)
8776                         bpf_program__exit(&obj->programs[i]);
8777         }
8778         zfree(&obj->programs);
8779
8780         list_del(&obj->list);
8781         free(obj);
8782 }
8783
8784 struct bpf_object *
8785 bpf_object__next(struct bpf_object *prev)
8786 {
8787         struct bpf_object *next;
8788
8789         if (!prev)
8790                 next = list_first_entry(&bpf_objects_list,
8791                                         struct bpf_object,
8792                                         list);
8793         else
8794                 next = list_next_entry(prev, list);
8795
8796         /* An empty list is detected here, so no check is needed on entry. */
8797         if (&next->list == &bpf_objects_list)
8798                 return NULL;
8799
8800         return next;
8801 }
8802
8803 const char *bpf_object__name(const struct bpf_object *obj)
8804 {
8805         return obj ? obj->name : libbpf_err_ptr(-EINVAL);
8806 }
8807
8808 unsigned int bpf_object__kversion(const struct bpf_object *obj)
8809 {
8810         return obj ? obj->kern_version : 0;
8811 }
8812
8813 struct btf *bpf_object__btf(const struct bpf_object *obj)
8814 {
8815         return obj ? obj->btf : NULL;
8816 }
8817
8818 int bpf_object__btf_fd(const struct bpf_object *obj)
8819 {
8820         return obj->btf ? btf__fd(obj->btf) : -1;
8821 }
8822
8823 int bpf_object__set_kversion(struct bpf_object *obj, __u32 kern_version)
8824 {
8825         if (obj->loaded)
8826                 return libbpf_err(-EINVAL);
8827
8828         obj->kern_version = kern_version;
8829
8830         return 0;
8831 }
8832
8833 int bpf_object__set_priv(struct bpf_object *obj, void *priv,
8834                          bpf_object_clear_priv_t clear_priv)
8835 {
8836         if (obj->priv && obj->clear_priv)
8837                 obj->clear_priv(obj, obj->priv);
8838
8839         obj->priv = priv;
8840         obj->clear_priv = clear_priv;
8841         return 0;
8842 }
8843
8844 void *bpf_object__priv(const struct bpf_object *obj)
8845 {
8846         return obj ? obj->priv : libbpf_err_ptr(-EINVAL);
8847 }
8848
8849 int bpf_object__gen_loader(struct bpf_object *obj, struct gen_loader_opts *opts)
8850 {
8851         struct bpf_gen *gen;
8852
8853         if (!opts)
8854                 return -EFAULT;
8855         if (!OPTS_VALID(opts, gen_loader_opts))
8856                 return -EINVAL;
8857         gen = calloc(1, sizeof(*gen));
8858         if (!gen)
8859                 return -ENOMEM;
8860         gen->opts = opts;
8861         obj->gen_loader = gen;
8862         return 0;
8863 }
8864
8865 static struct bpf_program *
8866 __bpf_program__iter(const struct bpf_program *p, const struct bpf_object *obj,
8867                     bool forward)
8868 {
8869         size_t nr_programs = obj->nr_programs;
8870         ssize_t idx;
8871
8872         if (!nr_programs)
8873                 return NULL;
8874
8875         if (!p)
8876                 /* Iterate from the beginning */
8877                 return forward ? &obj->programs[0] :
8878                         &obj->programs[nr_programs - 1];
8879
8880         if (p->obj != obj) {
8881                 pr_warn("error: program handle doesn't match object\n");
8882                 return errno = EINVAL, NULL;
8883         }
8884
8885         idx = (p - obj->programs) + (forward ? 1 : -1);
8886         if (idx >= obj->nr_programs || idx < 0)
8887                 return NULL;
8888         return &obj->programs[idx];
8889 }
8890
8891 struct bpf_program *
8892 bpf_program__next(struct bpf_program *prev, const struct bpf_object *obj)
8893 {
8894         struct bpf_program *prog = prev;
8895
8896         do {
8897                 prog = __bpf_program__iter(prog, obj, true);
8898         } while (prog && prog_is_subprog(obj, prog));
8899
8900         return prog;
8901 }
8902
8903 struct bpf_program *
8904 bpf_program__prev(struct bpf_program *next, const struct bpf_object *obj)
8905 {
8906         struct bpf_program *prog = next;
8907
8908         do {
8909                 prog = __bpf_program__iter(prog, obj, false);
8910         } while (prog && prog_is_subprog(obj, prog));
8911
8912         return prog;
8913 }
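
/* Usage sketch (illustrative): iterating all top-level (non-subprog)
 * programs of an object with the forward iterator built on the above:
 *
 *        struct bpf_program *prog;
 *
 *        bpf_object__for_each_program(prog, obj)
 *                printf("prog: %s\n", bpf_program__name(prog));
 */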
8914
8915 int bpf_program__set_priv(struct bpf_program *prog, void *priv,
8916                           bpf_program_clear_priv_t clear_priv)
8917 {
8918         if (prog->priv && prog->clear_priv)
8919                 prog->clear_priv(prog, prog->priv);
8920
8921         prog->priv = priv;
8922         prog->clear_priv = clear_priv;
8923         return 0;
8924 }
8925
8926 void *bpf_program__priv(const struct bpf_program *prog)
8927 {
8928         return prog ? prog->priv : libbpf_err_ptr(-EINVAL);
8929 }
8930
8931 void bpf_program__set_ifindex(struct bpf_program *prog, __u32 ifindex)
8932 {
8933         prog->prog_ifindex = ifindex;
8934 }
8935
8936 const char *bpf_program__name(const struct bpf_program *prog)
8937 {
8938         return prog->name;
8939 }
8940
8941 const char *bpf_program__section_name(const struct bpf_program *prog)
8942 {
8943         return prog->sec_name;
8944 }
8945
8946 const char *bpf_program__title(const struct bpf_program *prog, bool needs_copy)
8947 {
8948         const char *title;
8949
8950         title = prog->sec_name;
8951         if (needs_copy) {
8952                 title = strdup(title);
8953                 if (!title) {
8954                         pr_warn("failed to strdup program title\n");
8955                         return libbpf_err_ptr(-ENOMEM);
8956                 }
8957         }
8958
8959         return title;
8960 }
8961
8962 bool bpf_program__autoload(const struct bpf_program *prog)
8963 {
8964         return prog->load;
8965 }
8966
8967 int bpf_program__set_autoload(struct bpf_program *prog, bool autoload)
8968 {
8969         if (prog->obj->loaded)
8970                 return libbpf_err(-EINVAL);
8971
8972         prog->load = autoload;
8973         return 0;
8974 }
8975
8976 int bpf_program__fd(const struct bpf_program *prog)
8977 {
8978         return bpf_program__nth_fd(prog, 0);
8979 }
8980
8981 size_t bpf_program__size(const struct bpf_program *prog)
8982 {
8983         return prog->insns_cnt * BPF_INSN_SZ;
8984 }
8985
8986 int bpf_program__set_prep(struct bpf_program *prog, int nr_instances,
8987                           bpf_program_prep_t prep)
8988 {
8989         int *instances_fds;
8990
8991         if (nr_instances <= 0 || !prep)
8992                 return libbpf_err(-EINVAL);
8993
8994         if (prog->instances.nr > 0 || prog->instances.fds) {
8995                 pr_warn("Can't set pre-processor after loading\n");
8996                 return libbpf_err(-EINVAL);
8997         }
8998
8999         instances_fds = malloc(sizeof(int) * nr_instances);
9000         if (!instances_fds) {
9001                 pr_warn("failed to allocate memory for fds\n");
9002                 return libbpf_err(-ENOMEM);
9003         }
9004
9005         /* fill all fds with -1 */
9006         memset(instances_fds, -1, sizeof(int) * nr_instances);
9007
9008         prog->instances.nr = nr_instances;
9009         prog->instances.fds = instances_fds;
9010         prog->preprocessor = prep;
9011         return 0;
9012 }
9013
9014 int bpf_program__nth_fd(const struct bpf_program *prog, int n)
9015 {
9016         int fd;
9017
9018         if (!prog)
9019                 return libbpf_err(-EINVAL);
9020
9021         if (n >= prog->instances.nr || n < 0) {
9022                 pr_warn("Can't get the %dth fd from program %s: only %d instances\n",
9023                         n, prog->name, prog->instances.nr);
9024                 return libbpf_err(-EINVAL);
9025         }
9026
9027         fd = prog->instances.fds[n];
9028         if (fd < 0) {
9029                 pr_warn("%dth instance of program '%s' is invalid\n",
9030                         n, prog->name);
9031                 return libbpf_err(-ENOENT);
9032         }
9033
9034         return fd;
9035 }
9036
9037 enum bpf_prog_type bpf_program__get_type(const struct bpf_program *prog)
9038 {
9039         return prog->type;
9040 }
9041
9042 void bpf_program__set_type(struct bpf_program *prog, enum bpf_prog_type type)
9043 {
9044         prog->type = type;
9045 }
9046
9047 static bool bpf_program__is_type(const struct bpf_program *prog,
9048                                  enum bpf_prog_type type)
9049 {
9050         return prog ? (prog->type == type) : false;
9051 }
9052
9053 #define BPF_PROG_TYPE_FNS(NAME, TYPE)                           \
9054 int bpf_program__set_##NAME(struct bpf_program *prog)           \
9055 {                                                               \
9056         if (!prog)                                              \
9057                 return libbpf_err(-EINVAL);                     \
9058         bpf_program__set_type(prog, TYPE);                      \
9059         return 0;                                               \
9060 }                                                               \
9061                                                                 \
9062 bool bpf_program__is_##NAME(const struct bpf_program *prog)     \
9063 {                                                               \
9064         return bpf_program__is_type(prog, TYPE);                \
9065 }                                                               \
9066
9067 BPF_PROG_TYPE_FNS(socket_filter, BPF_PROG_TYPE_SOCKET_FILTER);
9068 BPF_PROG_TYPE_FNS(lsm, BPF_PROG_TYPE_LSM);
9069 BPF_PROG_TYPE_FNS(kprobe, BPF_PROG_TYPE_KPROBE);
9070 BPF_PROG_TYPE_FNS(sched_cls, BPF_PROG_TYPE_SCHED_CLS);
9071 BPF_PROG_TYPE_FNS(sched_act, BPF_PROG_TYPE_SCHED_ACT);
9072 BPF_PROG_TYPE_FNS(tracepoint, BPF_PROG_TYPE_TRACEPOINT);
9073 BPF_PROG_TYPE_FNS(raw_tracepoint, BPF_PROG_TYPE_RAW_TRACEPOINT);
9074 BPF_PROG_TYPE_FNS(xdp, BPF_PROG_TYPE_XDP);
9075 BPF_PROG_TYPE_FNS(perf_event, BPF_PROG_TYPE_PERF_EVENT);
9076 BPF_PROG_TYPE_FNS(tracing, BPF_PROG_TYPE_TRACING);
9077 BPF_PROG_TYPE_FNS(struct_ops, BPF_PROG_TYPE_STRUCT_OPS);
9078 BPF_PROG_TYPE_FNS(extension, BPF_PROG_TYPE_EXT);
9079 BPF_PROG_TYPE_FNS(sk_lookup, BPF_PROG_TYPE_SK_LOOKUP);
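
/* Usage sketch (illustrative): each BPF_PROG_TYPE_FNS() line above expands
 * into a setter/predicate pair, usable to force a program type when the
 * section name alone does not determine it:
 *
 *        err = bpf_program__set_xdp(prog);
 *        if (!err)
 *                assert(bpf_program__is_xdp(prog));
 */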
9080
9081 enum bpf_attach_type
9082 bpf_program__get_expected_attach_type(const struct bpf_program *prog)
9083 {
9084         return prog->expected_attach_type;
9085 }
9086
9087 void bpf_program__set_expected_attach_type(struct bpf_program *prog,
9088                                            enum bpf_attach_type type)
9089 {
9090         prog->expected_attach_type = type;
9091 }
9092
9093 #define BPF_PROG_SEC_IMPL(string, ptype, eatype, eatype_optional,           \
9094                           attachable, attach_btf)                           \
9095         {                                                                   \
9096                 .sec = string,                                              \
9097                 .len = sizeof(string) - 1,                                  \
9098                 .prog_type = ptype,                                         \
9099                 .expected_attach_type = eatype,                             \
9100                 .is_exp_attach_type_optional = eatype_optional,             \
9101                 .is_attachable = attachable,                                \
9102                 .is_attach_btf = attach_btf,                                \
9103         }
9104
9105 /* Programs that can NOT be attached. */
9106 #define BPF_PROG_SEC(string, ptype) BPF_PROG_SEC_IMPL(string, ptype, 0, 0, 0, 0)
9107
9108 /* Programs that can be attached. */
9109 #define BPF_APROG_SEC(string, ptype, atype) \
9110         BPF_PROG_SEC_IMPL(string, ptype, atype, true, 1, 0)
9111
9112 /* Programs that must specify expected attach type at load time. */
9113 #define BPF_EAPROG_SEC(string, ptype, eatype) \
9114         BPF_PROG_SEC_IMPL(string, ptype, eatype, false, 1, 0)
9115
9116 /* Programs that use BTF to identify attach point */
9117 #define BPF_PROG_BTF(string, ptype, eatype) \
9118         BPF_PROG_SEC_IMPL(string, ptype, eatype, false, 0, 1)
9119
9120 /* Programs that can be attached but attach type can't be identified by section
9121  * name. Kept for backward compatibility.
9122  */
9123 #define BPF_APROG_COMPAT(string, ptype) BPF_PROG_SEC(string, ptype)
9124
9125 #define SEC_DEF(sec_pfx, ptype, ...) {                                      \
9126         .sec = sec_pfx,                                                     \
9127         .len = sizeof(sec_pfx) - 1,                                         \
9128         .prog_type = BPF_PROG_TYPE_##ptype,                                 \
9129         __VA_ARGS__                                                         \
9130 }
9131
9132 static struct bpf_link *attach_kprobe(const struct bpf_sec_def *sec,
9133                                       struct bpf_program *prog);
9134 static struct bpf_link *attach_tp(const struct bpf_sec_def *sec,
9135                                   struct bpf_program *prog);
9136 static struct bpf_link *attach_raw_tp(const struct bpf_sec_def *sec,
9137                                       struct bpf_program *prog);
9138 static struct bpf_link *attach_trace(const struct bpf_sec_def *sec,
9139                                      struct bpf_program *prog);
9140 static struct bpf_link *attach_lsm(const struct bpf_sec_def *sec,
9141                                    struct bpf_program *prog);
9142 static struct bpf_link *attach_iter(const struct bpf_sec_def *sec,
9143                                     struct bpf_program *prog);
9144
9145 static const struct bpf_sec_def section_defs[] = {
9146         BPF_PROG_SEC("socket",                  BPF_PROG_TYPE_SOCKET_FILTER),
9147         BPF_EAPROG_SEC("sk_reuseport/migrate",  BPF_PROG_TYPE_SK_REUSEPORT,
9148                                                 BPF_SK_REUSEPORT_SELECT_OR_MIGRATE),
9149         BPF_EAPROG_SEC("sk_reuseport",          BPF_PROG_TYPE_SK_REUSEPORT,
9150                                                 BPF_SK_REUSEPORT_SELECT),
9151         SEC_DEF("kprobe/", KPROBE,
9152                 .attach_fn = attach_kprobe),
9153         BPF_PROG_SEC("uprobe/",                 BPF_PROG_TYPE_KPROBE),
9154         SEC_DEF("kretprobe/", KPROBE,
9155                 .attach_fn = attach_kprobe),
9156         BPF_PROG_SEC("uretprobe/",              BPF_PROG_TYPE_KPROBE),
9157         BPF_PROG_SEC("classifier",              BPF_PROG_TYPE_SCHED_CLS),
9158         BPF_PROG_SEC("action",                  BPF_PROG_TYPE_SCHED_ACT),
9159         SEC_DEF("tracepoint/", TRACEPOINT,
9160                 .attach_fn = attach_tp),
9161         SEC_DEF("tp/", TRACEPOINT,
9162                 .attach_fn = attach_tp),
9163         SEC_DEF("raw_tracepoint/", RAW_TRACEPOINT,
9164                 .attach_fn = attach_raw_tp),
9165         SEC_DEF("raw_tp/", RAW_TRACEPOINT,
9166                 .attach_fn = attach_raw_tp),
9167         SEC_DEF("tp_btf/", TRACING,
9168                 .expected_attach_type = BPF_TRACE_RAW_TP,
9169                 .is_attach_btf = true,
9170                 .attach_fn = attach_trace),
9171         SEC_DEF("fentry/", TRACING,
9172                 .expected_attach_type = BPF_TRACE_FENTRY,
9173                 .is_attach_btf = true,
9174                 .attach_fn = attach_trace),
9175         SEC_DEF("fmod_ret/", TRACING,
9176                 .expected_attach_type = BPF_MODIFY_RETURN,
9177                 .is_attach_btf = true,
9178                 .attach_fn = attach_trace),
9179         SEC_DEF("fexit/", TRACING,
9180                 .expected_attach_type = BPF_TRACE_FEXIT,
9181                 .is_attach_btf = true,
9182                 .attach_fn = attach_trace),
9183         SEC_DEF("fentry.s/", TRACING,
9184                 .expected_attach_type = BPF_TRACE_FENTRY,
9185                 .is_attach_btf = true,
9186                 .is_sleepable = true,
9187                 .attach_fn = attach_trace),
9188         SEC_DEF("fmod_ret.s/", TRACING,
9189                 .expected_attach_type = BPF_MODIFY_RETURN,
9190                 .is_attach_btf = true,
9191                 .is_sleepable = true,
9192                 .attach_fn = attach_trace),
9193         SEC_DEF("fexit.s/", TRACING,
9194                 .expected_attach_type = BPF_TRACE_FEXIT,
9195                 .is_attach_btf = true,
9196                 .is_sleepable = true,
9197                 .attach_fn = attach_trace),
9198         SEC_DEF("freplace/", EXT,
9199                 .is_attach_btf = true,
9200                 .attach_fn = attach_trace),
9201         SEC_DEF("lsm/", LSM,
9202                 .is_attach_btf = true,
9203                 .expected_attach_type = BPF_LSM_MAC,
9204                 .attach_fn = attach_lsm),
9205         SEC_DEF("lsm.s/", LSM,
9206                 .is_attach_btf = true,
9207                 .is_sleepable = true,
9208                 .expected_attach_type = BPF_LSM_MAC,
9209                 .attach_fn = attach_lsm),
9210         SEC_DEF("iter/", TRACING,
9211                 .expected_attach_type = BPF_TRACE_ITER,
9212                 .is_attach_btf = true,
9213                 .attach_fn = attach_iter),
9214         SEC_DEF("syscall", SYSCALL,
9215                 .is_sleepable = true),
9216         BPF_EAPROG_SEC("xdp_devmap/",           BPF_PROG_TYPE_XDP,
9217                                                 BPF_XDP_DEVMAP),
9218         BPF_EAPROG_SEC("xdp_cpumap/",           BPF_PROG_TYPE_XDP,
9219                                                 BPF_XDP_CPUMAP),
9220         BPF_APROG_SEC("xdp",                    BPF_PROG_TYPE_XDP,
9221                                                 BPF_XDP),
9222         BPF_PROG_SEC("perf_event",              BPF_PROG_TYPE_PERF_EVENT),
9223         BPF_PROG_SEC("lwt_in",                  BPF_PROG_TYPE_LWT_IN),
9224         BPF_PROG_SEC("lwt_out",                 BPF_PROG_TYPE_LWT_OUT),
9225         BPF_PROG_SEC("lwt_xmit",                BPF_PROG_TYPE_LWT_XMIT),
9226         BPF_PROG_SEC("lwt_seg6local",           BPF_PROG_TYPE_LWT_SEG6LOCAL),
9227         BPF_APROG_SEC("cgroup_skb/ingress",     BPF_PROG_TYPE_CGROUP_SKB,
9228                                                 BPF_CGROUP_INET_INGRESS),
9229         BPF_APROG_SEC("cgroup_skb/egress",      BPF_PROG_TYPE_CGROUP_SKB,
9230                                                 BPF_CGROUP_INET_EGRESS),
9231         BPF_APROG_COMPAT("cgroup/skb",          BPF_PROG_TYPE_CGROUP_SKB),
9232         BPF_EAPROG_SEC("cgroup/sock_create",    BPF_PROG_TYPE_CGROUP_SOCK,
9233                                                 BPF_CGROUP_INET_SOCK_CREATE),
9234         BPF_EAPROG_SEC("cgroup/sock_release",   BPF_PROG_TYPE_CGROUP_SOCK,
9235                                                 BPF_CGROUP_INET_SOCK_RELEASE),
9236         BPF_APROG_SEC("cgroup/sock",            BPF_PROG_TYPE_CGROUP_SOCK,
9237                                                 BPF_CGROUP_INET_SOCK_CREATE),
9238         BPF_EAPROG_SEC("cgroup/post_bind4",     BPF_PROG_TYPE_CGROUP_SOCK,
9239                                                 BPF_CGROUP_INET4_POST_BIND),
9240         BPF_EAPROG_SEC("cgroup/post_bind6",     BPF_PROG_TYPE_CGROUP_SOCK,
9241                                                 BPF_CGROUP_INET6_POST_BIND),
9242         BPF_APROG_SEC("cgroup/dev",             BPF_PROG_TYPE_CGROUP_DEVICE,
9243                                                 BPF_CGROUP_DEVICE),
9244         BPF_APROG_SEC("sockops",                BPF_PROG_TYPE_SOCK_OPS,
9245                                                 BPF_CGROUP_SOCK_OPS),
9246         BPF_APROG_SEC("sk_skb/stream_parser",   BPF_PROG_TYPE_SK_SKB,
9247                                                 BPF_SK_SKB_STREAM_PARSER),
9248         BPF_APROG_SEC("sk_skb/stream_verdict",  BPF_PROG_TYPE_SK_SKB,
9249                                                 BPF_SK_SKB_STREAM_VERDICT),
9250         BPF_APROG_COMPAT("sk_skb",              BPF_PROG_TYPE_SK_SKB),
9251         BPF_APROG_SEC("sk_msg",                 BPF_PROG_TYPE_SK_MSG,
9252                                                 BPF_SK_MSG_VERDICT),
9253         BPF_APROG_SEC("lirc_mode2",             BPF_PROG_TYPE_LIRC_MODE2,
9254                                                 BPF_LIRC_MODE2),
9255         BPF_APROG_SEC("flow_dissector",         BPF_PROG_TYPE_FLOW_DISSECTOR,
9256                                                 BPF_FLOW_DISSECTOR),
9257         BPF_EAPROG_SEC("cgroup/bind4",          BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
9258                                                 BPF_CGROUP_INET4_BIND),
9259         BPF_EAPROG_SEC("cgroup/bind6",          BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
9260                                                 BPF_CGROUP_INET6_BIND),
9261         BPF_EAPROG_SEC("cgroup/connect4",       BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
9262                                                 BPF_CGROUP_INET4_CONNECT),
9263         BPF_EAPROG_SEC("cgroup/connect6",       BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
9264                                                 BPF_CGROUP_INET6_CONNECT),
9265         BPF_EAPROG_SEC("cgroup/sendmsg4",       BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
9266                                                 BPF_CGROUP_UDP4_SENDMSG),
9267         BPF_EAPROG_SEC("cgroup/sendmsg6",       BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
9268                                                 BPF_CGROUP_UDP6_SENDMSG),
9269         BPF_EAPROG_SEC("cgroup/recvmsg4",       BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
9270                                                 BPF_CGROUP_UDP4_RECVMSG),
9271         BPF_EAPROG_SEC("cgroup/recvmsg6",       BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
9272                                                 BPF_CGROUP_UDP6_RECVMSG),
9273         BPF_EAPROG_SEC("cgroup/getpeername4",   BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
9274                                                 BPF_CGROUP_INET4_GETPEERNAME),
9275         BPF_EAPROG_SEC("cgroup/getpeername6",   BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
9276                                                 BPF_CGROUP_INET6_GETPEERNAME),
9277         BPF_EAPROG_SEC("cgroup/getsockname4",   BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
9278                                                 BPF_CGROUP_INET4_GETSOCKNAME),
9279         BPF_EAPROG_SEC("cgroup/getsockname6",   BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
9280                                                 BPF_CGROUP_INET6_GETSOCKNAME),
9281         BPF_EAPROG_SEC("cgroup/sysctl",         BPF_PROG_TYPE_CGROUP_SYSCTL,
9282                                                 BPF_CGROUP_SYSCTL),
9283         BPF_EAPROG_SEC("cgroup/getsockopt",     BPF_PROG_TYPE_CGROUP_SOCKOPT,
9284                                                 BPF_CGROUP_GETSOCKOPT),
9285         BPF_EAPROG_SEC("cgroup/setsockopt",     BPF_PROG_TYPE_CGROUP_SOCKOPT,
9286                                                 BPF_CGROUP_SETSOCKOPT),
9287         BPF_PROG_SEC("struct_ops",              BPF_PROG_TYPE_STRUCT_OPS),
9288         BPF_EAPROG_SEC("sk_lookup/",            BPF_PROG_TYPE_SK_LOOKUP,
9289                                                 BPF_SK_LOOKUP),
9290 };
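
/* Illustrative mapping derived from the table above: the SEC() name chosen
 * in BPF C source selects the program (and attach) type:
 *
 *        SEC("kprobe/do_unlinkat")          -> BPF_PROG_TYPE_KPROBE
 *        SEC("tp/syscalls/sys_enter_write") -> BPF_PROG_TYPE_TRACEPOINT
 *        SEC("fentry/do_unlinkat")          -> BPF_PROG_TYPE_TRACING + BPF_TRACE_FENTRY
 *        SEC("xdp")                         -> BPF_PROG_TYPE_XDP + BPF_XDP
 */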
9291
9292 #undef BPF_PROG_SEC_IMPL
9293 #undef BPF_PROG_SEC
9294 #undef BPF_APROG_SEC
9295 #undef BPF_EAPROG_SEC
9296 #undef BPF_APROG_COMPAT
9297 #undef SEC_DEF
9298
9299 #define MAX_TYPE_NAME_SIZE 32
9300
9301 static const struct bpf_sec_def *find_sec_def(const char *sec_name)
9302 {
9303         int i, n = ARRAY_SIZE(section_defs);
9304
9305         for (i = 0; i < n; i++) {
9306                 if (strncmp(sec_name,
9307                             section_defs[i].sec, section_defs[i].len))
9308                         continue;
9309                 return &section_defs[i];
9310         }
9311         return NULL;
9312 }
9313
9314 static char *libbpf_get_type_names(bool attach_type)
9315 {
9316         int i, len = ARRAY_SIZE(section_defs) * MAX_TYPE_NAME_SIZE;
9317         char *buf;
9318
9319         buf = malloc(len);
9320         if (!buf)
9321                 return NULL;
9322
9323         buf[0] = '\0';
9324         /* Build a string buf with all available names */
9325         for (i = 0; i < ARRAY_SIZE(section_defs); i++) {
9326                 if (attach_type && !section_defs[i].is_attachable)
9327                         continue;
9328
9329                 if (strlen(buf) + strlen(section_defs[i].sec) + 2 > len) {
9330                         free(buf);
9331                         return NULL;
9332                 }
9333                 strcat(buf, " ");
9334                 strcat(buf, section_defs[i].sec);
9335         }
9336
9337         return buf;
9338 }
9339
9340 int libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type,
9341                              enum bpf_attach_type *expected_attach_type)
9342 {
9343         const struct bpf_sec_def *sec_def;
9344         char *type_names;
9345
9346         if (!name)
9347                 return libbpf_err(-EINVAL);
9348
9349         sec_def = find_sec_def(name);
9350         if (sec_def) {
9351                 *prog_type = sec_def->prog_type;
9352                 *expected_attach_type = sec_def->expected_attach_type;
9353                 return 0;
9354         }
9355
9356         pr_debug("failed to guess program type from ELF section '%s'\n", name);
9357         type_names = libbpf_get_type_names(false);
9358         if (type_names != NULL) {
9359                 pr_debug("supported section(type) names are:%s\n", type_names);
9360                 free(type_names);
9361         }
9362
9363         return libbpf_err(-ESRCH);
9364 }
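
/* Usage sketch (illustrative): resolving a section name to types without
 * opening an object:
 *
 *        enum bpf_prog_type ptype;
 *        enum bpf_attach_type atype;
 *
 *        if (!libbpf_prog_type_by_name("cgroup/bind4", &ptype, &atype))
 *                assert(ptype == BPF_PROG_TYPE_CGROUP_SOCK_ADDR &&
 *                       atype == BPF_CGROUP_INET4_BIND);
 */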
9365
9366 static struct bpf_map *find_struct_ops_map_by_offset(struct bpf_object *obj,
9367                                                      size_t offset)
9368 {
9369         struct bpf_map *map;
9370         size_t i;
9371
9372         for (i = 0; i < obj->nr_maps; i++) {
9373                 map = &obj->maps[i];
9374                 if (!bpf_map__is_struct_ops(map))
9375                         continue;
9376                 if (map->sec_offset <= offset &&
9377                     offset - map->sec_offset < map->def.value_size)
9378                         return map;
9379         }
9380
9381         return NULL;
9382 }
9383
9384 /* Collect the reloc from ELF and populate the st_ops->progs[] */
9385 static int bpf_object__collect_st_ops_relos(struct bpf_object *obj,
9386                                             GElf_Shdr *shdr, Elf_Data *data)
9387 {
9388         const struct btf_member *member;
9389         struct bpf_struct_ops *st_ops;
9390         struct bpf_program *prog;
9391         unsigned int shdr_idx;
9392         const struct btf *btf;
9393         struct bpf_map *map;
9394         Elf_Data *symbols;
9395         unsigned int moff, insn_idx;
9396         const char *name;
9397         __u32 member_idx;
9398         GElf_Sym sym;
9399         GElf_Rel rel;
9400         int i, nrels;
9401
9402         symbols = obj->efile.symbols;
9403         btf = obj->btf;
9404         nrels = shdr->sh_size / shdr->sh_entsize;
9405         for (i = 0; i < nrels; i++) {
9406                 if (!gelf_getrel(data, i, &rel)) {
9407                         pr_warn("struct_ops reloc: failed to get %d reloc\n", i);
9408                         return -LIBBPF_ERRNO__FORMAT;
9409                 }
9410
9411                 if (!gelf_getsym(symbols, GELF_R_SYM(rel.r_info), &sym)) {
9412                         pr_warn("struct_ops reloc: symbol %zx not found\n",
9413                                 (size_t)GELF_R_SYM(rel.r_info));
9414                         return -LIBBPF_ERRNO__FORMAT;
9415                 }
9416
9417                 name = elf_sym_str(obj, sym.st_name) ?: "<?>";
9418                 map = find_struct_ops_map_by_offset(obj, rel.r_offset);
9419                 if (!map) {
9420                         pr_warn("struct_ops reloc: cannot find map at rel.r_offset %zu\n",
9421                                 (size_t)rel.r_offset);
9422                         return -EINVAL;
9423                 }
9424
9425                 moff = rel.r_offset - map->sec_offset;
9426                 shdr_idx = sym.st_shndx;
9427                 st_ops = map->st_ops;
9428                 pr_debug("struct_ops reloc %s: for %lld value %lld shdr_idx %u rel.r_offset %zu map->sec_offset %zu name %d (\'%s\')\n",
9429                          map->name,
9430                          (long long)(rel.r_info >> 32),
9431                          (long long)sym.st_value,
9432                          shdr_idx, (size_t)rel.r_offset,
9433                          map->sec_offset, sym.st_name, name);
9434
9435                 if (shdr_idx >= SHN_LORESERVE) {
9436                         pr_warn("struct_ops reloc %s: rel.r_offset %zu shdr_idx %u unsupported non-static function\n",
9437                                 map->name, (size_t)rel.r_offset, shdr_idx);
9438                         return -LIBBPF_ERRNO__RELOC;
9439                 }
9440                 if (sym.st_value % BPF_INSN_SZ) {
9441                         pr_warn("struct_ops reloc %s: invalid target program offset %llu\n",
9442                                 map->name, (unsigned long long)sym.st_value);
9443                         return -LIBBPF_ERRNO__FORMAT;
9444                 }
9445                 insn_idx = sym.st_value / BPF_INSN_SZ;
9446
9447                 member = find_member_by_offset(st_ops->type, moff * 8);
9448                 if (!member) {
9449                         pr_warn("struct_ops reloc %s: cannot find member at moff %u\n",
9450                                 map->name, moff);
9451                         return -EINVAL;
9452                 }
9453                 member_idx = member - btf_members(st_ops->type);
9454                 name = btf__name_by_offset(btf, member->name_off);
9455
9456                 if (!resolve_func_ptr(btf, member->type, NULL)) {
9457                         pr_warn("struct_ops reloc %s: cannot relocate non func ptr %s\n",
9458                                 map->name, name);
9459                         return -EINVAL;
9460                 }
9461
9462                 prog = find_prog_by_sec_insn(obj, shdr_idx, insn_idx);
9463                 if (!prog) {
9464                         pr_warn("struct_ops reloc %s: cannot find prog at shdr_idx %u to relocate func ptr %s\n",
9465                                 map->name, shdr_idx, name);
9466                         return -EINVAL;
9467                 }
9468
9469                 if (prog->type == BPF_PROG_TYPE_UNSPEC) {
9470                         const struct bpf_sec_def *sec_def;
9471
9472                         sec_def = find_sec_def(prog->sec_name);
9473                         if (sec_def &&
9474                             sec_def->prog_type != BPF_PROG_TYPE_STRUCT_OPS) {
9475                                 /* for pr_warn */
9476                                 prog->type = sec_def->prog_type;
9477                                 goto invalid_prog;
9478                         }
9479
9480                         prog->type = BPF_PROG_TYPE_STRUCT_OPS;
9481                         prog->attach_btf_id = st_ops->type_id;
9482                         prog->expected_attach_type = member_idx;
9483                 } else if (prog->type != BPF_PROG_TYPE_STRUCT_OPS ||
9484                            prog->attach_btf_id != st_ops->type_id ||
9485                            prog->expected_attach_type != member_idx) {
9486                         goto invalid_prog;
9487                 }
9488                 st_ops->progs[member_idx] = prog;
9489         }
9490
9491         return 0;
9492
9493 invalid_prog:
9494         pr_warn("struct_ops reloc %s: cannot use prog %s in sec %s with type %u attach_btf_id %u expected_attach_type %u for func ptr %s\n",
9495                 map->name, prog->name, prog->sec_name, prog->type,
9496                 prog->attach_btf_id, prog->expected_attach_type, name);
9497         return -EINVAL;
9498 }
9499
9500 #define BTF_TRACE_PREFIX "btf_trace_"
9501 #define BTF_LSM_PREFIX "bpf_lsm_"
9502 #define BTF_ITER_PREFIX "bpf_iter_"
9503 #define BTF_MAX_NAME_SIZE 128
9504
9505 void btf_get_kernel_prefix_kind(enum bpf_attach_type attach_type,
9506                                 const char **prefix, int *kind)
9507 {
9508         switch (attach_type) {
9509         case BPF_TRACE_RAW_TP:
9510                 *prefix = BTF_TRACE_PREFIX;
9511                 *kind = BTF_KIND_TYPEDEF;
9512                 break;
9513         case BPF_LSM_MAC:
9514                 *prefix = BTF_LSM_PREFIX;
9515                 *kind = BTF_KIND_FUNC;
9516                 break;
9517         case BPF_TRACE_ITER:
9518                 *prefix = BTF_ITER_PREFIX;
9519                 *kind = BTF_KIND_FUNC;
9520                 break;
9521         default:
9522                 *prefix = "";
9523                 *kind = BTF_KIND_FUNC;
9524         }
9525 }
9526
9527 static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix,
9528                                    const char *name, __u32 kind)
9529 {
9530         char btf_type_name[BTF_MAX_NAME_SIZE];
9531         int ret;
9532
9533         ret = snprintf(btf_type_name, sizeof(btf_type_name),
9534                        "%s%s", prefix, name);
9535         /* snprintf returns the number of characters written excluding the
9536          * terminating null. So, if >= BTF_MAX_NAME_SIZE are written, it
9537          * indicates truncation.
9538          */
9539         if (ret < 0 || ret >= sizeof(btf_type_name))
9540                 return -ENAMETOOLONG;
9541         return btf__find_by_name_kind(btf, btf_type_name, kind);
9542 }
9543
9544 static inline int find_attach_btf_id(struct btf *btf, const char *name,
9545                                      enum bpf_attach_type attach_type)
9546 {
9547         const char *prefix;
9548         int kind;
9549
9550         btf_get_kernel_prefix_kind(attach_type, &prefix, &kind);
9551         return find_btf_by_prefix_kind(btf, prefix, name, kind);
9552 }
9553
9554 int libbpf_find_vmlinux_btf_id(const char *name,
9555                                enum bpf_attach_type attach_type)
9556 {
9557         struct btf *btf;
9558         int err;
9559
9560         btf = libbpf_find_kernel_btf();
9561         err = libbpf_get_error(btf);
9562         if (err) {
9563                 pr_warn("vmlinux BTF is not found\n");
9564                 return libbpf_err(err);
9565         }
9566
9567         err = find_attach_btf_id(btf, name, attach_type);
9568         if (err <= 0)
9569                 pr_warn("%s is not found in vmlinux BTF\n", name);
9570
9571         btf__free(btf);
9572         return libbpf_err(err);
9573 }
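
/* Editor's note: an illustrative usage sketch, not part of libbpf. It shows
 * resolving the vmlinux BTF ID of a fentry attach target; "tcp_connect" is
 * just a hypothetical kernel function name:
 *
 *	int btf_id;
 *
 *	btf_id = libbpf_find_vmlinux_btf_id("tcp_connect", BPF_TRACE_FENTRY);
 *	if (btf_id < 0)
 *		return btf_id;
 */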
9574
9575 static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd)
9576 {
9577         struct bpf_prog_info_linear *info_linear;
9578         struct bpf_prog_info *info;
9579         struct btf *btf = NULL;
9580         int err = -EINVAL;
9581
9582         info_linear = bpf_program__get_prog_info_linear(attach_prog_fd, 0);
9583         err = libbpf_get_error(info_linear);
9584         if (err) {
9585                 pr_warn("failed get_prog_info_linear for FD %d\n",
9586                         attach_prog_fd);
9587                 return err;
9588         }
9589         info = &info_linear->info;
9590         if (!info->btf_id) {
9591                 pr_warn("The target program doesn't have BTF\n");
9592                 goto out;
9593         }
9594         if (btf__get_from_id(info->btf_id, &btf)) {
9595                 pr_warn("Failed to get BTF of the program\n");
9596                 goto out;
9597         }
9598         err = btf__find_by_name_kind(btf, name, BTF_KIND_FUNC);
9599         btf__free(btf);
9600         if (err <= 0) {
9601                 pr_warn("%s is not found in prog's BTF\n", name);
9602                 goto out;
9603         }
9604 out:
9605         free(info_linear);
9606         return err;
9607 }
9608
9609 static int find_kernel_btf_id(struct bpf_object *obj, const char *attach_name,
9610                               enum bpf_attach_type attach_type,
9611                               int *btf_obj_fd, int *btf_type_id)
9612 {
9613         int ret, i;
9614
9615         ret = find_attach_btf_id(obj->btf_vmlinux, attach_name, attach_type);
9616         if (ret > 0) {
9617                 *btf_obj_fd = 0; /* vmlinux BTF */
9618                 *btf_type_id = ret;
9619                 return 0;
9620         }
9621         if (ret != -ENOENT)
9622                 return ret;
9623
9624         ret = load_module_btfs(obj);
9625         if (ret)
9626                 return ret;
9627
9628         for (i = 0; i < obj->btf_module_cnt; i++) {
9629                 const struct module_btf *mod = &obj->btf_modules[i];
9630
9631                 ret = find_attach_btf_id(mod->btf, attach_name, attach_type);
9632                 if (ret > 0) {
9633                         *btf_obj_fd = mod->fd;
9634                         *btf_type_id = ret;
9635                         return 0;
9636                 }
9637                 if (ret == -ENOENT)
9638                         continue;
9639
9640                 return ret;
9641         }
9642
9643         return -ESRCH;
9644 }
9645
9646 static int libbpf_find_attach_btf_id(struct bpf_program *prog, int *btf_obj_fd, int *btf_type_id)
9647 {
9648         enum bpf_attach_type attach_type = prog->expected_attach_type;
9649         __u32 attach_prog_fd = prog->attach_prog_fd;
9650         const char *name = prog->sec_name, *attach_name;
9651         const struct bpf_sec_def *sec = NULL;
9652         int i, err = 0;
9653
9654         if (!name)
9655                 return -EINVAL;
9656
9657         for (i = 0; i < ARRAY_SIZE(section_defs); i++) {
9658                 if (!section_defs[i].is_attach_btf)
9659                         continue;
9660                 if (strncmp(name, section_defs[i].sec, section_defs[i].len))
9661                         continue;
9662
9663                 sec = &section_defs[i];
9664                 break;
9665         }
9666
9667         if (!sec) {
9668                 pr_warn("failed to identify BTF ID based on ELF section name '%s'\n", name);
9669                 return -ESRCH;
9670         }
9671         attach_name = name + sec->len;
9672
9673         /* BPF program's BTF ID */
9674         if (attach_prog_fd) {
9675                 err = libbpf_find_prog_btf_id(attach_name, attach_prog_fd);
9676                 if (err < 0) {
9677                         pr_warn("failed to find BPF program (FD %d) BTF ID for '%s': %d\n",
9678                                  attach_prog_fd, attach_name, err);
9679                         return err;
9680                 }
9681                 *btf_obj_fd = 0;
9682                 *btf_type_id = err;
9683                 return 0;
9684         }
9685
9686         /* kernel/module BTF ID */
9687         if (prog->obj->gen_loader) {
9688                 bpf_gen__record_attach_target(prog->obj->gen_loader, attach_name, attach_type);
9689                 *btf_obj_fd = 0;
9690                 *btf_type_id = 1;
9691         } else {
9692                 err = find_kernel_btf_id(prog->obj, attach_name, attach_type, btf_obj_fd, btf_type_id);
9693         }
9694         if (err) {
9695                 pr_warn("failed to find kernel BTF type ID of '%s': %d\n", attach_name, err);
9696                 return err;
9697         }
9698         return 0;
9699 }
9700
9701 int libbpf_attach_type_by_name(const char *name,
9702                                enum bpf_attach_type *attach_type)
9703 {
9704         char *type_names;
9705         int i;
9706
9707         if (!name)
9708                 return libbpf_err(-EINVAL);
9709
9710         for (i = 0; i < ARRAY_SIZE(section_defs); i++) {
9711                 if (strncmp(name, section_defs[i].sec, section_defs[i].len))
9712                         continue;
9713                 if (!section_defs[i].is_attachable)
9714                         return libbpf_err(-EINVAL);
9715                 *attach_type = section_defs[i].expected_attach_type;
9716                 return 0;
9717         }
9718         pr_debug("failed to guess attach type based on ELF section name '%s'\n", name);
9719         type_names = libbpf_get_type_names(true);
9720         if (type_names != NULL) {
9721                 pr_debug("attachable section(type) names are:%s\n", type_names);
9722                 free(type_names);
9723         }
9724
9725         return libbpf_err(-EINVAL);
9726 }
9727
9728 int bpf_map__fd(const struct bpf_map *map)
9729 {
9730         return map ? map->fd : libbpf_err(-EINVAL);
9731 }
9732
9733 const struct bpf_map_def *bpf_map__def(const struct bpf_map *map)
9734 {
9735         return map ? &map->def : libbpf_err_ptr(-EINVAL);
9736 }
9737
9738 const char *bpf_map__name(const struct bpf_map *map)
9739 {
9740         return map ? map->name : NULL;
9741 }
9742
9743 enum bpf_map_type bpf_map__type(const struct bpf_map *map)
9744 {
9745         return map->def.type;
9746 }
9747
9748 int bpf_map__set_type(struct bpf_map *map, enum bpf_map_type type)
9749 {
9750         if (map->fd >= 0)
9751                 return libbpf_err(-EBUSY);
9752         map->def.type = type;
9753         return 0;
9754 }
9755
9756 __u32 bpf_map__map_flags(const struct bpf_map *map)
9757 {
9758         return map->def.map_flags;
9759 }
9760
9761 int bpf_map__set_map_flags(struct bpf_map *map, __u32 flags)
9762 {
9763         if (map->fd >= 0)
9764                 return libbpf_err(-EBUSY);
9765         map->def.map_flags = flags;
9766         return 0;
9767 }
9768
9769 __u32 bpf_map__numa_node(const struct bpf_map *map)
9770 {
9771         return map->numa_node;
9772 }
9773
9774 int bpf_map__set_numa_node(struct bpf_map *map, __u32 numa_node)
9775 {
9776         if (map->fd >= 0)
9777                 return libbpf_err(-EBUSY);
9778         map->numa_node = numa_node;
9779         return 0;
9780 }
9781
9782 __u32 bpf_map__key_size(const struct bpf_map *map)
9783 {
9784         return map->def.key_size;
9785 }
9786
9787 int bpf_map__set_key_size(struct bpf_map *map, __u32 size)
9788 {
9789         if (map->fd >= 0)
9790                 return libbpf_err(-EBUSY);
9791         map->def.key_size = size;
9792         return 0;
9793 }
9794
9795 __u32 bpf_map__value_size(const struct bpf_map *map)
9796 {
9797         return map->def.value_size;
9798 }
9799
9800 int bpf_map__set_value_size(struct bpf_map *map, __u32 size)
9801 {
9802         if (map->fd >= 0)
9803                 return libbpf_err(-EBUSY);
9804         map->def.value_size = size;
9805         return 0;
9806 }
9807
9808 __u32 bpf_map__btf_key_type_id(const struct bpf_map *map)
9809 {
9810         return map ? map->btf_key_type_id : 0;
9811 }
9812
9813 __u32 bpf_map__btf_value_type_id(const struct bpf_map *map)
9814 {
9815         return map ? map->btf_value_type_id : 0;
9816 }
9817
9818 int bpf_map__set_priv(struct bpf_map *map, void *priv,
9819                      bpf_map_clear_priv_t clear_priv)
9820 {
9821         if (!map)
9822                 return libbpf_err(-EINVAL);
9823
9824         if (map->priv) {
9825                 if (map->clear_priv)
9826                         map->clear_priv(map, map->priv);
9827         }
9828
9829         map->priv = priv;
9830         map->clear_priv = clear_priv;
9831         return 0;
9832 }
9833
9834 void *bpf_map__priv(const struct bpf_map *map)
9835 {
9836         return map ? map->priv : libbpf_err_ptr(-EINVAL);
9837 }
9838
9839 int bpf_map__set_initial_value(struct bpf_map *map,
9840                                const void *data, size_t size)
9841 {
9842         if (!map->mmaped || map->libbpf_type == LIBBPF_MAP_KCONFIG ||
9843             size != map->def.value_size || map->fd >= 0)
9844                 return libbpf_err(-EINVAL);
9845
9846         memcpy(map->mmaped, data, size);
9847         return 0;
9848 }
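
/* Editor's note: a minimal usage sketch, not part of libbpf. Initial values
 * can only be set on mmap-able internal maps (.data/.rodata/.bss), only
 * before the object is loaded, and the size must match the map's value size
 * exactly. "obj", the map name, and struct my_cfg are hypothetical, assuming
 * the .rodata section holds exactly one struct my_cfg:
 *
 *	struct my_cfg { int verbose; } cfg = { .verbose = 1 };
 *	struct bpf_map *map;
 *
 *	map = bpf_object__find_map_by_name(obj, "prog.rodata");
 *	if (!map || bpf_map__set_initial_value(map, &cfg, sizeof(cfg)))
 *		return -1;
 */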
9849
9850 const void *bpf_map__initial_value(struct bpf_map *map, size_t *psize)
9851 {
9852         if (!map->mmaped)
9853                 return NULL;
9854         *psize = map->def.value_size;
9855         return map->mmaped;
9856 }
9857
9858 bool bpf_map__is_offload_neutral(const struct bpf_map *map)
9859 {
9860         return map->def.type == BPF_MAP_TYPE_PERF_EVENT_ARRAY;
9861 }
9862
9863 bool bpf_map__is_internal(const struct bpf_map *map)
9864 {
9865         return map->libbpf_type != LIBBPF_MAP_UNSPEC;
9866 }
9867
9868 __u32 bpf_map__ifindex(const struct bpf_map *map)
9869 {
9870         return map->map_ifindex;
9871 }
9872
9873 int bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex)
9874 {
9875         if (map->fd >= 0)
9876                 return libbpf_err(-EBUSY);
9877         map->map_ifindex = ifindex;
9878         return 0;
9879 }
9880
9881 int bpf_map__set_inner_map_fd(struct bpf_map *map, int fd)
9882 {
9883         if (!bpf_map_type__is_map_in_map(map->def.type)) {
9884                 pr_warn("error: unsupported map type\n");
9885                 return libbpf_err(-EINVAL);
9886         }
9887         if (map->inner_map_fd != -1) {
9888                 pr_warn("error: inner_map_fd already specified\n");
9889                 return libbpf_err(-EINVAL);
9890         }
9891         zfree(&map->inner_map);
9892         map->inner_map_fd = fd;
9893         return 0;
9894 }
9895
9896 static struct bpf_map *
9897 __bpf_map__iter(const struct bpf_map *m, const struct bpf_object *obj, int i)
9898 {
9899         ssize_t idx;
9900         struct bpf_map *s, *e;
9901
9902         if (!obj || !obj->maps)
9903                 return errno = EINVAL, NULL;
9904
9905         s = obj->maps;
9906         e = obj->maps + obj->nr_maps;
9907
9908         if ((m < s) || (m >= e)) {
9909                 pr_warn("error in %s: map handle doesn't belong to object\n",
9910                          __func__);
9911                 return errno = EINVAL, NULL;
9912         }
9913
9914         idx = (m - obj->maps) + i;
9915         if (idx >= obj->nr_maps || idx < 0)
9916                 return NULL;
9917         return &obj->maps[idx];
9918 }
9919
9920 struct bpf_map *
9921 bpf_map__next(const struct bpf_map *prev, const struct bpf_object *obj)
9922 {
9923         if (prev == NULL)
9924                 return obj->maps;
9925
9926         return __bpf_map__iter(prev, obj, 1);
9927 }
9928
9929 struct bpf_map *
9930 bpf_map__prev(const struct bpf_map *next, const struct bpf_object *obj)
9931 {
9932         if (next == NULL) {
9933                 if (!obj->nr_maps)
9934                         return NULL;
9935                 return obj->maps + obj->nr_maps - 1;
9936         }
9937
9938         return __bpf_map__iter(next, obj, -1);
9939 }
9940
9941 struct bpf_map *
9942 bpf_object__find_map_by_name(const struct bpf_object *obj, const char *name)
9943 {
9944         struct bpf_map *pos;
9945
9946         bpf_object__for_each_map(pos, obj) {
9947                 if (pos->name && !strcmp(pos->name, name))
9948                         return pos;
9949         }
9950         return errno = ENOENT, NULL;
9951 }
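
/* Editor's note: an illustrative sketch, not part of libbpf. The iterators
 * above back the bpf_object__for_each_map() convenience macro, which
 * composes naturally with lookup by name ("obj" and the "events" map are
 * assumptions):
 *
 *	struct bpf_map *map;
 *
 *	bpf_object__for_each_map(map, obj)
 *		printf("map: %s\n", bpf_map__name(map));
 *
 *	map = bpf_object__find_map_by_name(obj, "events");
 *	if (!map)
 *		return -ENOENT;
 */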
9952
9953 int
9954 bpf_object__find_map_fd_by_name(const struct bpf_object *obj, const char *name)
9955 {
9956         return bpf_map__fd(bpf_object__find_map_by_name(obj, name));
9957 }
9958
9959 struct bpf_map *
9960 bpf_object__find_map_by_offset(struct bpf_object *obj, size_t offset)
9961 {
9962         return libbpf_err_ptr(-ENOTSUP);
9963 }
9964
9965 long libbpf_get_error(const void *ptr)
9966 {
9967         if (!IS_ERR_OR_NULL(ptr))
9968                 return 0;
9969
9970         if (IS_ERR(ptr))
9971                 errno = -PTR_ERR(ptr);
9972
9973         /* If ptr == NULL, then errno should already be set by the failing
9974          * API, because libbpf never returns NULL on success and it now always
9975          * sets errno on error. So no extra errno handling is needed for the
9976          * ptr == NULL case.
9977          */
9978         return -errno;
9979 }
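
/* Editor's note: an illustrative sketch, not part of libbpf, of the calling
 * convention described above: pointer-returning APIs report failure through
 * errno (or a legacy ERR_PTR), so callers should go through
 * libbpf_get_error() instead of only comparing against NULL. "prog.o" is a
 * hypothetical object file:
 *
 *	struct bpf_object *obj = bpf_object__open("prog.o");
 *	long err = libbpf_get_error(obj);
 *
 *	if (err) {
 *		fprintf(stderr, "failed to open object: %ld\n", err);
 *		return err;
 *	}
 */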
9980
9981 int bpf_prog_load(const char *file, enum bpf_prog_type type,
9982                   struct bpf_object **pobj, int *prog_fd)
9983 {
9984         struct bpf_prog_load_attr attr;
9985
9986         memset(&attr, 0, sizeof(struct bpf_prog_load_attr));
9987         attr.file = file;
9988         attr.prog_type = type;
9989         attr.expected_attach_type = 0;
9990
9991         return bpf_prog_load_xattr(&attr, pobj, prog_fd);
9992 }
9993
9994 int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
9995                         struct bpf_object **pobj, int *prog_fd)
9996 {
9997         struct bpf_object_open_attr open_attr = {};
9998         struct bpf_program *prog, *first_prog = NULL;
9999         struct bpf_object *obj;
10000         struct bpf_map *map;
10001         int err;
10002
10003         if (!attr)
10004                 return libbpf_err(-EINVAL);
10005         if (!attr->file)
10006                 return libbpf_err(-EINVAL);
10007
10008         open_attr.file = attr->file;
10009         open_attr.prog_type = attr->prog_type;
10010
10011         obj = bpf_object__open_xattr(&open_attr);
10012         err = libbpf_get_error(obj);
10013         if (err)
10014                 return libbpf_err(-ENOENT);
10015
10016         bpf_object__for_each_program(prog, obj) {
10017                 enum bpf_attach_type attach_type = attr->expected_attach_type;
10018                 /*
10019                  * to preserve backwards compatibility, bpf_prog_load treats
10020                  * attr->prog_type, if specified, as an override to whatever
10021                  * bpf_object__open guessed
10022                  */
10023                 if (attr->prog_type != BPF_PROG_TYPE_UNSPEC) {
10024                         bpf_program__set_type(prog, attr->prog_type);
10025                         bpf_program__set_expected_attach_type(prog,
10026                                                               attach_type);
10027                 }
10028                 if (bpf_program__get_type(prog) == BPF_PROG_TYPE_UNSPEC) {
10029                         /*
10030                          * we haven't guessed from section name and user
10031                          * didn't provide a fallback type, too bad...
10032                          */
10033                         bpf_object__close(obj);
10034                         return libbpf_err(-EINVAL);
10035                 }
10036
10037                 prog->prog_ifindex = attr->ifindex;
10038                 prog->log_level = attr->log_level;
10039                 prog->prog_flags |= attr->prog_flags;
10040                 if (!first_prog)
10041                         first_prog = prog;
10042         }
10043
10044         bpf_object__for_each_map(map, obj) {
10045                 if (!bpf_map__is_offload_neutral(map))
10046                         map->map_ifindex = attr->ifindex;
10047         }
10048
10049         if (!first_prog) {
10050                 pr_warn("object file doesn't contain a BPF program\n");
10051                 bpf_object__close(obj);
10052                 return libbpf_err(-ENOENT);
10053         }
10054
10055         err = bpf_object__load(obj);
10056         if (err) {
10057                 bpf_object__close(obj);
10058                 return libbpf_err(err);
10059         }
10060
10061         *pobj = obj;
10062         *prog_fd = bpf_program__fd(first_prog);
10063         return 0;
10064 }
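
/* Editor's note: a minimal usage sketch, not part of libbpf. bpf_prog_load()
 * above opens and loads an object and hands back the FD of its first
 * program in one call; "prog.o" is a hypothetical object file:
 *
 *	struct bpf_object *obj;
 *	int prog_fd;
 *
 *	if (bpf_prog_load("prog.o", BPF_PROG_TYPE_KPROBE, &obj, &prog_fd))
 *		return -1;
 *	// ... use prog_fd ...
 *	bpf_object__close(obj);
 */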
10065
10066 struct bpf_link {
10067         int (*detach)(struct bpf_link *link);
10068         int (*destroy)(struct bpf_link *link);
10069         char *pin_path;         /* NULL, if not pinned */
10070         int fd;                 /* hook FD, -1 if not applicable */
10071         bool disconnected;
10072 };
10073
10074 /* Replace link's underlying BPF program with the new one */
10075 int bpf_link__update_program(struct bpf_link *link, struct bpf_program *prog)
10076 {
10077         int ret;
10078
10079         ret = bpf_link_update(bpf_link__fd(link), bpf_program__fd(prog), NULL);
10080         return libbpf_err_errno(ret);
10081 }
10082
10083 /* Release "ownership" of underlying BPF resource (typically, BPF program
10084  * attached to some BPF hook, e.g., tracepoint, kprobe, etc). Disconnected
10085  * link, when destructed through bpf_link__destroy() call won't attempt to
10086  * detach/unregisted that BPF resource. This is useful in situations where,
10087  * say, attached BPF program has to outlive userspace program that attached it
10088  * in the system. Depending on type of BPF program, though, there might be
10089  * additional steps (like pinning BPF program in BPF FS) necessary to ensure
10090  * exit of userspace program doesn't trigger automatic detachment and clean up
10091  * inside the kernel.
10092  */
10093 void bpf_link__disconnect(struct bpf_link *link)
10094 {
10095         link->disconnected = true;
10096 }
10097
10098 int bpf_link__destroy(struct bpf_link *link)
10099 {
10100         int err = 0;
10101
10102         if (IS_ERR_OR_NULL(link))
10103                 return 0;
10104
10105         if (!link->disconnected && link->detach)
10106                 err = link->detach(link);
10107         if (link->destroy)
10108                 link->destroy(link);
10109         if (link->pin_path)
10110                 free(link->pin_path);
10111         free(link);
10112
10113         return libbpf_err(err);
10114 }
10115
10116 int bpf_link__fd(const struct bpf_link *link)
10117 {
10118         return link->fd;
10119 }
10120
10121 const char *bpf_link__pin_path(const struct bpf_link *link)
10122 {
10123         return link->pin_path;
10124 }
10125
10126 static int bpf_link__detach_fd(struct bpf_link *link)
10127 {
10128         return libbpf_err_errno(close(link->fd));
10129 }
10130
10131 struct bpf_link *bpf_link__open(const char *path)
10132 {
10133         struct bpf_link *link;
10134         int fd;
10135
10136         fd = bpf_obj_get(path);
10137         if (fd < 0) {
10138                 fd = -errno;
10139                 pr_warn("failed to open link at %s: %d\n", path, fd);
10140                 return libbpf_err_ptr(fd);
10141         }
10142
10143         link = calloc(1, sizeof(*link));
10144         if (!link) {
10145                 close(fd);
10146                 return libbpf_err_ptr(-ENOMEM);
10147         }
10148         link->detach = &bpf_link__detach_fd;
10149         link->fd = fd;
10150
10151         link->pin_path = strdup(path);
10152         if (!link->pin_path) {
10153                 bpf_link__destroy(link);
10154                 return libbpf_err_ptr(-ENOMEM);
10155         }
10156
10157         return link;
10158 }
10159
10160 int bpf_link__detach(struct bpf_link *link)
10161 {
10162         return bpf_link_detach(link->fd) ? -errno : 0;
10163 }
10164
10165 int bpf_link__pin(struct bpf_link *link, const char *path)
10166 {
10167         int err;
10168
10169         if (link->pin_path)
10170                 return libbpf_err(-EBUSY);
10171         err = make_parent_dir(path);
10172         if (err)
10173                 return libbpf_err(err);
10174         err = check_path(path);
10175         if (err)
10176                 return libbpf_err(err);
10177
10178         link->pin_path = strdup(path);
10179         if (!link->pin_path)
10180                 return libbpf_err(-ENOMEM);
10181
10182         if (bpf_obj_pin(link->fd, link->pin_path)) {
10183                 err = -errno;
10184                 zfree(&link->pin_path);
10185                 return libbpf_err(err);
10186         }
10187
10188         pr_debug("link fd=%d: pinned at %s\n", link->fd, link->pin_path);
10189         return 0;
10190 }
10191
10192 int bpf_link__unpin(struct bpf_link *link)
10193 {
10194         int err;
10195
10196         if (!link->pin_path)
10197                 return libbpf_err(-EINVAL);
10198
10199         err = unlink(link->pin_path);
10200         if (err != 0)
10201                 return libbpf_err(-errno);
10202
10203         pr_debug("link fd=%d: unpinned from %s\n", link->fd, link->pin_path);
10204         zfree(&link->pin_path);
10205         return 0;
10206 }
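
/* Editor's note: an illustrative sketch, not part of libbpf, combining
 * pinning with bpf_link__disconnect() so that the attachment survives the
 * attaching process; the BPF FS path is hypothetical:
 *
 *	if (bpf_link__pin(link, "/sys/fs/bpf/my_link"))
 *		return -1;
 *	bpf_link__disconnect(link);
 *	bpf_link__destroy(link);	// frees memory, keeps the attachment
 */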
10207
10208 static int bpf_link__detach_perf_event(struct bpf_link *link)
10209 {
10210         int err;
10211
10212         err = ioctl(link->fd, PERF_EVENT_IOC_DISABLE, 0);
10213         if (err)
10214                 err = -errno;
10215
10216         close(link->fd);
10217         return libbpf_err(err);
10218 }
10219
10220 struct bpf_link *bpf_program__attach_perf_event(struct bpf_program *prog, int pfd)
10221 {
10222         char errmsg[STRERR_BUFSIZE];
10223         struct bpf_link *link;
10224         int prog_fd, err;
10225
10226         if (pfd < 0) {
10227                 pr_warn("prog '%s': invalid perf event FD %d\n",
10228                         prog->name, pfd);
10229                 return libbpf_err_ptr(-EINVAL);
10230         }
10231         prog_fd = bpf_program__fd(prog);
10232         if (prog_fd < 0) {
10233                 pr_warn("prog '%s': can't attach BPF program w/o FD (did you load it?)\n",
10234                         prog->name);
10235                 return libbpf_err_ptr(-EINVAL);
10236         }
10237
10238         link = calloc(1, sizeof(*link));
10239         if (!link)
10240                 return libbpf_err_ptr(-ENOMEM);
10241         link->detach = &bpf_link__detach_perf_event;
10242         link->fd = pfd;
10243
10244         if (ioctl(pfd, PERF_EVENT_IOC_SET_BPF, prog_fd) < 0) {
10245                 err = -errno;
10246                 free(link);
10247                 pr_warn("prog '%s': failed to attach to pfd %d: %s\n",
10248                         prog->name, pfd, libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
10249                 if (err == -EPROTO)
10250                         pr_warn("prog '%s': try adding PERF_SAMPLE_CALLCHAIN to or removing exclude_callchain_[kernel|user] from pfd %d\n",
10251                                 prog->name, pfd);
10252                 return libbpf_err_ptr(err);
10253         }
10254         if (ioctl(pfd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
10255                 err = -errno;
10256                 free(link);
10257                 pr_warn("prog '%s': failed to enable pfd %d: %s\n",
10258                         prog->name, pfd, libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
10259                 return libbpf_err_ptr(err);
10260         }
10261         return link;
10262 }
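
/* Editor's note: a minimal sketch, not part of libbpf, of attaching a loaded
 * program to a caller-created perf event FD; the CPU-clock sampling event
 * and "prog" are assumptions for illustration:
 *
 *	struct perf_event_attr attr = {
 *		.type = PERF_TYPE_SOFTWARE,
 *		.config = PERF_COUNT_SW_CPU_CLOCK,
 *		.size = sizeof(attr),
 *		.freq = 1,
 *		.sample_freq = 99,
 *	};
 *	int pfd = syscall(__NR_perf_event_open, &attr, -1, 0, -1,
 *			  PERF_FLAG_FD_CLOEXEC);
 *	struct bpf_link *link = bpf_program__attach_perf_event(prog, pfd);
 *
 *	if (libbpf_get_error(link))
 *		return -1;
 */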
10263
10264 /*
10265  * This function is expected to parse an integer in the range of [0, 2^31-1]
10266  * from the given file using scanf format string fmt. If the actually parsed
10267  * value is negative, the result is indistinguishable from an error.
10268  */
10269 static int parse_uint_from_file(const char *file, const char *fmt)
10270 {
10271         char buf[STRERR_BUFSIZE];
10272         int err, ret;
10273         FILE *f;
10274
10275         f = fopen(file, "r");
10276         if (!f) {
10277                 err = -errno;
10278                 pr_debug("failed to open '%s': %s\n", file,
10279                          libbpf_strerror_r(err, buf, sizeof(buf)));
10280                 return err;
10281         }
10282         err = fscanf(f, fmt, &ret);
10283         if (err != 1) {
10284                 err = err == EOF ? -EIO : -errno;
10285                 pr_debug("failed to parse '%s': %s\n", file,
10286                         libbpf_strerror_r(err, buf, sizeof(buf)));
10287                 fclose(f);
10288                 return err;
10289         }
10290         fclose(f);
10291         return ret;
10292 }
10293
10294 static int determine_kprobe_perf_type(void)
10295 {
10296         const char *file = "/sys/bus/event_source/devices/kprobe/type";
10297
10298         return parse_uint_from_file(file, "%d\n");
10299 }
10300
10301 static int determine_uprobe_perf_type(void)
10302 {
10303         const char *file = "/sys/bus/event_source/devices/uprobe/type";
10304
10305         return parse_uint_from_file(file, "%d\n");
10306 }
10307
10308 static int determine_kprobe_retprobe_bit(void)
10309 {
10310         const char *file = "/sys/bus/event_source/devices/kprobe/format/retprobe";
10311
10312         return parse_uint_from_file(file, "config:%d\n");
10313 }
10314
10315 static int determine_uprobe_retprobe_bit(void)
10316 {
10317         const char *file = "/sys/bus/event_source/devices/uprobe/format/retprobe";
10318
10319         return parse_uint_from_file(file, "config:%d\n");
10320 }
10321
10322 static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name,
10323                                  uint64_t offset, int pid)
10324 {
10325         struct perf_event_attr attr = {};
10326         char errmsg[STRERR_BUFSIZE];
10327         int type, pfd, err;
10328
10329         type = uprobe ? determine_uprobe_perf_type()
10330                       : determine_kprobe_perf_type();
10331         if (type < 0) {
10332                 pr_warn("failed to determine %s perf type: %s\n",
10333                         uprobe ? "uprobe" : "kprobe",
10334                         libbpf_strerror_r(type, errmsg, sizeof(errmsg)));
10335                 return type;
10336         }
10337         if (retprobe) {
10338                 int bit = uprobe ? determine_uprobe_retprobe_bit()
10339                                  : determine_kprobe_retprobe_bit();
10340
10341                 if (bit < 0) {
10342                         pr_warn("failed to determine %s retprobe bit: %s\n",
10343                                 uprobe ? "uprobe" : "kprobe",
10344                                 libbpf_strerror_r(bit, errmsg, sizeof(errmsg)));
10345                         return bit;
10346                 }
10347                 attr.config |= 1 << bit;
10348         }
10349         attr.size = sizeof(attr);
10350         attr.type = type;
10351         attr.config1 = ptr_to_u64(name); /* kprobe_func or uprobe_path */
10352         attr.config2 = offset;           /* kprobe_addr or probe_offset */
10353
10354         /* pid filter is meaningful only for uprobes */
10355         pfd = syscall(__NR_perf_event_open, &attr,
10356                       pid < 0 ? -1 : pid /* pid */,
10357                       pid == -1 ? 0 : -1 /* cpu */,
10358                       -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC);
10359         if (pfd < 0) {
10360                 err = -errno;
10361                 pr_warn("%s perf_event_open() failed: %s\n",
10362                         uprobe ? "uprobe" : "kprobe",
10363                         libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
10364                 return err;
10365         }
10366         return pfd;
10367 }
10368
10369 struct bpf_program_attach_kprobe_opts {
10370         bool retprobe;
10371         unsigned long offset;
10372 };
10373
10374 static struct bpf_link*
10375 bpf_program__attach_kprobe_opts(struct bpf_program *prog,
10376                                 const char *func_name,
10377                                 struct bpf_program_attach_kprobe_opts *opts)
10378 {
10379         char errmsg[STRERR_BUFSIZE];
10380         struct bpf_link *link;
10381         int pfd, err;
10382
10383         pfd = perf_event_open_probe(false /* uprobe */, opts->retprobe, func_name,
10384                                     opts->offset, -1 /* pid */);
10385         if (pfd < 0) {
10386                 pr_warn("prog '%s': failed to create %s '%s' perf event: %s\n",
10387                         prog->name, opts->retprobe ? "kretprobe" : "kprobe", func_name,
10388                         libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
10389                 return libbpf_err_ptr(pfd);
10390         }
10391         link = bpf_program__attach_perf_event(prog, pfd);
10392         err = libbpf_get_error(link);
10393         if (err) {
10394                 close(pfd);
10395                 pr_warn("prog '%s': failed to attach to %s '%s': %s\n",
10396                         prog->name, opts->retprobe ? "kretprobe" : "kprobe", func_name,
10397                         libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
10398                 return libbpf_err_ptr(err);
10399         }
10400         return link;
10401 }
10402
10403 struct bpf_link *bpf_program__attach_kprobe(struct bpf_program *prog,
10404                                             bool retprobe,
10405                                             const char *func_name)
10406 {
10407         struct bpf_program_attach_kprobe_opts opts = {
10408                 .retprobe = retprobe,
10409         };
10410
10411         return bpf_program__attach_kprobe_opts(prog, func_name, &opts);
10412 }
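
/* Editor's note: a minimal usage sketch, not part of libbpf. "prog" is
 * assumed to come from an already loaded bpf_object, and "do_sys_open" is
 * a hypothetical attach target:
 *
 *	struct bpf_link *link;
 *
 *	link = bpf_program__attach_kprobe(prog, false, "do_sys_open");
 *	if (libbpf_get_error(link))
 *		return -1;
 *	// ... run ...
 *	bpf_link__destroy(link);
 */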
10413
10414 static struct bpf_link *attach_kprobe(const struct bpf_sec_def *sec,
10415                                       struct bpf_program *prog)
10416 {
10417         struct bpf_program_attach_kprobe_opts opts;
10418         unsigned long offset = 0;
10419         struct bpf_link *link;
10420         const char *func_name;
10421         char *func;
10422         int n, err;
10423
10424         func_name = prog->sec_name + sec->len;
10425         opts.retprobe = strcmp(sec->sec, "kretprobe/") == 0;
10426
10427         n = sscanf(func_name, "%m[a-zA-Z0-9_.]+%li", &func, &offset);
10428         if (n < 1) {
10429                 err = -EINVAL;
10430                 pr_warn("kprobe name is invalid: %s\n", func_name);
10431                 return libbpf_err_ptr(err);
10432         }
10433         if (opts.retprobe && offset != 0) {
10434                 free(func);
10435                 err = -EINVAL;
10436                 pr_warn("kretprobes do not support offset specification\n");
10437                 return libbpf_err_ptr(err);
10438         }
10439
10440         opts.offset = offset;
10441         link = bpf_program__attach_kprobe_opts(prog, func, &opts);
10442         free(func);
10443         return link;
10444 }
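
/* Editor's note: an illustrative sketch, not part of libbpf, of the section
 * names the parser above accepts. Because "%li" auto-detects the base the
 * way strtol() does with base 0, a function+offset suffix may be written in
 * decimal, hex, or octal; "do_sys_open" is a hypothetical symbol and all
 * three spellings probe it at byte offset 8:
 *
 *	SEC("kprobe/do_sys_open+8")	decimal
 *	SEC("kprobe/do_sys_open+0x8")	hexadecimal
 *	SEC("kprobe/do_sys_open+010")	octal
 */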
10445
10446 struct bpf_link *bpf_program__attach_uprobe(struct bpf_program *prog,
10447                                             bool retprobe, pid_t pid,
10448                                             const char *binary_path,
10449                                             size_t func_offset)
10450 {
10451         char errmsg[STRERR_BUFSIZE];
10452         struct bpf_link *link;
10453         int pfd, err;
10454
10455         pfd = perf_event_open_probe(true /* uprobe */, retprobe,
10456                                     binary_path, func_offset, pid);
10457         if (pfd < 0) {
10458                 pr_warn("prog '%s': failed to create %s '%s:0x%zx' perf event: %s\n",
10459                         prog->name, retprobe ? "uretprobe" : "uprobe",
10460                         binary_path, func_offset,
10461                         libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
10462                 return libbpf_err_ptr(pfd);
10463         }
10464         link = bpf_program__attach_perf_event(prog, pfd);
10465         err = libbpf_get_error(link);
10466         if (err) {
10467                 close(pfd);
10468                 pr_warn("prog '%s': failed to attach to %s '%s:0x%zx': %s\n",
10469                         prog->name, retprobe ? "uretprobe" : "uprobe",
10470                         binary_path, func_offset,
10471                         libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
10472                 return libbpf_err_ptr(err);
10473         }
10474         return link;
10475 }
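
/* Editor's note: a minimal usage sketch, not part of libbpf. The binary
 * path and instruction offset are hypothetical; func_offset is the offset
 * of the probed instruction within the binary, and pid -1 traces all
 * processes:
 *
 *	struct bpf_link *link;
 *
 *	link = bpf_program__attach_uprobe(prog, false, -1,
 *					  "/usr/lib/libc.so.6", 0x8c30);
 *	if (libbpf_get_error(link))
 *		return -1;
 */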
10476
10477 static int determine_tracepoint_id(const char *tp_category,
10478                                    const char *tp_name)
10479 {
10480         char file[PATH_MAX];
10481         int ret;
10482
10483         ret = snprintf(file, sizeof(file),
10484                        "/sys/kernel/debug/tracing/events/%s/%s/id",
10485                        tp_category, tp_name);
10486         if (ret < 0)
10487                 return -errno;
10488         if (ret >= sizeof(file)) {
10489                 pr_debug("tracepoint %s/%s path is too long\n",
10490                          tp_category, tp_name);
10491                 return -E2BIG;
10492         }
10493         return parse_uint_from_file(file, "%d\n");
10494 }
10495
10496 static int perf_event_open_tracepoint(const char *tp_category,
10497                                       const char *tp_name)
10498 {
10499         struct perf_event_attr attr = {};
10500         char errmsg[STRERR_BUFSIZE];
10501         int tp_id, pfd, err;
10502
10503         tp_id = determine_tracepoint_id(tp_category, tp_name);
10504         if (tp_id < 0) {
10505                 pr_warn("failed to determine tracepoint '%s/%s' perf event ID: %s\n",
10506                         tp_category, tp_name,
10507                         libbpf_strerror_r(tp_id, errmsg, sizeof(errmsg)));
10508                 return tp_id;
10509         }
10510
10511         attr.type = PERF_TYPE_TRACEPOINT;
10512         attr.size = sizeof(attr);
10513         attr.config = tp_id;
10514
10515         pfd = syscall(__NR_perf_event_open, &attr, -1 /* pid */, 0 /* cpu */,
10516                       -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC);
10517         if (pfd < 0) {
10518                 err = -errno;
10519                 pr_warn("tracepoint '%s/%s' perf_event_open() failed: %s\n",
10520                         tp_category, tp_name,
10521                         libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
10522                 return err;
10523         }
10524         return pfd;
10525 }
10526
10527 struct bpf_link *bpf_program__attach_tracepoint(struct bpf_program *prog,
10528                                                 const char *tp_category,
10529                                                 const char *tp_name)
10530 {
10531         char errmsg[STRERR_BUFSIZE];
10532         struct bpf_link *link;
10533         int pfd, err;
10534
10535         pfd = perf_event_open_tracepoint(tp_category, tp_name);
10536         if (pfd < 0) {
10537                 pr_warn("prog '%s': failed to create tracepoint '%s/%s' perf event: %s\n",
10538                         prog->name, tp_category, tp_name,
10539                         libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
10540                 return libbpf_err_ptr(pfd);
10541         }
10542         link = bpf_program__attach_perf_event(prog, pfd);
10543         err = libbpf_get_error(link);
10544         if (err) {
10545                 close(pfd);
10546                 pr_warn("prog '%s': failed to attach to tracepoint '%s/%s': %s\n",
10547                         prog->name, tp_category, tp_name,
10548                         libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
10549                 return libbpf_err_ptr(err);
10550         }
10551         return link;
10552 }
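
/* Editor's note: an illustrative sketch, not part of libbpf. Category and
 * name mirror /sys/kernel/debug/tracing/events/<category>/<name>; "prog"
 * is assumed to be loaded:
 *
 *	struct bpf_link *link;
 *
 *	link = bpf_program__attach_tracepoint(prog, "syscalls",
 *					      "sys_enter_openat");
 *	if (libbpf_get_error(link))
 *		return -1;
 */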
10553
10554 static struct bpf_link *attach_tp(const struct bpf_sec_def *sec,
10555                                   struct bpf_program *prog)
10556 {
10557         char *sec_name, *tp_cat, *tp_name;
10558         struct bpf_link *link;
10559
10560         sec_name = strdup(prog->sec_name);
10561         if (!sec_name)
10562                 return libbpf_err_ptr(-ENOMEM);
10563
10564         /* extract "tp/<category>/<name>" */
10565         tp_cat = sec_name + sec->len;
10566         tp_name = strchr(tp_cat, '/');
10567         if (!tp_name) {
10568                 free(sec_name);
10569                 return libbpf_err_ptr(-EINVAL);
10570         }
10571         *tp_name = '\0';
10572         tp_name++;
10573
10574         link = bpf_program__attach_tracepoint(prog, tp_cat, tp_name);
10575         free(sec_name);
10576         return link;
10577 }
10578
10579 struct bpf_link *bpf_program__attach_raw_tracepoint(struct bpf_program *prog,
10580                                                     const char *tp_name)
10581 {
10582         char errmsg[STRERR_BUFSIZE];
10583         struct bpf_link *link;
10584         int prog_fd, pfd;
10585
10586         prog_fd = bpf_program__fd(prog);
10587         if (prog_fd < 0) {
10588                 pr_warn("prog '%s': can't attach before loaded\n", prog->name);
10589                 return libbpf_err_ptr(-EINVAL);
10590         }
10591
10592         link = calloc(1, sizeof(*link));
10593         if (!link)
10594                 return libbpf_err_ptr(-ENOMEM);
10595         link->detach = &bpf_link__detach_fd;
10596
10597         pfd = bpf_raw_tracepoint_open(tp_name, prog_fd);
10598         if (pfd < 0) {
10599                 pfd = -errno;
10600                 free(link);
10601                 pr_warn("prog '%s': failed to attach to raw tracepoint '%s': %s\n",
10602                         prog->name, tp_name, libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
10603                 return libbpf_err_ptr(pfd);
10604         }
10605         link->fd = pfd;
10606         return link;
10607 }
10608
10609 static struct bpf_link *attach_raw_tp(const struct bpf_sec_def *sec,
10610                                       struct bpf_program *prog)
10611 {
10612         const char *tp_name = prog->sec_name + sec->len;
10613
10614         return bpf_program__attach_raw_tracepoint(prog, tp_name);
10615 }
10616
10617 /* Common logic for all BPF program types that attach to a btf_id */
10618 static struct bpf_link *bpf_program__attach_btf_id(struct bpf_program *prog)
10619 {
10620         char errmsg[STRERR_BUFSIZE];
10621         struct bpf_link *link;
10622         int prog_fd, pfd;
10623
10624         prog_fd = bpf_program__fd(prog);
10625         if (prog_fd < 0) {
10626                 pr_warn("prog '%s': can't attach before loaded\n", prog->name);
10627                 return libbpf_err_ptr(-EINVAL);
10628         }
10629
10630         link = calloc(1, sizeof(*link));
10631         if (!link)
10632                 return libbpf_err_ptr(-ENOMEM);
10633         link->detach = &bpf_link__detach_fd;
10634
10635         pfd = bpf_raw_tracepoint_open(NULL, prog_fd);
10636         if (pfd < 0) {
10637                 pfd = -errno;
10638                 free(link);
10639                 pr_warn("prog '%s': failed to attach: %s\n",
10640                         prog->name, libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
10641                 return libbpf_err_ptr(pfd);
10642         }
10643         link->fd = pfd;
10644         return link;
10645 }
10646
10647 struct bpf_link *bpf_program__attach_trace(struct bpf_program *prog)
10648 {
10649         return bpf_program__attach_btf_id(prog);
10650 }
10651
10652 struct bpf_link *bpf_program__attach_lsm(struct bpf_program *prog)
10653 {
10654         return bpf_program__attach_btf_id(prog);
10655 }
10656
10657 static struct bpf_link *attach_trace(const struct bpf_sec_def *sec,
10658                                      struct bpf_program *prog)
10659 {
10660         return bpf_program__attach_trace(prog);
10661 }
10662
10663 static struct bpf_link *attach_lsm(const struct bpf_sec_def *sec,
10664                                    struct bpf_program *prog)
10665 {
10666         return bpf_program__attach_lsm(prog);
10667 }
10668
10669 static struct bpf_link *
10670 bpf_program__attach_fd(struct bpf_program *prog, int target_fd, int btf_id,
10671                        const char *target_name)
10672 {
10673         DECLARE_LIBBPF_OPTS(bpf_link_create_opts, opts,
10674                             .target_btf_id = btf_id);
10675         enum bpf_attach_type attach_type;
10676         char errmsg[STRERR_BUFSIZE];
10677         struct bpf_link *link;
10678         int prog_fd, link_fd;
10679
10680         prog_fd = bpf_program__fd(prog);
10681         if (prog_fd < 0) {
10682                 pr_warn("prog '%s': can't attach before loaded\n", prog->name);
10683                 return libbpf_err_ptr(-EINVAL);
10684         }
10685
10686         link = calloc(1, sizeof(*link));
10687         if (!link)
10688                 return libbpf_err_ptr(-ENOMEM);
10689         link->detach = &bpf_link__detach_fd;
10690
10691         attach_type = bpf_program__get_expected_attach_type(prog);
10692         link_fd = bpf_link_create(prog_fd, target_fd, attach_type, &opts);
10693         if (link_fd < 0) {
10694                 link_fd = -errno;
10695                 free(link);
10696                 pr_warn("prog '%s': failed to attach to %s: %s\n",
10697                         prog->name, target_name,
10698                         libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg)));
10699                 return libbpf_err_ptr(link_fd);
10700         }
10701         link->fd = link_fd;
10702         return link;
10703 }
10704
10705 struct bpf_link *
10706 bpf_program__attach_cgroup(struct bpf_program *prog, int cgroup_fd)
10707 {
10708         return bpf_program__attach_fd(prog, cgroup_fd, 0, "cgroup");
10709 }
10710
10711 struct bpf_link *
10712 bpf_program__attach_netns(struct bpf_program *prog, int netns_fd)
10713 {
10714         return bpf_program__attach_fd(prog, netns_fd, 0, "netns");
10715 }
10716
10717 struct bpf_link *bpf_program__attach_xdp(struct bpf_program *prog, int ifindex)
10718 {
10719         /* target_fd/target_ifindex use the same field in LINK_CREATE */
10720         return bpf_program__attach_fd(prog, ifindex, 0, "xdp");
10721 }
10722
10723 struct bpf_link *bpf_program__attach_freplace(struct bpf_program *prog,
10724                                               int target_fd,
10725                                               const char *attach_func_name)
10726 {
10727         int btf_id;
10728
10729         if (!!target_fd != !!attach_func_name) {
10730                 pr_warn("prog '%s': supply none or both of target_fd and attach_func_name\n",
10731                         prog->name);
10732                 return libbpf_err_ptr(-EINVAL);
10733         }
10734
10735         if (prog->type != BPF_PROG_TYPE_EXT) {
10736                 pr_warn("prog '%s': only BPF_PROG_TYPE_EXT can attach as freplace\n",
10737                         prog->name);
10738                 return libbpf_err_ptr(-EINVAL);
10739         }
10740
10741         if (target_fd) {
10742                 btf_id = libbpf_find_prog_btf_id(attach_func_name, target_fd);
10743                 if (btf_id < 0)
10744                         return libbpf_err_ptr(btf_id);
10745
10746                 return bpf_program__attach_fd(prog, target_fd, btf_id, "freplace");
10747         } else {
10748                 /* no target, so use raw_tracepoint_open for compatibility
10749                  * with old kernels
10750                  */
10751                 return bpf_program__attach_trace(prog);
10752         }
10753 }
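
/* Editor's note: a minimal sketch, not part of libbpf, of replacing one
 * function inside an already loaded target program; "target_fd" and
 * "xdp_helper_func" are hypothetical:
 *
 *	struct bpf_link *link;
 *
 *	link = bpf_program__attach_freplace(prog, target_fd, "xdp_helper_func");
 *	if (libbpf_get_error(link))
 *		return -1;
 */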
10754
10755 struct bpf_link *
10756 bpf_program__attach_iter(struct bpf_program *prog,
10757                          const struct bpf_iter_attach_opts *opts)
10758 {
10759         DECLARE_LIBBPF_OPTS(bpf_link_create_opts, link_create_opts);
10760         char errmsg[STRERR_BUFSIZE];
10761         struct bpf_link *link;
10762         int prog_fd, link_fd;
10763         __u32 target_fd = 0;
10764
10765         if (!OPTS_VALID(opts, bpf_iter_attach_opts))
10766                 return libbpf_err_ptr(-EINVAL);
10767
10768         link_create_opts.iter_info = OPTS_GET(opts, link_info, (void *)0);
10769         link_create_opts.iter_info_len = OPTS_GET(opts, link_info_len, 0);
10770
10771         prog_fd = bpf_program__fd(prog);
10772         if (prog_fd < 0) {
10773                 pr_warn("prog '%s': can't attach before loaded\n", prog->name);
10774                 return libbpf_err_ptr(-EINVAL);
10775         }
10776
10777         link = calloc(1, sizeof(*link));
10778         if (!link)
10779                 return libbpf_err_ptr(-ENOMEM);
10780         link->detach = &bpf_link__detach_fd;
10781
10782         link_fd = bpf_link_create(prog_fd, target_fd, BPF_TRACE_ITER,
10783                                   &link_create_opts);
10784         if (link_fd < 0) {
10785                 link_fd = -errno;
10786                 free(link);
10787                 pr_warn("prog '%s': failed to attach to iterator: %s\n",
10788                         prog->name, libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg)));
10789                 return libbpf_err_ptr(link_fd);
10790         }
10791         link->fd = link_fd;
10792         return link;
10793 }
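
/* Editor's note: an illustrative sketch, not part of libbpf, of creating an
 * iterator instance and draining its output; "prog" is assumed to be a
 * loaded SEC("iter/task") program:
 *
 *	struct bpf_link *link = bpf_program__attach_iter(prog, NULL);
 *	char buf[256];
 *	int iter_fd;
 *
 *	if (libbpf_get_error(link))
 *		return -1;
 *	iter_fd = bpf_iter_create(bpf_link__fd(link));
 *	while (iter_fd >= 0 && read(iter_fd, buf, sizeof(buf)) > 0)
 *		;
 *	close(iter_fd);
 */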
10794
10795 static struct bpf_link *attach_iter(const struct bpf_sec_def *sec,
10796                                     struct bpf_program *prog)
10797 {
10798         return bpf_program__attach_iter(prog, NULL);
10799 }
10800
10801 struct bpf_link *bpf_program__attach(struct bpf_program *prog)
10802 {
10803         const struct bpf_sec_def *sec_def;
10804
10805         sec_def = find_sec_def(prog->sec_name);
10806         if (!sec_def || !sec_def->attach_fn)
10807                 return libbpf_err_ptr(-ESRCH);
10808
10809         return sec_def->attach_fn(sec_def, prog);
10810 }
10811
10812 static int bpf_link__detach_struct_ops(struct bpf_link *link)
10813 {
10814         __u32 zero = 0;
10815
10816         if (bpf_map_delete_elem(link->fd, &zero))
10817                 return -errno;
10818
10819         return 0;
10820 }
10821
10822 struct bpf_link *bpf_map__attach_struct_ops(struct bpf_map *map)
10823 {
10824         struct bpf_struct_ops *st_ops;
10825         struct bpf_link *link;
10826         __u32 i, zero = 0;
10827         int err;
10828
10829         if (!bpf_map__is_struct_ops(map) || map->fd == -1)
10830                 return libbpf_err_ptr(-EINVAL);
10831
10832         link = calloc(1, sizeof(*link));
10833         if (!link)
10834                 return libbpf_err_ptr(-ENOMEM);
10835
10836         st_ops = map->st_ops;
10837         for (i = 0; i < btf_vlen(st_ops->type); i++) {
10838                 struct bpf_program *prog = st_ops->progs[i];
10839                 void *kern_data;
10840                 int prog_fd;
10841
10842                 if (!prog)
10843                         continue;
10844
10845                 prog_fd = bpf_program__fd(prog);
10846                 kern_data = st_ops->kern_vdata + st_ops->kern_func_off[i];
10847                 *(unsigned long *)kern_data = prog_fd;
10848         }
10849
10850         err = bpf_map_update_elem(map->fd, &zero, st_ops->kern_vdata, 0);
10851         if (err) {
10852                 err = -errno;
10853                 free(link);
10854                 return libbpf_err_ptr(err);
10855         }
10856
10857         link->detach = bpf_link__detach_struct_ops;
10858         link->fd = map->fd;
10859
10860         return link;
10861 }
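
/* Editor's note: a minimal sketch, not part of libbpf. A struct_ops map is
 * registered with the kernel only once the object is loaded and the map is
 * attached; "obj" and the "dctcp" map name are hypothetical:
 *
 *	struct bpf_link *link;
 *
 *	if (bpf_object__load(obj))
 *		return -1;
 *	link = bpf_map__attach_struct_ops(bpf_object__find_map_by_name(obj, "dctcp"));
 *	if (libbpf_get_error(link))
 *		return -1;
 */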
10862
10863 enum bpf_perf_event_ret
10864 bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size,
10865                            void **copy_mem, size_t *copy_size,
10866                            bpf_perf_event_print_t fn, void *private_data)
10867 {
10868         struct perf_event_mmap_page *header = mmap_mem;
10869         __u64 data_head = ring_buffer_read_head(header);
10870         __u64 data_tail = header->data_tail;
10871         void *base = ((__u8 *)header) + page_size;
10872         int ret = LIBBPF_PERF_EVENT_CONT;
10873         struct perf_event_header *ehdr;
10874         size_t ehdr_size;
10875
10876         while (data_head != data_tail) {
10877                 ehdr = base + (data_tail & (mmap_size - 1));
10878                 ehdr_size = ehdr->size;
10879
10880                 if (((void *)ehdr) + ehdr_size > base + mmap_size) {
10881                         void *copy_start = ehdr;
10882                         size_t len_first = base + mmap_size - copy_start;
10883                         size_t len_second = ehdr_size - len_first;
10884
10885                         if (*copy_size < ehdr_size) {
10886                                 free(*copy_mem);
10887                                 *copy_mem = malloc(ehdr_size);
10888                                 if (!*copy_mem) {
10889                                         *copy_size = 0;
10890                                         ret = LIBBPF_PERF_EVENT_ERROR;
10891                                         break;
10892                                 }
10893                                 *copy_size = ehdr_size;
10894                         }
10895
10896                         memcpy(*copy_mem, copy_start, len_first);
10897                         memcpy(*copy_mem + len_first, base, len_second);
10898                         ehdr = *copy_mem;
10899                 }
10900
10901                 ret = fn(ehdr, private_data);
10902                 data_tail += ehdr_size;
10903                 if (ret != LIBBPF_PERF_EVENT_CONT)
10904                         break;
10905         }
10906
10907         ring_buffer_write_tail(header, data_tail);
10908         return libbpf_err(ret);
10909 }
10910
10911 struct perf_buffer;
10912
10913 struct perf_buffer_params {
10914         struct perf_event_attr *attr;
10915         /* if event_cb is specified, it takes precedence */
10916         perf_buffer_event_fn event_cb;
10917         /* sample_cb and lost_cb are higher-level common-case callbacks */
10918         perf_buffer_sample_fn sample_cb;
10919         perf_buffer_lost_fn lost_cb;
10920         void *ctx;
10921         int cpu_cnt;
10922         int *cpus;
10923         int *map_keys;
10924 };
10925
10926 struct perf_cpu_buf {
10927         struct perf_buffer *pb;
10928         void *base; /* mmap()'ed memory */
10929         void *buf; /* for reconstructing segmented data */
10930         size_t buf_size;
10931         int fd;
10932         int cpu;
10933         int map_key;
10934 };
10935
10936 struct perf_buffer {
10937         perf_buffer_event_fn event_cb;
10938         perf_buffer_sample_fn sample_cb;
10939         perf_buffer_lost_fn lost_cb;
10940         void *ctx; /* passed into callbacks */
10941
10942         size_t page_size;
10943         size_t mmap_size;
10944         struct perf_cpu_buf **cpu_bufs;
10945         struct epoll_event *events;
10946         int cpu_cnt; /* number of allocated CPU buffers */
10947         int epoll_fd; /* epoll instance FD */
10948         int map_fd; /* BPF_MAP_TYPE_PERF_EVENT_ARRAY BPF map FD */
10949 };
10950
10951 static void perf_buffer__free_cpu_buf(struct perf_buffer *pb,
10952                                       struct perf_cpu_buf *cpu_buf)
10953 {
10954         if (!cpu_buf)
10955                 return;
10956         if (cpu_buf->base &&
10957             munmap(cpu_buf->base, pb->mmap_size + pb->page_size))
10958                 pr_warn("failed to munmap cpu_buf #%d\n", cpu_buf->cpu);
10959         if (cpu_buf->fd >= 0) {
10960                 ioctl(cpu_buf->fd, PERF_EVENT_IOC_DISABLE, 0);
10961                 close(cpu_buf->fd);
10962         }
10963         free(cpu_buf->buf);
10964         free(cpu_buf);
10965 }
10966
10967 void perf_buffer__free(struct perf_buffer *pb)
10968 {
10969         int i;
10970
10971         if (IS_ERR_OR_NULL(pb))
10972                 return;
10973         if (pb->cpu_bufs) {
10974                 for (i = 0; i < pb->cpu_cnt; i++) {
10975                         struct perf_cpu_buf *cpu_buf = pb->cpu_bufs[i];
10976
10977                         if (!cpu_buf)
10978                                 continue;
10979
10980                         bpf_map_delete_elem(pb->map_fd, &cpu_buf->map_key);
10981                         perf_buffer__free_cpu_buf(pb, cpu_buf);
10982                 }
10983                 free(pb->cpu_bufs);
10984         }
10985         if (pb->epoll_fd >= 0)
10986                 close(pb->epoll_fd);
10987         free(pb->events);
10988         free(pb);
10989 }
10990
10991 static struct perf_cpu_buf *
10992 perf_buffer__open_cpu_buf(struct perf_buffer *pb, struct perf_event_attr *attr,
10993                           int cpu, int map_key)
10994 {
10995         struct perf_cpu_buf *cpu_buf;
10996         char msg[STRERR_BUFSIZE];
10997         int err;
10998
10999         cpu_buf = calloc(1, sizeof(*cpu_buf));
11000         if (!cpu_buf)
11001                 return ERR_PTR(-ENOMEM);
11002
11003         cpu_buf->pb = pb;
11004         cpu_buf->cpu = cpu;
11005         cpu_buf->map_key = map_key;
11006
11007         cpu_buf->fd = syscall(__NR_perf_event_open, attr, -1 /* pid */, cpu,
11008                               -1, PERF_FLAG_FD_CLOEXEC);
11009         if (cpu_buf->fd < 0) {
11010                 err = -errno;
11011                 pr_warn("failed to open perf buffer event on cpu #%d: %s\n",
11012                         cpu, libbpf_strerror_r(err, msg, sizeof(msg)));
11013                 goto error;
11014         }
11015
11016         cpu_buf->base = mmap(NULL, pb->mmap_size + pb->page_size,
11017                              PROT_READ | PROT_WRITE, MAP_SHARED,
11018                              cpu_buf->fd, 0);
11019         if (cpu_buf->base == MAP_FAILED) {
11020                 cpu_buf->base = NULL;
11021                 err = -errno;
11022                 pr_warn("failed to mmap perf buffer on cpu #%d: %s\n",
11023                         cpu, libbpf_strerror_r(err, msg, sizeof(msg)));
11024                 goto error;
11025         }
11026
11027         if (ioctl(cpu_buf->fd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
11028                 err = -errno;
11029                 pr_warn("failed to enable perf buffer event on cpu #%d: %s\n",
11030                         cpu, libbpf_strerror_r(err, msg, sizeof(msg)));
11031                 goto error;
11032         }
11033
11034         return cpu_buf;
11035
11036 error:
11037         perf_buffer__free_cpu_buf(pb, cpu_buf);
11038         return (struct perf_cpu_buf *)ERR_PTR(err);
11039 }
11040
11041 static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt,
11042                                               struct perf_buffer_params *p);
11043
11044 struct perf_buffer *perf_buffer__new(int map_fd, size_t page_cnt,
11045                                      const struct perf_buffer_opts *opts)
11046 {
11047         struct perf_buffer_params p = {};
11048         struct perf_event_attr attr = { 0, };
11049
11050         attr.config = PERF_COUNT_SW_BPF_OUTPUT;
11051         attr.type = PERF_TYPE_SOFTWARE;
11052         attr.sample_type = PERF_SAMPLE_RAW;
11053         attr.sample_period = 1;
11054         attr.wakeup_events = 1;
11055
11056         p.attr = &attr;
11057         p.sample_cb = opts ? opts->sample_cb : NULL;
11058         p.lost_cb = opts ? opts->lost_cb : NULL;
11059         p.ctx = opts ? opts->ctx : NULL;
11060
11061         return libbpf_ptr(__perf_buffer__new(map_fd, page_cnt, &p));
11062 }
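
/* Editor's note: a minimal usage sketch (not part of libbpf.c) for the
 * opts-based perf_buffer__new() API above. map_fd is assumed to be the FD
 * of a BPF_MAP_TYPE_PERF_EVENT_ARRAY map obtained elsewhere; handle_sample
 * and handle_lost are hypothetical callbacks. Note that page_cnt must be
 * a power of two (enforced in __perf_buffer__new() below).
 *
 *	static void handle_sample(void *ctx, int cpu, void *data, __u32 size)
 *	{
 *		fprintf(stderr, "cpu %d: sample of %u bytes\n", cpu, size);
 *	}
 *
 *	static void handle_lost(void *ctx, int cpu, __u64 cnt)
 *	{
 *		fprintf(stderr, "cpu %d: lost %llu samples\n", cpu,
 *			(unsigned long long)cnt);
 *	}
 *
 *	struct perf_buffer_opts pb_opts = {
 *		.sample_cb = handle_sample,
 *		.lost_cb = handle_lost,
 *	};
 *	struct perf_buffer *pb = perf_buffer__new(map_fd, 64, &pb_opts);
 *
 *	if (libbpf_get_error(pb))
 *		pb = NULL;
 */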
11063
11064 struct perf_buffer *
11065 perf_buffer__new_raw(int map_fd, size_t page_cnt,
11066                      const struct perf_buffer_raw_opts *opts)
11067 {
11068         struct perf_buffer_params p = {};
11069
11070         p.attr = opts->attr;
11071         p.event_cb = opts->event_cb;
11072         p.ctx = opts->ctx;
11073         p.cpu_cnt = opts->cpu_cnt;
11074         p.cpus = opts->cpus;
11075         p.map_keys = opts->map_keys;
11076
11077         return libbpf_ptr(__perf_buffer__new(map_fd, page_cnt, &p));
11078 }
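
/* Editor's note: a hedged sketch of the lower-level raw API above, which
 * hands each struct perf_event_header to the caller unparsed. The attr
 * fields mirror what perf_buffer__new() sets up; handle_event and map_fd
 * are hypothetical. Leaving cpu_cnt at 0 means "all possible CPUs, keyed
 * by CPU index, with offline CPUs skipped".
 *
 *	static enum bpf_perf_event_ret
 *	handle_event(void *ctx, int cpu, struct perf_event_header *event)
 *	{
 *		return LIBBPF_PERF_EVENT_CONT;
 *	}
 *
 *	struct perf_event_attr attr = {
 *		.type = PERF_TYPE_SOFTWARE,
 *		.config = PERF_COUNT_SW_BPF_OUTPUT,
 *		.sample_type = PERF_SAMPLE_RAW,
 *		.sample_period = 1,
 *		.wakeup_events = 1,
 *	};
 *	struct perf_buffer_raw_opts raw_opts = {
 *		.attr = &attr,
 *		.event_cb = handle_event,
 *	};
 *	struct perf_buffer *pb = perf_buffer__new_raw(map_fd, 64, &raw_opts);
 */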
11079
11080 static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt,
11081                                               struct perf_buffer_params *p)
11082 {
11083         const char *online_cpus_file = "/sys/devices/system/cpu/online";
11084         struct bpf_map_info map;
11085         char msg[STRERR_BUFSIZE];
11086         struct perf_buffer *pb;
11087         bool *online = NULL;
11088         __u32 map_info_len;
11089         int err, i, j, n;
11090
11091         if (page_cnt & (page_cnt - 1)) {
11092                 pr_warn("page count should be a power of two, but is %zu\n",
11093                         page_cnt);
11094                 return ERR_PTR(-EINVAL);
11095         }
11096
11097         /* best-effort sanity checks */
11098         memset(&map, 0, sizeof(map));
11099         map_info_len = sizeof(map);
11100         err = bpf_obj_get_info_by_fd(map_fd, &map, &map_info_len);
11101         if (err) {
11102                 err = -errno;
11103                 /* if BPF_OBJ_GET_INFO_BY_FD is supported, it will return
11104                  * -EBADFD, -EFAULT, or -E2BIG on a real error
11105                  */
11106                 if (err != -EINVAL) {
11107                         pr_warn("failed to get map info for map FD %d: %s\n",
11108                                 map_fd, libbpf_strerror_r(err, msg, sizeof(msg)));
11109                         return ERR_PTR(err);
11110                 }
11111                 pr_debug("failed to get map info for FD %d; API not supported? Ignoring...\n",
11112                          map_fd);
11113         } else {
11114                 if (map.type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) {
11115                         pr_warn("map '%s' should be BPF_MAP_TYPE_PERF_EVENT_ARRAY\n",
11116                                 map.name);
11117                         return ERR_PTR(-EINVAL);
11118                 }
11119         }
11120
11121         pb = calloc(1, sizeof(*pb));
11122         if (!pb)
11123                 return ERR_PTR(-ENOMEM);
11124
11125         pb->event_cb = p->event_cb;
11126         pb->sample_cb = p->sample_cb;
11127         pb->lost_cb = p->lost_cb;
11128         pb->ctx = p->ctx;
11129
11130         pb->page_size = getpagesize();
11131         pb->mmap_size = pb->page_size * page_cnt;
11132         pb->map_fd = map_fd;
11133
11134         pb->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
11135         if (pb->epoll_fd < 0) {
11136                 err = -errno;
11137                 pr_warn("failed to create epoll instance: %s\n",
11138                         libbpf_strerror_r(err, msg, sizeof(msg)));
11139                 goto error;
11140         }
11141
11142         if (p->cpu_cnt > 0) {
11143                 pb->cpu_cnt = p->cpu_cnt;
11144         } else {
11145                 pb->cpu_cnt = libbpf_num_possible_cpus();
11146                 if (pb->cpu_cnt < 0) {
11147                         err = pb->cpu_cnt;
11148                         goto error;
11149                 }
11150                 if (map.max_entries && map.max_entries < pb->cpu_cnt)
11151                         pb->cpu_cnt = map.max_entries;
11152         }
11153
11154         pb->events = calloc(pb->cpu_cnt, sizeof(*pb->events));
11155         if (!pb->events) {
11156                 err = -ENOMEM;
11157                 pr_warn("failed to allocate events: out of memory\n");
11158                 goto error;
11159         }
11160         pb->cpu_bufs = calloc(pb->cpu_cnt, sizeof(*pb->cpu_bufs));
11161         if (!pb->cpu_bufs) {
11162                 err = -ENOMEM;
11163                 pr_warn("failed to allocate buffers: out of memory\n");
11164                 goto error;
11165         }
11166
11167         err = parse_cpu_mask_file(online_cpus_file, &online, &n);
11168         if (err) {
11169                 pr_warn("failed to get online CPU mask: %d\n", err);
11170                 goto error;
11171         }
11172
11173         for (i = 0, j = 0; i < pb->cpu_cnt; i++) {
11174                 struct perf_cpu_buf *cpu_buf;
11175                 int cpu, map_key;
11176
11177                 cpu = p->cpu_cnt > 0 ? p->cpus[i] : i;
11178                 map_key = p->cpu_cnt > 0 ? p->map_keys[i] : i;
11179
11180                 /* if the user didn't explicitly request particular CPUs to
11181                  * attach to, skip offline/not-present CPUs
11182                  */
11183                 if (p->cpu_cnt <= 0 && (cpu >= n || !online[cpu]))
11184                         continue;
11185
11186                 cpu_buf = perf_buffer__open_cpu_buf(pb, p->attr, cpu, map_key);
11187                 if (IS_ERR(cpu_buf)) {
11188                         err = PTR_ERR(cpu_buf);
11189                         goto error;
11190                 }
11191
11192                 pb->cpu_bufs[j] = cpu_buf;
11193
11194                 err = bpf_map_update_elem(pb->map_fd, &map_key,
11195                                           &cpu_buf->fd, 0);
11196                 if (err) {
11197                         err = -errno;
11198                         pr_warn("failed to set cpu #%d, key %d -> perf FD %d: %s\n",
11199                                 cpu, map_key, cpu_buf->fd,
11200                                 libbpf_strerror_r(err, msg, sizeof(msg)));
11201                         goto error;
11202                 }
11203
11204                 pb->events[j].events = EPOLLIN;
11205                 pb->events[j].data.ptr = cpu_buf;
11206                 if (epoll_ctl(pb->epoll_fd, EPOLL_CTL_ADD, cpu_buf->fd,
11207                               &pb->events[j]) < 0) {
11208                         err = -errno;
11209                         pr_warn("failed to epoll_ctl cpu #%d perf FD %d: %s\n",
11210                                 cpu, cpu_buf->fd,
11211                                 libbpf_strerror_r(err, msg, sizeof(msg)));
11212                         goto error;
11213                 }
11214                 j++;
11215         }
11216         pb->cpu_cnt = j;
11217         free(online);
11218
11219         return pb;
11220
11221 error:
11222         free(online);
11223         if (pb)
11224                 perf_buffer__free(pb);
11225         return ERR_PTR(err);
11226 }
11227
11228 struct perf_sample_raw {
11229         struct perf_event_header header;
11230         uint32_t size;
11231         char data[];
11232 };
11233
11234 struct perf_sample_lost {
11235         struct perf_event_header header;
11236         uint64_t id;
11237         uint64_t lost;
11238         uint64_t sample_id;
11239 };
11240
11241 static enum bpf_perf_event_ret
11242 perf_buffer__process_record(struct perf_event_header *e, void *ctx)
11243 {
11244         struct perf_cpu_buf *cpu_buf = ctx;
11245         struct perf_buffer *pb = cpu_buf->pb;
11246         void *data = e;
11247
11248         /* user wants full control over parsing perf event */
11249         if (pb->event_cb)
11250                 return pb->event_cb(pb->ctx, cpu_buf->cpu, e);
11251
11252         switch (e->type) {
11253         case PERF_RECORD_SAMPLE: {
11254                 struct perf_sample_raw *s = data;
11255
11256                 if (pb->sample_cb)
11257                         pb->sample_cb(pb->ctx, cpu_buf->cpu, s->data, s->size);
11258                 break;
11259         }
11260         case PERF_RECORD_LOST: {
11261                 struct perf_sample_lost *s = data;
11262
11263                 if (pb->lost_cb)
11264                         pb->lost_cb(pb->ctx, cpu_buf->cpu, s->lost);
11265                 break;
11266         }
11267         default:
11268                 pr_warn("unknown perf sample type %d\n", e->type);
11269                 return LIBBPF_PERF_EVENT_ERROR;
11270         }
11271         return LIBBPF_PERF_EVENT_CONT;
11272 }
11273
11274 static int perf_buffer__process_records(struct perf_buffer *pb,
11275                                         struct perf_cpu_buf *cpu_buf)
11276 {
11277         enum bpf_perf_event_ret ret;
11278
11279         ret = bpf_perf_event_read_simple(cpu_buf->base, pb->mmap_size,
11280                                          pb->page_size, &cpu_buf->buf,
11281                                          &cpu_buf->buf_size,
11282                                          perf_buffer__process_record, cpu_buf);
11283         if (ret != LIBBPF_PERF_EVENT_CONT)
11284                 return ret;
11285         return 0;
11286 }
11287
11288 int perf_buffer__epoll_fd(const struct perf_buffer *pb)
11289 {
11290         return pb->epoll_fd;
11291 }
11292
11293 int perf_buffer__poll(struct perf_buffer *pb, int timeout_ms)
11294 {
11295         int i, cnt, err;
11296
11297         cnt = epoll_wait(pb->epoll_fd, pb->events, pb->cpu_cnt, timeout_ms);
11298         if (cnt < 0)
11299                 return -errno;
11300
11301         for (i = 0; i < cnt; i++) {
11302                 struct perf_cpu_buf *cpu_buf = pb->events[i].data.ptr;
11303
11304                 err = perf_buffer__process_records(pb, cpu_buf);
11305                 if (err) {
11306                         pr_warn("error while processing records: %d\n", err);
11307                         return libbpf_err(err);
11308                 }
11309         }
11310         return cnt;
11311 }
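
/* Editor's note: a typical consumption loop, sketched assuming a pb from
 * perf_buffer__new() above and an application-defined 'stop' flag:
 *
 *	while (!stop) {
 *		// wait up to 100 ms; callbacks fire once per record
 *		int n = perf_buffer__poll(pb, 100);
 *
 *		if (n < 0 && n != -EINTR) {
 *			fprintf(stderr, "polling failed: %d\n", n);
 *			break;
 *		}
 *	}
 *	perf_buffer__free(pb);
 */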
11312
11313 /* Return number of PERF_EVENT_ARRAY map slots set up by this perf_buffer
11314  * manager.
11315  */
11316 size_t perf_buffer__buffer_cnt(const struct perf_buffer *pb)
11317 {
11318         return pb->cpu_cnt;
11319 }
11320
11321 /*
11322  * Return perf_event FD of a ring buffer in *buf_idx* slot of
11323  * PERF_EVENT_ARRAY BPF map. This FD can be polled for new data using
11324  * select()/poll()/epoll() Linux syscalls.
11325  */
11326 int perf_buffer__buffer_fd(const struct perf_buffer *pb, size_t buf_idx)
11327 {
11328         struct perf_cpu_buf *cpu_buf;
11329
11330         if (buf_idx >= pb->cpu_cnt)
11331                 return libbpf_err(-EINVAL);
11332
11333         cpu_buf = pb->cpu_bufs[buf_idx];
11334         if (!cpu_buf)
11335                 return libbpf_err(-ENOENT);
11336
11337         return cpu_buf->fd;
11338 }
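
/* Editor's note: a sketch of driving a perf_buffer from an external event
 * loop instead of blocking in perf_buffer__poll(); epfd is a hypothetical
 * epoll instance owned by the application:
 *
 *	struct epoll_event ev = { .events = EPOLLIN };
 *
 *	epoll_ctl(epfd, EPOLL_CTL_ADD, perf_buffer__epoll_fd(pb), &ev);
 *	// once epfd reports the FD ready, drain without blocking:
 *	if (epoll_wait(epfd, &ev, 1, -1) == 1)
 *		perf_buffer__poll(pb, 0);
 */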
11339
11340 /*
11341  * Consume data from perf ring buffer corresponding to slot *buf_idx* in
11342  * PERF_EVENT_ARRAY BPF map without waiting/polling. If there is no data to
11343  * consume, do nothing and return success.
11344  * Returns:
11345  *   - 0 on success;
11346  *   - <0 on failure.
11347  */
11348 int perf_buffer__consume_buffer(struct perf_buffer *pb, size_t buf_idx)
11349 {
11350         struct perf_cpu_buf *cpu_buf;
11351
11352         if (buf_idx >= pb->cpu_cnt)
11353                 return libbpf_err(-EINVAL);
11354
11355         cpu_buf = pb->cpu_bufs[buf_idx];
11356         if (!cpu_buf)
11357                 return libbpf_err(-ENOENT);
11358
11359         return perf_buffer__process_records(pb, cpu_buf);
11360 }
11361
11362 int perf_buffer__consume(struct perf_buffer *pb)
11363 {
11364         int i, err;
11365
11366         for (i = 0; i < pb->cpu_cnt; i++) {
11367                 struct perf_cpu_buf *cpu_buf = pb->cpu_bufs[i];
11368
11369                 if (!cpu_buf)
11370                         continue;
11371
11372                 err = perf_buffer__process_records(pb, cpu_buf);
11373                 if (err) {
11374                         pr_warn("perf_buffer: failed to process records in buffer #%d: %d\n", i, err);
11375                         return libbpf_err(err);
11376                 }
11377         }
11378         return 0;
11379 }
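
/* Editor's note: consume vs. poll, sketched -- both drain whatever is
 * already in the ring(s) without waiting, e.g. right before teardown:
 *
 *	perf_buffer__consume_buffer(pb, 0);	// just slot 0
 *	perf_buffer__consume(pb);		// every managed ring
 */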
11380
11381 struct bpf_prog_info_array_desc {
11382         int     array_offset;   /* e.g. offset of jited_prog_insns */
11383         int     count_offset;   /* e.g. offset of jited_prog_len */
11384         int     size_offset;    /* > 0: offset of rec size,
11385                                  * < 0: fixed size of -size_offset
11386                                  */
11387 };
11388
11389 static struct bpf_prog_info_array_desc bpf_prog_info_array_desc[] = {
11390         [BPF_PROG_INFO_JITED_INSNS] = {
11391                 offsetof(struct bpf_prog_info, jited_prog_insns),
11392                 offsetof(struct bpf_prog_info, jited_prog_len),
11393                 -1,
11394         },
11395         [BPF_PROG_INFO_XLATED_INSNS] = {
11396                 offsetof(struct bpf_prog_info, xlated_prog_insns),
11397                 offsetof(struct bpf_prog_info, xlated_prog_len),
11398                 -1,
11399         },
11400         [BPF_PROG_INFO_MAP_IDS] = {
11401                 offsetof(struct bpf_prog_info, map_ids),
11402                 offsetof(struct bpf_prog_info, nr_map_ids),
11403                 -(int)sizeof(__u32),
11404         },
11405         [BPF_PROG_INFO_JITED_KSYMS] = {
11406                 offsetof(struct bpf_prog_info, jited_ksyms),
11407                 offsetof(struct bpf_prog_info, nr_jited_ksyms),
11408                 -(int)sizeof(__u64),
11409         },
11410         [BPF_PROG_INFO_JITED_FUNC_LENS] = {
11411                 offsetof(struct bpf_prog_info, jited_func_lens),
11412                 offsetof(struct bpf_prog_info, nr_jited_func_lens),
11413                 -(int)sizeof(__u32),
11414         },
11415         [BPF_PROG_INFO_FUNC_INFO] = {
11416                 offsetof(struct bpf_prog_info, func_info),
11417                 offsetof(struct bpf_prog_info, nr_func_info),
11418                 offsetof(struct bpf_prog_info, func_info_rec_size),
11419         },
11420         [BPF_PROG_INFO_LINE_INFO] = {
11421                 offsetof(struct bpf_prog_info, line_info),
11422                 offsetof(struct bpf_prog_info, nr_line_info),
11423                 offsetof(struct bpf_prog_info, line_info_rec_size),
11424         },
11425         [BPF_PROG_INFO_JITED_LINE_INFO] = {
11426                 offsetof(struct bpf_prog_info, jited_line_info),
11427                 offsetof(struct bpf_prog_info, nr_jited_line_info),
11428                 offsetof(struct bpf_prog_info, jited_line_info_rec_size),
11429         },
11430         [BPF_PROG_INFO_PROG_TAGS] = {
11431                 offsetof(struct bpf_prog_info, prog_tags),
11432                 offsetof(struct bpf_prog_info, nr_prog_tags),
11433                 -(int)sizeof(__u8) * BPF_TAG_SIZE,
11434         },
11435
11436 };
11437
11438 static __u32 bpf_prog_info_read_offset_u32(struct bpf_prog_info *info,
11439                                            int offset)
11440 {
11441         __u32 *array = (__u32 *)info;
11442
11443         if (offset >= 0)
11444                 return array[offset / sizeof(__u32)];
11445         return -(int)offset;
11446 }
11447
11448 static __u64 bpf_prog_info_read_offset_u64(struct bpf_prog_info *info,
11449                                            int offset)
11450 {
11451         __u64 *array = (__u64 *)info;
11452
11453         if (offset >= 0)
11454                 return array[offset / sizeof(__u64)];
11455         return -(int)offset;
11456 }
11457
11458 static void bpf_prog_info_set_offset_u32(struct bpf_prog_info *info, int offset,
11459                                          __u32 val)
11460 {
11461         __u32 *array = (__u32 *)info;
11462
11463         if (offset >= 0)
11464                 array[offset / sizeof(__u32)] = val;
11465 }
11466
11467 static void bpf_prog_info_set_offset_u64(struct bpf_prog_info *info, int offset,
11468                                          __u64 val)
11469 {
11470         __u64 *array = (__u64 *)info;
11471
11472         if (offset >= 0)
11473                 array[offset / sizeof(__u64)] = val;
11474 }
11475
11476 struct bpf_prog_info_linear *
11477 bpf_program__get_prog_info_linear(int fd, __u64 arrays)
11478 {
11479         struct bpf_prog_info_linear *info_linear;
11480         struct bpf_prog_info info = {};
11481         __u32 info_len = sizeof(info);
11482         __u32 data_len = 0;
11483         int i, err;
11484         void *ptr;
11485
11486         if (arrays >> BPF_PROG_INFO_LAST_ARRAY)
11487                 return libbpf_err_ptr(-EINVAL);
11488
11489         /* step 1: get array dimensions */
11490         err = bpf_obj_get_info_by_fd(fd, &info, &info_len);
11491         if (err) {
11492                 pr_debug("can't get prog info: %s", strerror(errno));
11493                 return libbpf_err_ptr(-EFAULT);
11494         }
11495
11496         /* step 2: calculate total size of all arrays */
11497         for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) {
11498                 bool include_array = (arrays & (1UL << i)) > 0;
11499                 struct bpf_prog_info_array_desc *desc;
11500                 __u32 count, size;
11501
11502                 desc = bpf_prog_info_array_desc + i;
11503
11504                 /* kernel is too old to support this field */
11505                 if (info_len < desc->array_offset + sizeof(__u32) ||
11506                     info_len < desc->count_offset + sizeof(__u32) ||
11507                     (desc->size_offset > 0 && info_len < desc->size_offset))
11508                         include_array = false;
11509
11510                 if (!include_array) {
11511                         arrays &= ~(1UL << i);  /* clear the bit */
11512                         continue;
11513                 }
11514
11515                 count = bpf_prog_info_read_offset_u32(&info, desc->count_offset);
11516                 size  = bpf_prog_info_read_offset_u32(&info, desc->size_offset);
11517
11518                 data_len += count * size;
11519         }
11520
11521         /* step 3: allocate contiguous memory */
11522         data_len = roundup(data_len, sizeof(__u64));
11523         info_linear = malloc(sizeof(struct bpf_prog_info_linear) + data_len);
11524         if (!info_linear)
11525                 return libbpf_err_ptr(-ENOMEM);
11526
11527         /* step 4: fill in info_linear->info */
11528         info_linear->arrays = arrays;
11529         memset(&info_linear->info, 0, sizeof(info));
11530         ptr = info_linear->data;
11531
11532         for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) {
11533                 struct bpf_prog_info_array_desc *desc;
11534                 __u32 count, size;
11535
11536                 if ((arrays & (1UL << i)) == 0)
11537                         continue;
11538
11539                 desc  = bpf_prog_info_array_desc + i;
11540                 count = bpf_prog_info_read_offset_u32(&info, desc->count_offset);
11541                 size  = bpf_prog_info_read_offset_u32(&info, desc->size_offset);
11542                 bpf_prog_info_set_offset_u32(&info_linear->info,
11543                                              desc->count_offset, count);
11544                 bpf_prog_info_set_offset_u32(&info_linear->info,
11545                                              desc->size_offset, size);
11546                 bpf_prog_info_set_offset_u64(&info_linear->info,
11547                                              desc->array_offset,
11548                                              ptr_to_u64(ptr));
11549                 ptr += count * size;
11550         }
11551
11552         /* step 5: call syscall again to get required arrays */
11553         err = bpf_obj_get_info_by_fd(fd, &info_linear->info, &info_len);
11554         if (err) {
11555                 pr_debug("can't get prog info: %s", strerror(errno));
11556                 free(info_linear);
11557                 return libbpf_err_ptr(-EFAULT);
11558         }
11559
11560         /* step 6: verify the data */
11561         for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) {
11562                 struct bpf_prog_info_array_desc *desc;
11563                 __u32 v1, v2;
11564
11565                 if ((arrays & (1UL << i)) == 0)
11566                         continue;
11567
11568                 desc = bpf_prog_info_array_desc + i;
11569                 v1 = bpf_prog_info_read_offset_u32(&info, desc->count_offset);
11570                 v2 = bpf_prog_info_read_offset_u32(&info_linear->info,
11571                                                    desc->count_offset);
11572                 if (v1 != v2)
11573                         pr_warn("%s: mismatch in element count\n", __func__);
11574
11575                 v1 = bpf_prog_info_read_offset_u32(&info, desc->size_offset);
11576                 v2 = bpf_prog_info_read_offset_u32(&info_linear->info,
11577                                                    desc->size_offset);
11578                 if (v1 != v2)
11579                         pr_warn("%s: mismatch in rec size\n", __func__);
11580         }
11581
11582         /* step 7: update info_len and data_len */
11583         info_linear->info_len = sizeof(struct bpf_prog_info);
11584         info_linear->data_len = data_len;
11585
11586         return info_linear;
11587 }
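
/* Editor's note: a hedged sketch of requesting selected arrays for a
 * loaded program; prog_fd is assumed to come from bpf_program__fd() or
 * similar:
 *
 *	struct bpf_prog_info_linear *il;
 *
 *	il = bpf_program__get_prog_info_linear(prog_fd,
 *			(1UL << BPF_PROG_INFO_MAP_IDS) |
 *			(1UL << BPF_PROG_INFO_JITED_KSYMS));
 *	if (!libbpf_get_error(il)) {
 *		printf("prog uses %u maps\n", il->info.nr_map_ids);
 *		free(il);
 *	}
 */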
11588
11589 void bpf_program__bpil_addr_to_offs(struct bpf_prog_info_linear *info_linear)
11590 {
11591         int i;
11592
11593         for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) {
11594                 struct bpf_prog_info_array_desc *desc;
11595                 __u64 addr, offs;
11596
11597                 if ((info_linear->arrays & (1UL << i)) == 0)
11598                         continue;
11599
11600                 desc = bpf_prog_info_array_desc + i;
11601                 addr = bpf_prog_info_read_offset_u64(&info_linear->info,
11602                                                      desc->array_offset);
11603                 offs = addr - ptr_to_u64(info_linear->data);
11604                 bpf_prog_info_set_offset_u64(&info_linear->info,
11605                                              desc->array_offset, offs);
11606         }
11607 }
11608
11609 void bpf_program__bpil_offs_to_addr(struct bpf_prog_info_linear *info_linear)
11610 {
11611         int i;
11612
11613         for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) {
11614                 struct bpf_prog_info_array_desc *desc;
11615                 __u64 addr, offs;
11616
11617                 if ((info_linear->arrays & (1UL << i)) == 0)
11618                         continue;
11619
11620                 desc = bpf_prog_info_array_desc + i;
11621                 offs = bpf_prog_info_read_offset_u64(&info_linear->info,
11622                                                      desc->array_offset);
11623                 addr = offs + ptr_to_u64(info_linear->data);
11624                 bpf_prog_info_set_offset_u64(&info_linear->info,
11625                                              desc->array_offset, addr);
11626         }
11627 }
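
/* Editor's note: the two helpers above make a bpf_prog_info_linear blob
 * position-independent; a sketch of the round trip (il and out_fd are
 * assumed from prior steps):
 *
 *	// before persisting: convert embedded pointers to relative offsets
 *	bpf_program__bpil_addr_to_offs(il);
 *	write(out_fd, il, sizeof(*il) + il->data_len);
 *
 *	// after reading the blob back at a (likely) different address:
 *	bpf_program__bpil_offs_to_addr(il);
 */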
11628
11629 int bpf_program__set_attach_target(struct bpf_program *prog,
11630                                    int attach_prog_fd,
11631                                    const char *attach_func_name)
11632 {
11633         int btf_obj_fd = 0, btf_id = 0, err;
11634
11635         if (!prog || attach_prog_fd < 0 || !attach_func_name)
11636                 return libbpf_err(-EINVAL);
11637
11638         if (prog->obj->loaded)
11639                 return libbpf_err(-EINVAL);
11640
11641         if (attach_prog_fd) {
11642                 btf_id = libbpf_find_prog_btf_id(attach_func_name,
11643                                                  attach_prog_fd);
11644                 if (btf_id < 0)
11645                         return libbpf_err(btf_id);
11646         } else {
11647                 /* load btf_vmlinux, if it's not loaded yet */
11648                 err = bpf_object__load_vmlinux_btf(prog->obj, true);
11649                 if (err)
11650                         return libbpf_err(err);
11651                 err = find_kernel_btf_id(prog->obj, attach_func_name,
11652                                          prog->expected_attach_type,
11653                                          &btf_obj_fd, &btf_id);
11654                 if (err)
11655                         return libbpf_err(err);
11656         }
11657
11658         prog->attach_btf_id = btf_id;
11659         prog->attach_btf_obj_fd = btf_obj_fd;
11660         prog->attach_prog_fd = attach_prog_fd;
11661         return 0;
11662 }
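
/* Editor's note: a sketch of retargeting a tracing program before load;
 * the program and function names are illustrative. Passing
 * attach_prog_fd == 0 selects the kernel-BTF lookup path above:
 *
 *	struct bpf_program *prog;
 *	int err;
 *
 *	prog = bpf_object__find_program_by_name(obj, "handle_connect");
 *	err = bpf_program__set_attach_target(prog, 0, "tcp_v4_connect");
 */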
11663
11664 int parse_cpu_mask_str(const char *s, bool **mask, int *mask_sz)
11665 {
11666         int err = 0, n, len, start, end = -1;
11667         bool *tmp;
11668
11669         *mask = NULL;
11670         *mask_sz = 0;
11671
11672         /* Each substring separated by ',' has the format \d+-\d+ or \d+ */
11673         while (*s) {
11674                 if (*s == ',' || *s == '\n') {
11675                         s++;
11676                         continue;
11677                 }
11678                 n = sscanf(s, "%d%n-%d%n", &start, &len, &end, &len);
11679                 if (n <= 0 || n > 2) {
11680                         pr_warn("Failed to get CPU range %s: %d\n", s, n);
11681                         err = -EINVAL;
11682                         goto cleanup;
11683                 } else if (n == 1) {
11684                         end = start;
11685                 }
11686                 if (start < 0 || start > end) {
11687                         pr_warn("Invalid CPU range [%d,%d] in %s\n",
11688                                 start, end, s);
11689                         err = -EINVAL;
11690                         goto cleanup;
11691                 }
11692                 tmp = realloc(*mask, end + 1);
11693                 if (!tmp) {
11694                         err = -ENOMEM;
11695                         goto cleanup;
11696                 }
11697                 *mask = tmp;
11698                 memset(tmp + *mask_sz, 0, start - *mask_sz);
11699                 memset(tmp + start, 1, end - start + 1);
11700                 *mask_sz = end + 1;
11701                 s += len;
11702         }
11703         if (!*mask_sz) {
11704                 pr_warn("Empty CPU range\n");
11705                 return -EINVAL;
11706         }
11707         return 0;
11708 cleanup:
11709         free(*mask);
11710         *mask = NULL;
11711         return err;
11712 }
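
/* Editor's note: parse_cpu_mask_str() is internal (shared via
 * libbpf_internal.h); a sketch of its behavior on a kernel-style CPU list:
 *
 *	bool *mask;
 *	int n, err;
 *
 *	err = parse_cpu_mask_str("0-2,5\n", &mask, &n);
 *	// on success: n == 6 and mask == {1, 1, 1, 0, 0, 1}
 *	if (!err)
 *		free(mask);
 */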
11713
11714 int parse_cpu_mask_file(const char *fcpu, bool **mask, int *mask_sz)
11715 {
11716         int fd, err = 0, len;
11717         char buf[128];
11718
11719         fd = open(fcpu, O_RDONLY);
11720         if (fd < 0) {
11721                 err = -errno;
11722                 pr_warn("Failed to open cpu mask file %s: %d\n", fcpu, err);
11723                 return err;
11724         }
11725         len = read(fd, buf, sizeof(buf));
11726         close(fd);
11727         if (len <= 0) {
11728                 err = len ? -errno : -EINVAL;
11729                 pr_warn("Failed to read cpu mask from %s: %d\n", fcpu, err);
11730                 return err;
11731         }
11732         if (len >= sizeof(buf)) {
11733                 pr_warn("CPU mask is too big in file %s\n", fcpu);
11734                 return -E2BIG;
11735         }
11736         buf[len] = '\0';
11737
11738         return parse_cpu_mask_str(buf, mask, mask_sz);
11739 }
11740
11741 int libbpf_num_possible_cpus(void)
11742 {
11743         static const char *fcpu = "/sys/devices/system/cpu/possible";
11744         static int cpus;
11745         int err, n, i, tmp_cpus;
11746         bool *mask;
11747
11748         tmp_cpus = READ_ONCE(cpus);
11749         if (tmp_cpus > 0)
11750                 return tmp_cpus;
11751
11752         err = parse_cpu_mask_file(fcpu, &mask, &n);
11753         if (err)
11754                 return libbpf_err(err);
11755
11756         tmp_cpus = 0;
11757         for (i = 0; i < n; i++) {
11758                 if (mask[i])
11759                         tmp_cpus++;
11760         }
11761         free(mask);
11762
11763         WRITE_ONCE(cpus, tmp_cpus);
11764         return tmp_cpus;
11765 }
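
/* Editor's note: the canonical use of libbpf_num_possible_cpus() is sizing
 * the value buffer for per-CPU map lookups; map_fd and key are assumed:
 *
 *	int i, ncpus = libbpf_num_possible_cpus();
 *	__u64 total = 0;
 *
 *	if (ncpus > 0) {
 *		__u64 *vals = calloc(ncpus, sizeof(*vals));
 *
 *		if (vals && !bpf_map_lookup_elem(map_fd, &key, vals))
 *			for (i = 0; i < ncpus; i++)
 *				total += vals[i]; // one slot per possible CPU
 *		free(vals);
 *	}
 */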
11766
11767 int bpf_object__open_skeleton(struct bpf_object_skeleton *s,
11768                               const struct bpf_object_open_opts *opts)
11769 {
11770         DECLARE_LIBBPF_OPTS(bpf_object_open_opts, skel_opts,
11771                 .object_name = s->name,
11772         );
11773         struct bpf_object *obj;
11774         int i, err;
11775
11776         /* Attempt to preserve opts->object_name, unless explicitly
11777          * overridden by the user. Overriding the object name for skeletons
11778          * is discouraged, as it breaks global data maps: their map names
11779          * use the object name as a prefix. When the skeleton is generated,
11780          * bpftool assumes that this name will stay the same.
11781          */
11782         if (opts) {
11783                 memcpy(&skel_opts, opts, sizeof(*opts));
11784                 if (!opts->object_name)
11785                         skel_opts.object_name = s->name;
11786         }
11787
11788         obj = bpf_object__open_mem(s->data, s->data_sz, &skel_opts);
11789         err = libbpf_get_error(obj);
11790         if (err) {
11791                 pr_warn("failed to initialize skeleton BPF object '%s': %d\n",
11792                         s->name, err);
11793                 return libbpf_err(err);
11794         }
11795
11796         *s->obj = obj;
11797
11798         for (i = 0; i < s->map_cnt; i++) {
11799                 struct bpf_map **map = s->maps[i].map;
11800                 const char *name = s->maps[i].name;
11801                 void **mmaped = s->maps[i].mmaped;
11802
11803                 *map = bpf_object__find_map_by_name(obj, name);
11804                 if (!*map) {
11805                         pr_warn("failed to find skeleton map '%s'\n", name);
11806                         return libbpf_err(-ESRCH);
11807                 }
11808
11809                 /* externs shouldn't be pre-setup from user code */
11810                 /* externs shouldn't be set up in advance from user code */
11811                         *mmaped = (*map)->mmaped;
11812         }
11813
11814         for (i = 0; i < s->prog_cnt; i++) {
11815                 struct bpf_program **prog = s->progs[i].prog;
11816                 const char *name = s->progs[i].name;
11817
11818                 *prog = bpf_object__find_program_by_name(obj, name);
11819                 if (!*prog) {
11820                         pr_warn("failed to find skeleton program '%s'\n", name);
11821                         return libbpf_err(-ESRCH);
11822                 }
11823         }
11824
11825         return 0;
11826 }
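
/* Editor's note: applications normally reach the skeleton functions in
 * this file through bpftool-generated headers; a sketch of that generated
 * flow for a hypothetical "myprog" skeleton (the myprog_bpf__* wrappers
 * call bpf_object__{open,load,attach}_skeleton() internally):
 *
 *	struct myprog_bpf *skel = myprog_bpf__open_and_load();
 *
 *	if (skel && !myprog_bpf__attach(skel)) {
 *		// skeleton exposes maps, progs, and global data directly
 *		skel->bss->my_counter = 0;
 *	}
 *	myprog_bpf__destroy(skel);
 */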
11827
11828 int bpf_object__load_skeleton(struct bpf_object_skeleton *s)
11829 {
11830         int i, err;
11831
11832         err = bpf_object__load(*s->obj);
11833         if (err) {
11834                 pr_warn("failed to load BPF skeleton '%s': %d\n", s->name, err);
11835                 return libbpf_err(err);
11836         }
11837
11838         for (i = 0; i < s->map_cnt; i++) {
11839                 struct bpf_map *map = *s->maps[i].map;
11840                 size_t mmap_sz = bpf_map_mmap_sz(map);
11841                 int prot, map_fd = bpf_map__fd(map);
11842                 void **mmaped = s->maps[i].mmaped;
11843
11844                 if (!mmaped)
11845                         continue;
11846
11847                 if (!(map->def.map_flags & BPF_F_MMAPABLE)) {
11848                         *mmaped = NULL;
11849                         continue;
11850                 }
11851
11852                 if (map->def.map_flags & BPF_F_RDONLY_PROG)
11853                         prot = PROT_READ;
11854                 else
11855                         prot = PROT_READ | PROT_WRITE;
11856
11857                 /* Remap the anonymous mmap()-ed "map initialization image"
11858                  * as BPF map-backed mmap()-ed memory, preserving the same
11859                  * address. This causes the kernel to change the process'
11860                  * page table to point to a different piece of kernel memory,
11861                  * but from the userspace point of view the memory address
11862                  * (and its contents, identical at this point) stays the
11863                  * same. This mapping is released by bpf_object__close() as
11864                  * part of the normal clean-up procedure, so we don't need to
11865                  * worry about it from the skeleton's clean-up perspective.
11866                  */
11867                 *mmaped = mmap(map->mmaped, mmap_sz, prot,
11868                                 MAP_SHARED | MAP_FIXED, map_fd, 0);
11869                 if (*mmaped == MAP_FAILED) {
11870                         err = -errno;
11871                         *mmaped = NULL;
11872                         pr_warn("failed to re-mmap() map '%s': %d\n",
11873                                  bpf_map__name(map), err);
11874                         return libbpf_err(err);
11875                 }
11876         }
11877
11878         return 0;
11879 }
11880
11881 int bpf_object__attach_skeleton(struct bpf_object_skeleton *s)
11882 {
11883         int i, err;
11884
11885         for (i = 0; i < s->prog_cnt; i++) {
11886                 struct bpf_program *prog = *s->progs[i].prog;
11887                 struct bpf_link **link = s->progs[i].link;
11888                 const struct bpf_sec_def *sec_def;
11889
11890                 if (!prog->load)
11891                         continue;
11892
11893                 sec_def = find_sec_def(prog->sec_name);
11894                 if (!sec_def || !sec_def->attach_fn)
11895                         continue;
11896
11897                 *link = sec_def->attach_fn(sec_def, prog);
11898                 err = libbpf_get_error(*link);
11899                 if (err) {
11900                         pr_warn("failed to auto-attach program '%s': %d\n",
11901                                 bpf_program__name(prog), err);
11902                         return libbpf_err(err);
11903                 }
11904         }
11905
11906         return 0;
11907 }
11908
11909 void bpf_object__detach_skeleton(struct bpf_object_skeleton *s)
11910 {
11911         int i;
11912
11913         for (i = 0; i < s->prog_cnt; i++) {
11914                 struct bpf_link **link = s->progs[i].link;
11915
11916                 bpf_link__destroy(*link);
11917                 *link = NULL;
11918         }
11919 }
11920
11921 void bpf_object__destroy_skeleton(struct bpf_object_skeleton *s)
11922 {
11923         if (s->progs)
11924                 bpf_object__detach_skeleton(s);
11925         if (s->obj)
11926                 bpf_object__close(*s->obj);
11927         free(s->maps);
11928         free(s->progs);
11929         free(s);
11930 }