// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)

/*
 * Common eBPF ELF object loading operations.
 *
 * Copyright (C) 2013-2015 Alexei Starovoitov <ast@kernel.org>
 * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com>
 * Copyright (C) 2015 Huawei Inc.
 * Copyright (C) 2017 Nicira, Inc.
 * Copyright (C) 2019 Isovalent, Inc.
 */

#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif
#include <stdlib.h>
#include <stdio.h>
#include <stdarg.h>
#include <libgen.h>
#include <inttypes.h>
#include <limits.h>
#include <string.h>
#include <unistd.h>
#include <endian.h>
#include <fcntl.h>
#include <errno.h>
#include <ctype.h>
#include <asm/unistd.h>
#include <linux/err.h>
#include <linux/kernel.h>
#include <linux/bpf.h>
#include <linux/btf.h>
#include <linux/filter.h>
#include <linux/list.h>
#include <linux/limits.h>
#include <linux/perf_event.h>
#include <linux/ring_buffer.h>
#include <linux/version.h>
#include <sys/epoll.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/vfs.h>
#include <sys/utsname.h>
#include <sys/resource.h>
#include <libelf.h>
#include <gelf.h>
#include <zlib.h>

#include "libbpf.h"
#include "bpf.h"
#include "btf.h"
#include "str_error.h"
#include "libbpf_internal.h"
#include "hashmap.h"
#include "bpf_gen_internal.h"

#ifndef BPF_FS_MAGIC
#define BPF_FS_MAGIC            0xcafe4a11
#endif

#define BPF_INSN_SZ (sizeof(struct bpf_insn))

/* vfprintf() in __base_pr() uses a nonliteral format string. That may break
 * compilation if the user enables the corresponding warning, so disable it
 * explicitly.
 */
#pragma GCC diagnostic ignored "-Wformat-nonliteral"

#define __printf(a, b)  __attribute__((format(printf, a, b)))

static struct bpf_map *bpf_object__add_map(struct bpf_object *obj);
static bool prog_is_subprog(const struct bpf_object *obj, const struct bpf_program *prog);

static int __base_pr(enum libbpf_print_level level, const char *format,
                     va_list args)
{
        if (level == LIBBPF_DEBUG)
                return 0;

        return vfprintf(stderr, format, args);
}

static libbpf_print_fn_t __libbpf_pr = __base_pr;

libbpf_print_fn_t libbpf_set_print(libbpf_print_fn_t fn)
{
        libbpf_print_fn_t old_print_fn = __libbpf_pr;

        __libbpf_pr = fn;
        return old_print_fn;
}
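
/* Illustrative usage sketch (not part of libbpf; callback and env var names
 * are hypothetical): a caller installs its own logger to gate debug output
 * on an environment variable while keeping everything else on stderr:
 *
 *      static int my_print(enum libbpf_print_level level,
 *                          const char *format, va_list args)
 *      {
 *              if (level == LIBBPF_DEBUG && !getenv("MY_TOOL_DEBUG"))
 *                      return 0;
 *              return vfprintf(stderr, format, args);
 *      }
 *
 *      libbpf_set_print(my_print);
 *
 * libbpf_set_print() returns the previous callback, so it can be restored
 * later if needed.
 */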

__printf(2, 3)
void libbpf_print(enum libbpf_print_level level, const char *format, ...)
{
        va_list args;

        if (!__libbpf_pr)
                return;

        va_start(args, format);
        __libbpf_pr(level, format, args);
        va_end(args);
}

static void pr_perm_msg(int err)
{
        struct rlimit limit;
        char buf[100];

        if (err != -EPERM || geteuid() != 0)
                return;

        err = getrlimit(RLIMIT_MEMLOCK, &limit);
        if (err)
                return;

        if (limit.rlim_cur == RLIM_INFINITY)
                return;

        if (limit.rlim_cur < 1024)
                snprintf(buf, sizeof(buf), "%zu bytes", (size_t)limit.rlim_cur);
        else if (limit.rlim_cur < 1024*1024)
                snprintf(buf, sizeof(buf), "%.1f KiB", (double)limit.rlim_cur / 1024);
        else
                snprintf(buf, sizeof(buf), "%.1f MiB", (double)limit.rlim_cur / (1024*1024));

        pr_warn("permission error while running as root; try raising 'ulimit -l'? current value: %s\n",
                buf);
}
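
/* Illustrative sketch (not part of libbpf): on kernels that charge BPF
 * memory against RLIMIT_MEMLOCK (pre-memcg accounting), the hint above
 * usually means the caller should bump the limit before loading objects:
 *
 *      struct rlimit rinf = { RLIM_INFINITY, RLIM_INFINITY };
 *
 *      if (setrlimit(RLIMIT_MEMLOCK, &rinf))
 *              fprintf(stderr, "setrlimit: %s\n", strerror(errno));
 */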

#define STRERR_BUFSIZE  128

/* Copied from tools/perf/util/util.h */
#ifndef zfree
# define zfree(ptr) ({ free(*ptr); *ptr = NULL; })
#endif

#ifndef zclose
# define zclose(fd) ({                  \
        int ___err = 0;                 \
        if ((fd) >= 0)                  \
                ___err = close((fd));   \
        fd = -1;                        \
        ___err; })
#endif

static inline __u64 ptr_to_u64(const void *ptr)
{
        return (__u64) (unsigned long) ptr;
}

/* this goes away in libbpf 1.0 */
enum libbpf_strict_mode libbpf_mode = LIBBPF_STRICT_NONE;

int libbpf_set_strict_mode(enum libbpf_strict_mode mode)
{
        /* __LIBBPF_STRICT_LAST is one past the last power-of-2 mode value in
         * use, so to get a mask of all valid mode bits we compensate for
         * that +1 and compute (2 * x - 1)
         */
        if (mode != LIBBPF_STRICT_ALL
            && (mode & ~((__LIBBPF_STRICT_LAST - 1) * 2 - 1)))
                return errno = EINVAL, -EINVAL;

        libbpf_mode = mode;
        return 0;
}
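
/* Worked example of the mask check above: if the highest individual mode bit
 * defined is 0x4, then __LIBBPF_STRICT_LAST == 0x5, and
 * ((0x5 - 1) * 2 - 1) == 0x7 == 0b111, i.e. exactly the set of valid mode
 * bits. Any mode with a bit outside that mask is rejected with -EINVAL.
 */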

enum kern_feature_id {
        /* v4.14: kernel support for program & map names. */
        FEAT_PROG_NAME,
        /* v5.2: kernel support for global data sections. */
        FEAT_GLOBAL_DATA,
        /* BTF support */
        FEAT_BTF,
        /* BTF_KIND_FUNC and BTF_KIND_FUNC_PROTO support */
        FEAT_BTF_FUNC,
        /* BTF_KIND_VAR and BTF_KIND_DATASEC support */
        FEAT_BTF_DATASEC,
        /* BTF_FUNC_GLOBAL is supported */
        FEAT_BTF_GLOBAL_FUNC,
        /* BPF_F_MMAPABLE is supported for arrays */
        FEAT_ARRAY_MMAP,
        /* kernel support for expected_attach_type in BPF_PROG_LOAD */
        FEAT_EXP_ATTACH_TYPE,
        /* bpf_probe_read_{kernel,user}[_str] helpers */
        FEAT_PROBE_READ_KERN,
        /* BPF_PROG_BIND_MAP is supported */
        FEAT_PROG_BIND_MAP,
        /* Kernel support for module BTFs */
        FEAT_MODULE_BTF,
        /* BTF_KIND_FLOAT support */
        FEAT_BTF_FLOAT,
        __FEAT_CNT,
};

static bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id);

enum reloc_type {
        RELO_LD64,
        RELO_CALL,
        RELO_DATA,
        RELO_EXTERN_VAR,
        RELO_EXTERN_FUNC,
        RELO_SUBPROG_ADDR,
};

struct reloc_desc {
        enum reloc_type type;
        int insn_idx;
        int map_idx;
        int sym_off;
};

struct bpf_sec_def;

typedef struct bpf_link *(*attach_fn_t)(const struct bpf_sec_def *sec,
                                        struct bpf_program *prog);

struct bpf_sec_def {
        const char *sec;
        size_t len;
        enum bpf_prog_type prog_type;
        enum bpf_attach_type expected_attach_type;
        bool is_exp_attach_type_optional;
        bool is_attachable;
        bool is_attach_btf;
        bool is_sleepable;
        attach_fn_t attach_fn;
};

/*
 * bpf_prog would be a better name, but it is already taken by
 * linux/filter.h.
 */
struct bpf_program {
        const struct bpf_sec_def *sec_def;
        char *sec_name;
        size_t sec_idx;
        /* this program's instruction offset (in number of instructions)
         * within its containing ELF section
         */
        size_t sec_insn_off;
        /* number of original instructions in ELF section belonging to this
         * program, not taking into account subprogram instructions possibly
         * appended later during relocation
         */
        size_t sec_insn_cnt;
        /* Offset (in number of instructions) of the start of instructions
         * belonging to this BPF program within its containing main BPF
         * program. For the entry-point (main) BPF program, this is always
         * zero. For a sub-program, this gets reset before each main BPF
         * program is processed and relocated, and is used to determine
         * whether the sub-program was already appended to the main program,
         * and if so, at which instruction offset.
         */
        size_t sub_insn_off;

        char *name;
        /* sec_name with / replaced by _; makes recursive pinning
         * in bpf_object__pin_programs easier
         */
        char *pin_name;

        /* instructions that belong to BPF program; insns[0] is located at
         * the sec_insn_off instruction within its ELF section in the ELF
         * file, so when mapping an ELF file instruction index to the local
         * instruction, one needs to subtract sec_insn_off; and vice versa.
         */
        struct bpf_insn *insns;
        /* actual number of instructions in this BPF program's image; for
         * entry-point BPF programs this includes the size of the main
         * program itself plus all the used sub-programs, appended at the end
         */
        size_t insns_cnt;

        struct reloc_desc *reloc_desc;
        int nr_reloc;
        int log_level;

        struct {
                int nr;
                int *fds;
        } instances;
        bpf_program_prep_t preprocessor;

        struct bpf_object *obj;
        void *priv;
        bpf_program_clear_priv_t clear_priv;

        bool load;
        bool mark_btf_static;
        enum bpf_prog_type type;
        enum bpf_attach_type expected_attach_type;
        int prog_ifindex;
        __u32 attach_btf_obj_fd;
        __u32 attach_btf_id;
        __u32 attach_prog_fd;
        void *func_info;
        __u32 func_info_rec_size;
        __u32 func_info_cnt;

        void *line_info;
        __u32 line_info_rec_size;
        __u32 line_info_cnt;
        __u32 prog_flags;
};

struct bpf_struct_ops {
        const char *tname;
        const struct btf_type *type;
        struct bpf_program **progs;
        __u32 *kern_func_off;
        /* e.g. struct tcp_congestion_ops in bpf_prog's btf format */
        void *data;
        /* e.g. struct bpf_struct_ops_tcp_congestion_ops in
         *      btf_vmlinux's format.
         * struct bpf_struct_ops_tcp_congestion_ops {
         *      [... some other kernel fields ...]
         *      struct tcp_congestion_ops data;
         * }
         * kern_vdata's size == sizeof(struct bpf_struct_ops_tcp_congestion_ops)
         * bpf_map__init_kern_struct_ops() will populate the "kern_vdata"
         * from "data".
         */
        void *kern_vdata;
        __u32 type_id;
};

#define DATA_SEC ".data"
#define BSS_SEC ".bss"
#define RODATA_SEC ".rodata"
#define KCONFIG_SEC ".kconfig"
#define KSYMS_SEC ".ksyms"
#define STRUCT_OPS_SEC ".struct_ops"

enum libbpf_map_type {
        LIBBPF_MAP_UNSPEC,
        LIBBPF_MAP_DATA,
        LIBBPF_MAP_BSS,
        LIBBPF_MAP_RODATA,
        LIBBPF_MAP_KCONFIG,
};

static const char * const libbpf_type_to_btf_name[] = {
        [LIBBPF_MAP_DATA]       = DATA_SEC,
        [LIBBPF_MAP_BSS]        = BSS_SEC,
        [LIBBPF_MAP_RODATA]     = RODATA_SEC,
        [LIBBPF_MAP_KCONFIG]    = KCONFIG_SEC,
};

struct bpf_map {
        char *name;
        int fd;
        int sec_idx;
        size_t sec_offset;
        int map_ifindex;
        int inner_map_fd;
        struct bpf_map_def def;
        __u32 numa_node;
        __u32 btf_var_idx;
        __u32 btf_key_type_id;
        __u32 btf_value_type_id;
        __u32 btf_vmlinux_value_type_id;
        void *priv;
        bpf_map_clear_priv_t clear_priv;
        enum libbpf_map_type libbpf_type;
        void *mmaped;
        struct bpf_struct_ops *st_ops;
        struct bpf_map *inner_map;
        void **init_slots;
        int init_slots_sz;
        char *pin_path;
        bool pinned;
        bool reused;
};

enum extern_type {
        EXT_UNKNOWN,
        EXT_KCFG,
        EXT_KSYM,
};

enum kcfg_type {
        KCFG_UNKNOWN,
        KCFG_CHAR,
        KCFG_BOOL,
        KCFG_INT,
        KCFG_TRISTATE,
        KCFG_CHAR_ARR,
};

struct extern_desc {
        enum extern_type type;
        int sym_idx;
        int btf_id;
        int sec_btf_id;
        const char *name;
        bool is_set;
        bool is_weak;
        union {
                struct {
                        enum kcfg_type type;
                        int sz;
                        int align;
                        int data_off;
                        bool is_signed;
                } kcfg;
                struct {
                        unsigned long long addr;

                        /* target btf_id of the corresponding kernel var. */
                        int kernel_btf_obj_fd;
                        int kernel_btf_id;

                        /* local btf_id of the ksym extern's type. */
                        __u32 type_id;
                } ksym;
        };
};
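
/* Illustrative sketch (not part of this file): what the two extern kinds
 * typically look like on the BPF side, using the __kconfig/__ksym attribute
 * conventions from bpf_helpers.h (symbol names are examples only):
 *
 *      extern int CONFIG_HZ __kconfig;                 // EXT_KCFG
 *      extern const void bpf_prog_active __ksym;       // EXT_KSYM
 */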

static LIST_HEAD(bpf_objects_list);

struct module_btf {
        struct btf *btf;
        char *name;
        __u32 id;
        int fd;
};

struct bpf_object {
        char name[BPF_OBJ_NAME_LEN];
        char license[64];
        __u32 kern_version;

        struct bpf_program *programs;
        size_t nr_programs;
        struct bpf_map *maps;
        size_t nr_maps;
        size_t maps_cap;

        char *kconfig;
        struct extern_desc *externs;
        int nr_extern;
        int kconfig_map_idx;
        int rodata_map_idx;

        bool loaded;
        bool has_subcalls;

        struct bpf_gen *gen_loader;

        /*
         * Information used when doing ELF-related work. Only valid if
         * efile.fd is valid.
         */
        struct {
                int fd;
                const void *obj_buf;
                size_t obj_buf_sz;
                Elf *elf;
                GElf_Ehdr ehdr;
                Elf_Data *symbols;
                Elf_Data *data;
                Elf_Data *rodata;
                Elf_Data *bss;
                Elf_Data *st_ops_data;
                size_t shstrndx; /* section index for section name strings */
                size_t strtabidx;
                struct {
                        GElf_Shdr shdr;
                        Elf_Data *data;
                } *reloc_sects;
                int nr_reloc_sects;
                int maps_shndx;
                int btf_maps_shndx;
                __u32 btf_maps_sec_btf_id;
                int text_shndx;
                int symbols_shndx;
                int data_shndx;
                int rodata_shndx;
                int bss_shndx;
                int st_ops_shndx;
        } efile;
        /*
         * All loaded bpf_objects are linked in a list, which is
         * hidden from the caller. bpf_objects__<func> handlers deal
         * with all objects.
         */
        struct list_head list;

        struct btf *btf;
        struct btf_ext *btf_ext;

        /* Parse and load BTF vmlinux if any of the programs in the object need
         * it at load time.
         */
        struct btf *btf_vmlinux;
        /* Path to the custom BTF to be used for BPF CO-RE relocations as an
         * override for vmlinux BTF.
         */
        char *btf_custom_path;
        /* vmlinux BTF override for CO-RE relocations */
        struct btf *btf_vmlinux_override;
        /* Lazily initialized kernel module BTFs */
        struct module_btf *btf_modules;
        bool btf_modules_loaded;
        size_t btf_module_cnt;
        size_t btf_module_cap;

        void *priv;
        bpf_object_clear_priv_t clear_priv;

        char path[];
};
#define obj_elf_valid(o)        ((o)->efile.elf)

static const char *elf_sym_str(const struct bpf_object *obj, size_t off);
static const char *elf_sec_str(const struct bpf_object *obj, size_t off);
static Elf_Scn *elf_sec_by_idx(const struct bpf_object *obj, size_t idx);
static Elf_Scn *elf_sec_by_name(const struct bpf_object *obj, const char *name);
static int elf_sec_hdr(const struct bpf_object *obj, Elf_Scn *scn, GElf_Shdr *hdr);
static const char *elf_sec_name(const struct bpf_object *obj, Elf_Scn *scn);
static Elf_Data *elf_sec_data(const struct bpf_object *obj, Elf_Scn *scn);

void bpf_program__unload(struct bpf_program *prog)
{
        int i;

        if (!prog)
                return;

        /*
         * If the object is opened but the program was never loaded,
         * it is possible that prog->instances.nr == -1.
         */
        if (prog->instances.nr > 0) {
                for (i = 0; i < prog->instances.nr; i++)
                        zclose(prog->instances.fds[i]);
        } else if (prog->instances.nr != -1) {
                pr_warn("Internal error: instances.nr is %d\n",
                        prog->instances.nr);
        }

        prog->instances.nr = -1;
        zfree(&prog->instances.fds);

        zfree(&prog->func_info);
        zfree(&prog->line_info);
}

static void bpf_program__exit(struct bpf_program *prog)
{
        if (!prog)
                return;

        if (prog->clear_priv)
                prog->clear_priv(prog, prog->priv);

        prog->priv = NULL;
        prog->clear_priv = NULL;

        bpf_program__unload(prog);
        zfree(&prog->name);
        zfree(&prog->sec_name);
        zfree(&prog->pin_name);
        zfree(&prog->insns);
        zfree(&prog->reloc_desc);

        prog->nr_reloc = 0;
        prog->insns_cnt = 0;
        prog->sec_idx = -1;
}

static char *__bpf_program__pin_name(struct bpf_program *prog)
{
        char *name, *p;

        name = p = strdup(prog->sec_name);
        while ((p = strchr(p, '/')))
                *p = '_';

        return name;
}
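
/* E.g., a program in SEC("cgroup/skb") gets pin_name "cgroup_skb", so
 * recursive pinning creates a single file instead of nested directories.
 */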

static bool insn_is_subprog_call(const struct bpf_insn *insn)
{
        return BPF_CLASS(insn->code) == BPF_JMP &&
               BPF_OP(insn->code) == BPF_CALL &&
               BPF_SRC(insn->code) == BPF_K &&
               insn->src_reg == BPF_PSEUDO_CALL &&
               insn->dst_reg == 0 &&
               insn->off == 0;
}

static bool is_ldimm64_insn(struct bpf_insn *insn)
{
        return insn->code == (BPF_LD | BPF_IMM | BPF_DW);
}

static bool is_call_insn(const struct bpf_insn *insn)
{
        return insn->code == (BPF_JMP | BPF_CALL);
}

static bool insn_is_pseudo_func(struct bpf_insn *insn)
{
        return is_ldimm64_insn(insn) && insn->src_reg == BPF_PSEUDO_FUNC;
}

static int
bpf_object__init_prog(struct bpf_object *obj, struct bpf_program *prog,
                      const char *name, size_t sec_idx, const char *sec_name,
                      size_t sec_off, void *insn_data, size_t insn_data_sz)
{
        if (insn_data_sz == 0 || insn_data_sz % BPF_INSN_SZ || sec_off % BPF_INSN_SZ) {
                pr_warn("sec '%s': corrupted program '%s', offset %zu, size %zu\n",
                        sec_name, name, sec_off, insn_data_sz);
                return -EINVAL;
        }

        memset(prog, 0, sizeof(*prog));
        prog->obj = obj;

        prog->sec_idx = sec_idx;
        prog->sec_insn_off = sec_off / BPF_INSN_SZ;
        prog->sec_insn_cnt = insn_data_sz / BPF_INSN_SZ;
        /* insns_cnt can later be increased by appending used subprograms */
        prog->insns_cnt = prog->sec_insn_cnt;

        prog->type = BPF_PROG_TYPE_UNSPEC;
        prog->load = true;

        prog->instances.fds = NULL;
        prog->instances.nr = -1;

        prog->sec_name = strdup(sec_name);
        if (!prog->sec_name)
                goto errout;

        prog->name = strdup(name);
        if (!prog->name)
                goto errout;

        prog->pin_name = __bpf_program__pin_name(prog);
        if (!prog->pin_name)
                goto errout;

        prog->insns = malloc(insn_data_sz);
        if (!prog->insns)
                goto errout;
        memcpy(prog->insns, insn_data, insn_data_sz);

        return 0;
errout:
        pr_warn("sec '%s': failed to allocate memory for prog '%s'\n", sec_name, name);
        bpf_program__exit(prog);
        return -ENOMEM;
}

static int
bpf_object__add_programs(struct bpf_object *obj, Elf_Data *sec_data,
                         const char *sec_name, int sec_idx)
{
        Elf_Data *symbols = obj->efile.symbols;
        struct bpf_program *prog, *progs;
        void *data = sec_data->d_buf;
        size_t sec_sz = sec_data->d_size, sec_off, prog_sz, nr_syms;
        int nr_progs, err, i;
        const char *name;
        GElf_Sym sym;

        progs = obj->programs;
        nr_progs = obj->nr_programs;
        nr_syms = symbols->d_size / sizeof(GElf_Sym);
        sec_off = 0;

        for (i = 0; i < nr_syms; i++) {
                if (!gelf_getsym(symbols, i, &sym))
                        continue;
                if (sym.st_shndx != sec_idx)
                        continue;
                if (GELF_ST_TYPE(sym.st_info) != STT_FUNC)
                        continue;

                prog_sz = sym.st_size;
                sec_off = sym.st_value;

                name = elf_sym_str(obj, sym.st_name);
                if (!name) {
                        pr_warn("sec '%s': failed to get symbol name for offset %zu\n",
                                sec_name, sec_off);
                        return -LIBBPF_ERRNO__FORMAT;
                }

                if (sec_off + prog_sz > sec_sz) {
                        pr_warn("sec '%s': program at offset %zu crosses section boundary\n",
                                sec_name, sec_off);
                        return -LIBBPF_ERRNO__FORMAT;
                }

                if (sec_idx != obj->efile.text_shndx && GELF_ST_BIND(sym.st_info) == STB_LOCAL) {
                        pr_warn("sec '%s': program '%s' is static and not supported\n", sec_name, name);
                        return -ENOTSUP;
                }

                pr_debug("sec '%s': found program '%s' at insn offset %zu (%zu bytes), code size %zu insns (%zu bytes)\n",
                         sec_name, name, sec_off / BPF_INSN_SZ, sec_off, prog_sz / BPF_INSN_SZ, prog_sz);

                progs = libbpf_reallocarray(progs, nr_progs + 1, sizeof(*progs));
                if (!progs) {
                        /*
                         * In this case the original obj->programs
                         * is still valid, so no special treatment is
                         * needed in bpf_object__close().
                         */
                        pr_warn("sec '%s': failed to alloc memory for new program '%s'\n",
                                sec_name, name);
                        return -ENOMEM;
                }
                obj->programs = progs;

                prog = &progs[nr_progs];

                err = bpf_object__init_prog(obj, prog, name, sec_idx, sec_name,
                                            sec_off, data + sec_off, prog_sz);
                if (err)
                        return err;

                /* if the function is a global/weak symbol, but has restricted
                 * (STV_HIDDEN or STV_INTERNAL) visibility, mark its BTF FUNC
                 * as static to enable a more permissive BPF verification mode
                 * with more outside context available to the BPF verifier
                 */
                if (GELF_ST_BIND(sym.st_info) != STB_LOCAL
                    && (GELF_ST_VISIBILITY(sym.st_other) == STV_HIDDEN
                        || GELF_ST_VISIBILITY(sym.st_other) == STV_INTERNAL))
                        prog->mark_btf_static = true;

                nr_progs++;
                obj->nr_programs = nr_progs;
        }

        return 0;
}

static __u32 get_kernel_version(void)
{
        __u32 major, minor, patch;
        struct utsname info;

        uname(&info);
        if (sscanf(info.release, "%u.%u.%u", &major, &minor, &patch) != 3)
                return 0;
        return KERNEL_VERSION(major, minor, patch);
}
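
/* Worked example (assuming the classic KERNEL_VERSION() encoding): a release
 * string "5.10.7-arch1" parses as major=5, minor=10, patch=7, and
 * KERNEL_VERSION(5, 10, 7) == (5 << 16) + (10 << 8) + 7 == 0x050a07. A
 * release string that doesn't fully match "%u.%u.%u" yields 0.
 */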

static const struct btf_member *
find_member_by_offset(const struct btf_type *t, __u32 bit_offset)
{
        struct btf_member *m;
        int i;

        for (i = 0, m = btf_members(t); i < btf_vlen(t); i++, m++) {
                if (btf_member_bit_offset(t, i) == bit_offset)
                        return m;
        }

        return NULL;
}

static const struct btf_member *
find_member_by_name(const struct btf *btf, const struct btf_type *t,
                    const char *name)
{
        struct btf_member *m;
        int i;

        for (i = 0, m = btf_members(t); i < btf_vlen(t); i++, m++) {
                if (!strcmp(btf__name_by_offset(btf, m->name_off), name))
                        return m;
        }

        return NULL;
}

#define STRUCT_OPS_VALUE_PREFIX "bpf_struct_ops_"
static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix,
                                   const char *name, __u32 kind);

static int
find_struct_ops_kern_types(const struct btf *btf, const char *tname,
                           const struct btf_type **type, __u32 *type_id,
                           const struct btf_type **vtype, __u32 *vtype_id,
                           const struct btf_member **data_member)
{
        const struct btf_type *kern_type, *kern_vtype;
        const struct btf_member *kern_data_member;
        __s32 kern_vtype_id, kern_type_id;
        __u32 i;

        kern_type_id = btf__find_by_name_kind(btf, tname, BTF_KIND_STRUCT);
        if (kern_type_id < 0) {
                pr_warn("struct_ops init_kern: struct %s is not found in kernel BTF\n",
                        tname);
                return kern_type_id;
        }
        kern_type = btf__type_by_id(btf, kern_type_id);

        /* Find the corresponding "map_value" type that will be used
         * in map_update(BPF_MAP_TYPE_STRUCT_OPS).  For example,
         * find "struct bpf_struct_ops_tcp_congestion_ops" from the
         * btf_vmlinux.
         */
        kern_vtype_id = find_btf_by_prefix_kind(btf, STRUCT_OPS_VALUE_PREFIX,
                                                tname, BTF_KIND_STRUCT);
        if (kern_vtype_id < 0) {
                pr_warn("struct_ops init_kern: struct %s%s is not found in kernel BTF\n",
                        STRUCT_OPS_VALUE_PREFIX, tname);
                return kern_vtype_id;
        }
        kern_vtype = btf__type_by_id(btf, kern_vtype_id);

        /* Find "struct tcp_congestion_ops" from
         * struct bpf_struct_ops_tcp_congestion_ops {
         *      [ ... ]
         *      struct tcp_congestion_ops data;
         * }
         */
        kern_data_member = btf_members(kern_vtype);
        for (i = 0; i < btf_vlen(kern_vtype); i++, kern_data_member++) {
                if (kern_data_member->type == kern_type_id)
                        break;
        }
        if (i == btf_vlen(kern_vtype)) {
                pr_warn("struct_ops init_kern: struct %s data is not found in struct %s%s\n",
                        tname, STRUCT_OPS_VALUE_PREFIX, tname);
                return -EINVAL;
        }

        *type = kern_type;
        *type_id = kern_type_id;
        *vtype = kern_vtype;
        *vtype_id = kern_vtype_id;
        *data_member = kern_data_member;

        return 0;
}

static bool bpf_map__is_struct_ops(const struct bpf_map *map)
{
        return map->def.type == BPF_MAP_TYPE_STRUCT_OPS;
}

/* Init the map's fields that depend on kern_btf */
static int bpf_map__init_kern_struct_ops(struct bpf_map *map,
                                         const struct btf *btf,
                                         const struct btf *kern_btf)
{
        const struct btf_member *member, *kern_member, *kern_data_member;
        const struct btf_type *type, *kern_type, *kern_vtype;
        __u32 i, kern_type_id, kern_vtype_id, kern_data_off;
        struct bpf_struct_ops *st_ops;
        void *data, *kern_data;
        const char *tname;
        int err;

        st_ops = map->st_ops;
        type = st_ops->type;
        tname = st_ops->tname;
        err = find_struct_ops_kern_types(kern_btf, tname,
                                         &kern_type, &kern_type_id,
                                         &kern_vtype, &kern_vtype_id,
                                         &kern_data_member);
        if (err)
                return err;

        pr_debug("struct_ops init_kern %s: type_id:%u kern_type_id:%u kern_vtype_id:%u\n",
                 map->name, st_ops->type_id, kern_type_id, kern_vtype_id);

        map->def.value_size = kern_vtype->size;
        map->btf_vmlinux_value_type_id = kern_vtype_id;

        st_ops->kern_vdata = calloc(1, kern_vtype->size);
        if (!st_ops->kern_vdata)
                return -ENOMEM;

        data = st_ops->data;
        kern_data_off = kern_data_member->offset / 8;
        kern_data = st_ops->kern_vdata + kern_data_off;

        member = btf_members(type);
        for (i = 0; i < btf_vlen(type); i++, member++) {
                const struct btf_type *mtype, *kern_mtype;
                __u32 mtype_id, kern_mtype_id;
                void *mdata, *kern_mdata;
                __s64 msize, kern_msize;
                __u32 moff, kern_moff;
                __u32 kern_member_idx;
                const char *mname;

                mname = btf__name_by_offset(btf, member->name_off);
                kern_member = find_member_by_name(kern_btf, kern_type, mname);
                if (!kern_member) {
                        pr_warn("struct_ops init_kern %s: Cannot find member %s in kernel BTF\n",
                                map->name, mname);
                        return -ENOTSUP;
                }

                kern_member_idx = kern_member - btf_members(kern_type);
                if (btf_member_bitfield_size(type, i) ||
                    btf_member_bitfield_size(kern_type, kern_member_idx)) {
                        pr_warn("struct_ops init_kern %s: bitfield %s is not supported\n",
                                map->name, mname);
                        return -ENOTSUP;
                }

                moff = member->offset / 8;
                kern_moff = kern_member->offset / 8;

                mdata = data + moff;
                kern_mdata = kern_data + kern_moff;

                mtype = skip_mods_and_typedefs(btf, member->type, &mtype_id);
                kern_mtype = skip_mods_and_typedefs(kern_btf, kern_member->type,
                                                    &kern_mtype_id);
                if (BTF_INFO_KIND(mtype->info) !=
                    BTF_INFO_KIND(kern_mtype->info)) {
                        pr_warn("struct_ops init_kern %s: Unmatched member type %s %u != %u(kernel)\n",
                                map->name, mname, BTF_INFO_KIND(mtype->info),
                                BTF_INFO_KIND(kern_mtype->info));
                        return -ENOTSUP;
                }

                if (btf_is_ptr(mtype)) {
                        struct bpf_program *prog;

                        prog = st_ops->progs[i];
                        if (!prog)
                                continue;

                        kern_mtype = skip_mods_and_typedefs(kern_btf,
                                                            kern_mtype->type,
                                                            &kern_mtype_id);

                        /* mtype->type must be a func_proto which was
                         * guaranteed in bpf_object__collect_st_ops_relos(),
                         * so only check kern_mtype for func_proto here.
                         */
                        if (!btf_is_func_proto(kern_mtype)) {
                                pr_warn("struct_ops init_kern %s: kernel member %s is not a func ptr\n",
                                        map->name, mname);
                                return -ENOTSUP;
                        }

                        prog->attach_btf_id = kern_type_id;
                        prog->expected_attach_type = kern_member_idx;

                        st_ops->kern_func_off[i] = kern_data_off + kern_moff;

                        pr_debug("struct_ops init_kern %s: func ptr %s is set to prog %s from data(+%u) to kern_data(+%u)\n",
                                 map->name, mname, prog->name, moff,
                                 kern_moff);

                        continue;
                }

                msize = btf__resolve_size(btf, mtype_id);
                kern_msize = btf__resolve_size(kern_btf, kern_mtype_id);
                if (msize < 0 || kern_msize < 0 || msize != kern_msize) {
                        pr_warn("struct_ops init_kern %s: Error in size of member %s: %zd != %zd(kernel)\n",
                                map->name, mname, (ssize_t)msize,
                                (ssize_t)kern_msize);
                        return -ENOTSUP;
                }

                pr_debug("struct_ops init_kern %s: copy %s %u bytes from data(+%u) to kern_data(+%u)\n",
                         map->name, mname, (unsigned int)msize,
                         moff, kern_moff);
                memcpy(kern_mdata, mdata, msize);
        }

        return 0;
}

static int bpf_object__init_kern_struct_ops_maps(struct bpf_object *obj)
{
        struct bpf_map *map;
        size_t i;
        int err;

        for (i = 0; i < obj->nr_maps; i++) {
                map = &obj->maps[i];

                if (!bpf_map__is_struct_ops(map))
                        continue;

                err = bpf_map__init_kern_struct_ops(map, obj->btf,
                                                    obj->btf_vmlinux);
                if (err)
                        return err;
        }

        return 0;
}

static int bpf_object__init_struct_ops_maps(struct bpf_object *obj)
{
        const struct btf_type *type, *datasec;
        const struct btf_var_secinfo *vsi;
        struct bpf_struct_ops *st_ops;
        const char *tname, *var_name;
        __s32 type_id, datasec_id;
        const struct btf *btf;
        struct bpf_map *map;
        __u32 i;

        if (obj->efile.st_ops_shndx == -1)
                return 0;

        btf = obj->btf;
        datasec_id = btf__find_by_name_kind(btf, STRUCT_OPS_SEC,
                                            BTF_KIND_DATASEC);
        if (datasec_id < 0) {
                pr_warn("struct_ops init: DATASEC %s not found\n",
                        STRUCT_OPS_SEC);
                return -EINVAL;
        }

        datasec = btf__type_by_id(btf, datasec_id);
        vsi = btf_var_secinfos(datasec);
        for (i = 0; i < btf_vlen(datasec); i++, vsi++) {
                type = btf__type_by_id(obj->btf, vsi->type);
                var_name = btf__name_by_offset(obj->btf, type->name_off);

                type_id = btf__resolve_type(obj->btf, vsi->type);
                if (type_id < 0) {
                        pr_warn("struct_ops init: Cannot resolve var type_id %u in DATASEC %s\n",
                                vsi->type, STRUCT_OPS_SEC);
                        return -EINVAL;
                }

                type = btf__type_by_id(obj->btf, type_id);
                tname = btf__name_by_offset(obj->btf, type->name_off);
                if (!tname[0]) {
                        pr_warn("struct_ops init: anonymous type is not supported\n");
                        return -ENOTSUP;
                }
                if (!btf_is_struct(type)) {
                        pr_warn("struct_ops init: %s is not a struct\n", tname);
                        return -EINVAL;
                }

                map = bpf_object__add_map(obj);
                if (IS_ERR(map))
                        return PTR_ERR(map);

                map->sec_idx = obj->efile.st_ops_shndx;
                map->sec_offset = vsi->offset;
                map->name = strdup(var_name);
                if (!map->name)
                        return -ENOMEM;

                map->def.type = BPF_MAP_TYPE_STRUCT_OPS;
                map->def.key_size = sizeof(int);
                map->def.value_size = type->size;
                map->def.max_entries = 1;

                map->st_ops = calloc(1, sizeof(*map->st_ops));
                if (!map->st_ops)
                        return -ENOMEM;
                st_ops = map->st_ops;
                st_ops->data = malloc(type->size);
                st_ops->progs = calloc(btf_vlen(type), sizeof(*st_ops->progs));
                st_ops->kern_func_off = malloc(btf_vlen(type) *
                                               sizeof(*st_ops->kern_func_off));
                if (!st_ops->data || !st_ops->progs || !st_ops->kern_func_off)
                        return -ENOMEM;

                if (vsi->offset + type->size > obj->efile.st_ops_data->d_size) {
                        pr_warn("struct_ops init: var %s is beyond the end of DATASEC %s\n",
                                var_name, STRUCT_OPS_SEC);
                        return -EINVAL;
                }

                memcpy(st_ops->data,
                       obj->efile.st_ops_data->d_buf + vsi->offset,
                       type->size);
                st_ops->tname = tname;
                st_ops->type = type;
                st_ops->type_id = type_id;

                pr_debug("struct_ops init: struct %s(type_id=%u) %s found at offset %u\n",
                         tname, type_id, var_name, vsi->offset);
        }

        return 0;
}
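
/* Illustrative sketch (not part of this file): the BPF-side declaration that
 * produces such a map. A variable placed in SEC(".struct_ops") becomes a
 * BPF_MAP_TYPE_STRUCT_OPS map named after the variable (member and program
 * names below are examples only):
 *
 *      SEC(".struct_ops")
 *      struct tcp_congestion_ops dctcp = {
 *              .init           = (void *)dctcp_init,
 *              .ssthresh       = (void *)dctcp_ssthresh,
 *              .name           = "bpf_dctcp",
 *      };
 */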

static struct bpf_object *bpf_object__new(const char *path,
                                          const void *obj_buf,
                                          size_t obj_buf_sz,
                                          const char *obj_name)
{
        struct bpf_object *obj;
        char *end;

        obj = calloc(1, sizeof(struct bpf_object) + strlen(path) + 1);
        if (!obj) {
                pr_warn("alloc memory failed for %s\n", path);
                return ERR_PTR(-ENOMEM);
        }

        strcpy(obj->path, path);
        if (obj_name) {
                strncpy(obj->name, obj_name, sizeof(obj->name) - 1);
                obj->name[sizeof(obj->name) - 1] = 0;
        } else {
                /* Using the GNU version of basename(), which doesn't modify its arg. */
                strncpy(obj->name, basename((void *)path),
                        sizeof(obj->name) - 1);
                end = strchr(obj->name, '.');
                if (end)
                        *end = 0;
        }

        obj->efile.fd = -1;
        /*
         * The caller of this function should also call
         * bpf_object__elf_finish() after data collection to return
         * obj_buf to the user. If not, we'd have to duplicate the buffer
         * to avoid the user freeing it before ELF processing is finished.
         */
        obj->efile.obj_buf = obj_buf;
        obj->efile.obj_buf_sz = obj_buf_sz;
        obj->efile.maps_shndx = -1;
        obj->efile.btf_maps_shndx = -1;
        obj->efile.data_shndx = -1;
        obj->efile.rodata_shndx = -1;
        obj->efile.bss_shndx = -1;
        obj->efile.st_ops_shndx = -1;
        obj->kconfig_map_idx = -1;
        obj->rodata_map_idx = -1;

        obj->kern_version = get_kernel_version();
        obj->loaded = false;

        INIT_LIST_HEAD(&obj->list);
        list_add(&obj->list, &bpf_objects_list);
        return obj;
}

static void bpf_object__elf_finish(struct bpf_object *obj)
{
        if (!obj_elf_valid(obj))
                return;

        if (obj->efile.elf) {
                elf_end(obj->efile.elf);
                obj->efile.elf = NULL;
        }
        obj->efile.symbols = NULL;
        obj->efile.data = NULL;
        obj->efile.rodata = NULL;
        obj->efile.bss = NULL;
        obj->efile.st_ops_data = NULL;

        zfree(&obj->efile.reloc_sects);
        obj->efile.nr_reloc_sects = 0;
        zclose(obj->efile.fd);
        obj->efile.obj_buf = NULL;
        obj->efile.obj_buf_sz = 0;
}

static int bpf_object__elf_init(struct bpf_object *obj)
{
        int err = 0;
        GElf_Ehdr *ep;

        if (obj_elf_valid(obj)) {
                pr_warn("elf: init internal error\n");
                return -LIBBPF_ERRNO__LIBELF;
        }

        if (obj->efile.obj_buf_sz > 0) {
                /*
                 * obj_buf should have been validated by
                 * bpf_object__open_buffer().
                 */
                obj->efile.elf = elf_memory((char *)obj->efile.obj_buf,
                                            obj->efile.obj_buf_sz);
        } else {
                obj->efile.fd = open(obj->path, O_RDONLY);
                if (obj->efile.fd < 0) {
                        char errmsg[STRERR_BUFSIZE], *cp;

                        err = -errno;
                        cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
                        pr_warn("elf: failed to open %s: %s\n", obj->path, cp);
                        return err;
                }

                obj->efile.elf = elf_begin(obj->efile.fd, ELF_C_READ_MMAP, NULL);
        }

        if (!obj->efile.elf) {
                pr_warn("elf: failed to open %s as ELF file: %s\n", obj->path, elf_errmsg(-1));
                err = -LIBBPF_ERRNO__LIBELF;
                goto errout;
        }

        if (!gelf_getehdr(obj->efile.elf, &obj->efile.ehdr)) {
                pr_warn("elf: failed to get ELF header from %s: %s\n", obj->path, elf_errmsg(-1));
                err = -LIBBPF_ERRNO__FORMAT;
                goto errout;
        }
        ep = &obj->efile.ehdr;

        if (elf_getshdrstrndx(obj->efile.elf, &obj->efile.shstrndx)) {
                pr_warn("elf: failed to get section names section index for %s: %s\n",
                        obj->path, elf_errmsg(-1));
                err = -LIBBPF_ERRNO__FORMAT;
                goto errout;
        }

        /* Elf is corrupted/truncated, avoid calling elf_strptr. */
        if (!elf_rawdata(elf_getscn(obj->efile.elf, obj->efile.shstrndx), NULL)) {
                pr_warn("elf: failed to get section names strings from %s: %s\n",
                        obj->path, elf_errmsg(-1));
                err = -LIBBPF_ERRNO__FORMAT;
                goto errout;
        }

        /* Old LLVM set e_machine to EM_NONE */
        if (ep->e_type != ET_REL ||
            (ep->e_machine && ep->e_machine != EM_BPF)) {
                pr_warn("elf: %s is not a valid eBPF object file\n", obj->path);
                err = -LIBBPF_ERRNO__FORMAT;
                goto errout;
        }

        return 0;
errout:
        bpf_object__elf_finish(obj);
        return err;
}

static int bpf_object__check_endianness(struct bpf_object *obj)
{
#if __BYTE_ORDER == __LITTLE_ENDIAN
        if (obj->efile.ehdr.e_ident[EI_DATA] == ELFDATA2LSB)
                return 0;
#elif __BYTE_ORDER == __BIG_ENDIAN
        if (obj->efile.ehdr.e_ident[EI_DATA] == ELFDATA2MSB)
                return 0;
#else
# error "Unrecognized __BYTE_ORDER"
#endif
        pr_warn("elf: endianness mismatch in %s.\n", obj->path);
        return -LIBBPF_ERRNO__ENDIAN;
}

static int
bpf_object__init_license(struct bpf_object *obj, void *data, size_t size)
{
        memcpy(obj->license, data, min(size, sizeof(obj->license) - 1));
        pr_debug("license of %s is %s\n", obj->path, obj->license);
        return 0;
}

static int
bpf_object__init_kversion(struct bpf_object *obj, void *data, size_t size)
{
        __u32 kver;

        if (size != sizeof(kver)) {
                pr_warn("invalid kver section in %s\n", obj->path);
                return -LIBBPF_ERRNO__FORMAT;
        }
        memcpy(&kver, data, sizeof(kver));
        obj->kern_version = kver;
        pr_debug("kernel version of %s is %x\n", obj->path, obj->kern_version);
        return 0;
}

static bool bpf_map_type__is_map_in_map(enum bpf_map_type type)
{
        if (type == BPF_MAP_TYPE_ARRAY_OF_MAPS ||
            type == BPF_MAP_TYPE_HASH_OF_MAPS)
                return true;
        return false;
}
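
/* Illustrative sketch (not part of this file): a BTF-defined outer map of
 * the first kind above, as declared in BPF-side C (struct inner_map is a
 * hypothetical inner map definition):
 *
 *      struct {
 *              __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
 *              __uint(max_entries, 8);
 *              __type(key, int);
 *              __array(values, struct inner_map);
 *      } outer_map SEC(".maps");
 */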

int bpf_object__section_size(const struct bpf_object *obj, const char *name,
                             __u32 *size)
{
        int ret = -ENOENT;

        *size = 0;
        if (!name) {
                return -EINVAL;
        } else if (!strcmp(name, DATA_SEC)) {
                if (obj->efile.data)
                        *size = obj->efile.data->d_size;
        } else if (!strcmp(name, BSS_SEC)) {
                if (obj->efile.bss)
                        *size = obj->efile.bss->d_size;
        } else if (!strcmp(name, RODATA_SEC)) {
                if (obj->efile.rodata)
                        *size = obj->efile.rodata->d_size;
        } else if (!strcmp(name, STRUCT_OPS_SEC)) {
                if (obj->efile.st_ops_data)
                        *size = obj->efile.st_ops_data->d_size;
        } else {
                Elf_Scn *scn = elf_sec_by_name(obj, name);
                Elf_Data *data = elf_sec_data(obj, scn);

                if (data) {
                        ret = 0; /* found it */
                        *size = data->d_size;
                }
        }

        return *size ? 0 : ret;
}

int bpf_object__variable_offset(const struct bpf_object *obj, const char *name,
                                __u32 *off)
{
        Elf_Data *symbols = obj->efile.symbols;
        const char *sname;
        size_t si;

        if (!name || !off)
                return -EINVAL;

        for (si = 0; si < symbols->d_size / sizeof(GElf_Sym); si++) {
                GElf_Sym sym;

                if (!gelf_getsym(symbols, si, &sym))
                        continue;
                if (GELF_ST_BIND(sym.st_info) != STB_GLOBAL ||
                    GELF_ST_TYPE(sym.st_info) != STT_OBJECT)
                        continue;

                sname = elf_sym_str(obj, sym.st_name);
                if (!sname) {
                        pr_warn("failed to get sym name string for var %s\n",
                                name);
                        return -EIO;
                }
                if (strcmp(name, sname) == 0) {
                        *off = sym.st_value;
                        return 0;
                }
        }

        return -ENOENT;
}

static struct bpf_map *bpf_object__add_map(struct bpf_object *obj)
{
        struct bpf_map *new_maps;
        size_t new_cap;
        int i;

        if (obj->nr_maps < obj->maps_cap)
                return &obj->maps[obj->nr_maps++];

        new_cap = max((size_t)4, obj->maps_cap * 3 / 2);
        new_maps = libbpf_reallocarray(obj->maps, new_cap, sizeof(*obj->maps));
        if (!new_maps) {
                pr_warn("alloc maps for object failed\n");
                return ERR_PTR(-ENOMEM);
        }

        obj->maps_cap = new_cap;
        obj->maps = new_maps;

        /* zero out new maps */
        memset(obj->maps + obj->nr_maps, 0,
               (obj->maps_cap - obj->nr_maps) * sizeof(*obj->maps));
        /*
         * Fill all fds with -1 so we won't close an incorrect fd (fd=0 is
         * stdin) on failure (zclose() won't close a negative fd).
         */
        for (i = obj->nr_maps; i < obj->maps_cap; i++) {
                obj->maps[i].fd = -1;
                obj->maps[i].inner_map_fd = -1;
        }

        return &obj->maps[obj->nr_maps++];
}

static size_t bpf_map_mmap_sz(const struct bpf_map *map)
{
        long page_sz = sysconf(_SC_PAGE_SIZE);
        size_t map_sz;

        map_sz = (size_t)roundup(map->def.value_size, 8) * map->def.max_entries;
        map_sz = roundup(map_sz, page_sz);
        return map_sz;
}
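
/* Worked example: value_size == 7 and max_entries == 100 round up to
 * 8 * 100 == 800 bytes, which then rounds up to a single page, so with a 4K
 * page size bpf_map_mmap_sz() returns 4096.
 */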

static char *internal_map_name(struct bpf_object *obj,
                               enum libbpf_map_type type)
{
        char map_name[BPF_OBJ_NAME_LEN], *p;
        const char *sfx = libbpf_type_to_btf_name[type];
        int sfx_len = max((size_t)7, strlen(sfx));
        int pfx_len = min((size_t)BPF_OBJ_NAME_LEN - sfx_len - 1,
                          strlen(obj->name));

        snprintf(map_name, sizeof(map_name), "%.*s%.*s", pfx_len, obj->name,
                 sfx_len, libbpf_type_to_btf_name[type]);

        /* sanitise map name to characters allowed by kernel */
        for (p = map_name; *p && p < map_name + sizeof(map_name); p++)
                if (!isalnum(*p) && *p != '_' && *p != '.')
                        *p = '_';

        return strdup(map_name);
}
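
/* Worked example: object "my_obj" with LIBBPF_MAP_RODATA yields
 * "my_obj.rodata". For a long object name like "bpf_iter_netlink", only
 * BPF_OBJ_NAME_LEN - 1 - strlen(".rodata") == 8 prefix bytes fit, giving
 * "bpf_iter.rodata".
 */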
1426
1427 static int
1428 bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type,
1429                               int sec_idx, void *data, size_t data_sz)
1430 {
1431         struct bpf_map_def *def;
1432         struct bpf_map *map;
1433         int err;
1434
1435         map = bpf_object__add_map(obj);
1436         if (IS_ERR(map))
1437                 return PTR_ERR(map);
1438
1439         map->libbpf_type = type;
1440         map->sec_idx = sec_idx;
1441         map->sec_offset = 0;
1442         map->name = internal_map_name(obj, type);
1443         if (!map->name) {
1444                 pr_warn("failed to alloc map name\n");
1445                 return -ENOMEM;
1446         }
1447
1448         def = &map->def;
1449         def->type = BPF_MAP_TYPE_ARRAY;
1450         def->key_size = sizeof(int);
1451         def->value_size = data_sz;
1452         def->max_entries = 1;
1453         def->map_flags = type == LIBBPF_MAP_RODATA || type == LIBBPF_MAP_KCONFIG
1454                          ? BPF_F_RDONLY_PROG : 0;
1455         def->map_flags |= BPF_F_MMAPABLE;
1456
1457         pr_debug("map '%s' (global data): at sec_idx %d, offset %zu, flags %x.\n",
1458                  map->name, map->sec_idx, map->sec_offset, def->map_flags);
1459
1460         map->mmaped = mmap(NULL, bpf_map_mmap_sz(map), PROT_READ | PROT_WRITE,
1461                            MAP_SHARED | MAP_ANONYMOUS, -1, 0);
1462         if (map->mmaped == MAP_FAILED) {
1463                 err = -errno;
1464                 map->mmaped = NULL;
1465                 pr_warn("failed to alloc map '%s' content buffer: %d\n",
1466                         map->name, err);
1467                 zfree(&map->name);
1468                 return err;
1469         }
1470
1471         if (data)
1472                 memcpy(map->mmaped, data, data_sz);
1473
1474         pr_debug("map %td is \"%s\"\n", map - obj->maps, map->name);
1475         return 0;
1476 }
1477
1478 static int bpf_object__init_global_data_maps(struct bpf_object *obj)
1479 {
1480         int err;
1481
1482         /*
1483          * Populate obj->maps with libbpf internal maps.
1484          */
1485         if (obj->efile.data_shndx >= 0) {
1486                 err = bpf_object__init_internal_map(obj, LIBBPF_MAP_DATA,
1487                                                     obj->efile.data_shndx,
1488                                                     obj->efile.data->d_buf,
1489                                                     obj->efile.data->d_size);
1490                 if (err)
1491                         return err;
1492         }
1493         if (obj->efile.rodata_shndx >= 0) {
1494                 err = bpf_object__init_internal_map(obj, LIBBPF_MAP_RODATA,
1495                                                     obj->efile.rodata_shndx,
1496                                                     obj->efile.rodata->d_buf,
1497                                                     obj->efile.rodata->d_size);
1498                 if (err)
1499                         return err;
1500
1501                 obj->rodata_map_idx = obj->nr_maps - 1;
1502         }
1503         if (obj->efile.bss_shndx >= 0) {
1504                 err = bpf_object__init_internal_map(obj, LIBBPF_MAP_BSS,
1505                                                     obj->efile.bss_shndx,
1506                                                     NULL,
1507                                                     obj->efile.bss->d_size);
1508                 if (err)
1509                         return err;
1510         }
1511         return 0;
1512 }

static struct extern_desc *find_extern_by_name(const struct bpf_object *obj,
					       const void *name)
{
	int i;

	for (i = 0; i < obj->nr_extern; i++) {
		if (strcmp(obj->externs[i].name, name) == 0)
			return &obj->externs[i];
	}
	return NULL;
}

static int set_kcfg_value_tri(struct extern_desc *ext, void *ext_val,
			      char value)
{
	switch (ext->kcfg.type) {
	case KCFG_BOOL:
		if (value == 'm') {
			pr_warn("extern (kcfg) %s=%c should be tristate or char\n",
				ext->name, value);
			return -EINVAL;
		}
		*(bool *)ext_val = value == 'y';
		break;
	case KCFG_TRISTATE:
		if (value == 'y')
			*(enum libbpf_tristate *)ext_val = TRI_YES;
		else if (value == 'm')
			*(enum libbpf_tristate *)ext_val = TRI_MODULE;
		else /* value == 'n' */
			*(enum libbpf_tristate *)ext_val = TRI_NO;
		break;
	case KCFG_CHAR:
		*(char *)ext_val = value;
		break;
	case KCFG_UNKNOWN:
	case KCFG_INT:
	case KCFG_CHAR_ARR:
	default:
		pr_warn("extern (kcfg) %s=%c should be bool, tristate, or char\n",
			ext->name, value);
		return -EINVAL;
	}
	ext->is_set = true;
	return 0;
}

static int set_kcfg_value_str(struct extern_desc *ext, char *ext_val,
			      const char *value)
{
	size_t len;

	if (ext->kcfg.type != KCFG_CHAR_ARR) {
		pr_warn("extern (kcfg) %s=%s should be char array\n", ext->name, value);
		return -EINVAL;
	}

	len = strlen(value);
	if (len < 2 || value[len - 1] != '"') {
		pr_warn("extern (kcfg) '%s': invalid string config '%s'\n",
			ext->name, value);
		return -EINVAL;
	}

	/* strip quotes */
	len -= 2;
	if (len >= ext->kcfg.sz) {
		pr_warn("extern (kcfg) '%s': long string config '%s' (%zu bytes) truncated to %d bytes\n",
			ext->name, value, len, ext->kcfg.sz - 1);
		len = ext->kcfg.sz - 1;
	}
	memcpy(ext_val, value + 1, len);
	ext_val[len] = '\0';
	ext->is_set = true;
	return 0;
}

static int parse_u64(const char *value, __u64 *res)
{
	char *value_end;
	int err;

	errno = 0;
	*res = strtoull(value, &value_end, 0);
	if (errno) {
		err = -errno;
		pr_warn("failed to parse '%s' as integer: %d\n", value, err);
		return err;
	}
	if (*value_end) {
		pr_warn("failed to parse '%s' as integer completely\n", value);
		return -EINVAL;
	}
	return 0;
}

static bool is_kcfg_value_in_range(const struct extern_desc *ext, __u64 v)
{
	int bit_sz = ext->kcfg.sz * 8;

	if (ext->kcfg.sz == 8)
		return true;

	/* Validate that the value stored in u64 fits in an integer of
	 * `ext->kcfg.sz` bytes without any loss of information. If the
	 * target integer is signed, we rely on the following limits of an
	 * integer type of Y bits and the subsequent transformation:
	 *
	 *     -2^(Y-1) <= X           <= 2^(Y-1) - 1
	 *            0 <= X + 2^(Y-1) <= 2^Y - 1
	 *            0 <= X + 2^(Y-1) <  2^Y
	 *
	 * For an unsigned target integer, check that all the (64 - Y) upper
	 * bits are zero.
	 */
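	/* Worked example (added for illustration): for a signed 1-byte
	 * target, bit_sz = 8 and the bias is 2^7 = 0x80. v = -128
	 * (0xFFFFFFFFFFFFFF80 as u64) biases, with wraparound, to 0x0 and
	 * v = 127 biases to 0xFF, both < 0x100, so both fit; v = 128 biases
	 * to 0x100 and is correctly rejected.
	 */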
	if (ext->kcfg.is_signed)
		return v + (1ULL << (bit_sz - 1)) < (1ULL << bit_sz);
	else
		return (v >> bit_sz) == 0;
}

static int set_kcfg_value_num(struct extern_desc *ext, void *ext_val,
			      __u64 value)
{
	if (ext->kcfg.type != KCFG_INT && ext->kcfg.type != KCFG_CHAR) {
		pr_warn("extern (kcfg) %s=%llu should be integer\n",
			ext->name, (unsigned long long)value);
		return -EINVAL;
	}
	if (!is_kcfg_value_in_range(ext, value)) {
		pr_warn("extern (kcfg) %s=%llu value doesn't fit in %d bytes\n",
			ext->name, (unsigned long long)value, ext->kcfg.sz);
		return -ERANGE;
	}
	switch (ext->kcfg.sz) {
	case 1: *(__u8 *)ext_val = value; break;
	case 2: *(__u16 *)ext_val = value; break;
	case 4: *(__u32 *)ext_val = value; break;
	case 8: *(__u64 *)ext_val = value; break;
	default:
		return -EINVAL;
	}
	ext->is_set = true;
	return 0;
}

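/* Illustrative sketch (added; mirrors libbpf's documented __kconfig usage):
 * the externs filled in by the setters above are declared on the BPF side as
 *
 *     extern int CONFIG_HZ __kconfig;
 *     extern bool CONFIG_BPF_JIT __kconfig;
 *
 * and a Kconfig line such as "CONFIG_HZ=250" from /proc/config.gz is matched
 * by name below and written into the .kconfig map at the extern's offset.
 */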
static int bpf_object__process_kconfig_line(struct bpf_object *obj,
					    char *buf, void *data)
{
	struct extern_desc *ext;
	char *sep, *value;
	int len, err = 0;
	void *ext_val;
	__u64 num;

	if (strncmp(buf, "CONFIG_", 7))
		return 0;

	sep = strchr(buf, '=');
	if (!sep) {
		pr_warn("failed to parse '%s': no separator\n", buf);
		return -EINVAL;
	}

	/* Trim ending '\n' */
	len = strlen(buf);
	if (buf[len - 1] == '\n')
		buf[len - 1] = '\0';
	/* Split on '=' and ensure that a value is present. */
	*sep = '\0';
	if (!sep[1]) {
		*sep = '=';
		pr_warn("failed to parse '%s': no value\n", buf);
		return -EINVAL;
	}

	ext = find_extern_by_name(obj, buf);
	if (!ext || ext->is_set)
		return 0;

	ext_val = data + ext->kcfg.data_off;
	value = sep + 1;

	switch (*value) {
	case 'y': case 'n': case 'm':
		err = set_kcfg_value_tri(ext, ext_val, *value);
		break;
	case '"':
		err = set_kcfg_value_str(ext, ext_val, value);
		break;
	default:
		/* assume integer */
		err = parse_u64(value, &num);
		if (err) {
			pr_warn("extern (kcfg) %s=%s should be integer\n",
				ext->name, value);
			return err;
		}
		err = set_kcfg_value_num(ext, ext_val, num);
		break;
	}
	if (err)
		return err;
	pr_debug("extern (kcfg) %s=%s\n", ext->name, value);
	return 0;
}

static int bpf_object__read_kconfig_file(struct bpf_object *obj, void *data)
{
	char buf[PATH_MAX];
	struct utsname uts;
	int len, err = 0;
	gzFile file;

	uname(&uts);
	len = snprintf(buf, PATH_MAX, "/boot/config-%s", uts.release);
	if (len < 0)
		return -EINVAL;
	else if (len >= PATH_MAX)
		return -ENAMETOOLONG;

	/* gzopen also accepts uncompressed files. */
	file = gzopen(buf, "r");
	if (!file)
		file = gzopen("/proc/config.gz", "r");

	if (!file) {
		pr_warn("failed to open system Kconfig\n");
		return -ENOENT;
	}

	while (gzgets(file, buf, sizeof(buf))) {
		err = bpf_object__process_kconfig_line(obj, buf, data);
		if (err) {
			pr_warn("error parsing system Kconfig line '%s': %d\n",
				buf, err);
			goto out;
		}
	}

out:
	gzclose(file);
	return err;
}

static int bpf_object__read_kconfig_mem(struct bpf_object *obj,
					const char *config, void *data)
{
	char buf[PATH_MAX];
	int err = 0;
	FILE *file;

	file = fmemopen((void *)config, strlen(config), "r");
	if (!file) {
		err = -errno;
		pr_warn("failed to open in-memory Kconfig: %d\n", err);
		return err;
	}

	while (fgets(buf, sizeof(buf), file)) {
		err = bpf_object__process_kconfig_line(obj, buf, data);
		if (err) {
			pr_warn("error parsing in-memory Kconfig line '%s': %d\n",
				buf, err);
			break;
		}
	}

	fclose(file);
	return err;
}

static int bpf_object__init_kconfig_map(struct bpf_object *obj)
{
	struct extern_desc *last_ext = NULL, *ext;
	size_t map_sz;
	int i, err;

	for (i = 0; i < obj->nr_extern; i++) {
		ext = &obj->externs[i];
		if (ext->type == EXT_KCFG)
			last_ext = ext;
	}

	if (!last_ext)
		return 0;

	map_sz = last_ext->kcfg.data_off + last_ext->kcfg.sz;
	err = bpf_object__init_internal_map(obj, LIBBPF_MAP_KCONFIG,
					    obj->efile.symbols_shndx,
					    NULL, map_sz);
	if (err)
		return err;

	obj->kconfig_map_idx = obj->nr_maps - 1;

	return 0;
}

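/* Parse legacy (pre-BTF) map definitions from the "maps" ELF section: one
 * fixed-layout struct bpf_map_def per global symbol, with all definitions
 * assumed to be of equal size.
 */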
static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict)
{
	Elf_Data *symbols = obj->efile.symbols;
	int i, map_def_sz = 0, nr_maps = 0, nr_syms;
	Elf_Data *data = NULL;
	Elf_Scn *scn;

	if (obj->efile.maps_shndx < 0)
		return 0;

	if (!symbols)
		return -EINVAL;

	scn = elf_sec_by_idx(obj, obj->efile.maps_shndx);
	data = elf_sec_data(obj, scn);
	if (!scn || !data) {
		pr_warn("elf: failed to get legacy map definitions for %s\n",
			obj->path);
		return -EINVAL;
	}

	/*
	 * Count number of maps. Each map has a name.
	 * Array of maps is not supported: only the first element is
	 * considered.
	 *
	 * TODO: Detect array of map and report error.
	 */
	nr_syms = symbols->d_size / sizeof(GElf_Sym);
	for (i = 0; i < nr_syms; i++) {
		GElf_Sym sym;

		if (!gelf_getsym(symbols, i, &sym))
			continue;
		if (sym.st_shndx != obj->efile.maps_shndx)
			continue;
		nr_maps++;
	}
	/* Assume equally sized map definitions */
	pr_debug("elf: found %d legacy map definitions (%zd bytes) in %s\n",
		 nr_maps, data->d_size, obj->path);

	if (!data->d_size || nr_maps == 0 || (data->d_size % nr_maps) != 0) {
		pr_warn("elf: unable to determine legacy map definition size in %s\n",
			obj->path);
		return -EINVAL;
	}
	map_def_sz = data->d_size / nr_maps;

	/* Fill obj->maps using data in "maps" section.  */
	for (i = 0; i < nr_syms; i++) {
		GElf_Sym sym;
		const char *map_name;
		struct bpf_map_def *def;
		struct bpf_map *map;

		if (!gelf_getsym(symbols, i, &sym))
			continue;
		if (sym.st_shndx != obj->efile.maps_shndx)
			continue;

		map = bpf_object__add_map(obj);
		if (IS_ERR(map))
			return PTR_ERR(map);

		map_name = elf_sym_str(obj, sym.st_name);
		if (!map_name) {
			pr_warn("failed to get map #%d name sym string for obj %s\n",
				i, obj->path);
			return -LIBBPF_ERRNO__FORMAT;
		}

		if (GELF_ST_TYPE(sym.st_info) == STT_SECTION ||
		    GELF_ST_BIND(sym.st_info) == STB_LOCAL) {
			pr_warn("map '%s' (legacy): static maps are not supported\n", map_name);
			return -ENOTSUP;
		}

		map->libbpf_type = LIBBPF_MAP_UNSPEC;
		map->sec_idx = sym.st_shndx;
		map->sec_offset = sym.st_value;
		pr_debug("map '%s' (legacy): at sec_idx %d, offset %zu.\n",
			 map_name, map->sec_idx, map->sec_offset);
		if (sym.st_value + map_def_sz > data->d_size) {
			pr_warn("corrupted maps section in %s: last map \"%s\" too small\n",
				obj->path, map_name);
			return -EINVAL;
		}

		map->name = strdup(map_name);
		if (!map->name) {
			pr_warn("failed to alloc map name\n");
			return -ENOMEM;
		}
		pr_debug("map %d is \"%s\"\n", i, map->name);
		def = (struct bpf_map_def *)(data->d_buf + sym.st_value);
		/*
		 * If the definition of the map in the object file fits in
		 * bpf_map_def, copy it.  Any extra fields in our version
		 * of bpf_map_def will default to zero as a result of the
		 * calloc above.
		 */
		if (map_def_sz <= sizeof(struct bpf_map_def)) {
			memcpy(&map->def, def, map_def_sz);
		} else {
			/*
			 * Here the map structure being read is bigger than what
			 * we expect, truncate if the excess bits are all zero.
			 * If they are not zero, reject this map as
			 * incompatible.
			 */
			char *b;

			for (b = ((char *)def) + sizeof(struct bpf_map_def);
			     b < ((char *)def) + map_def_sz; b++) {
				if (*b != 0) {
					pr_warn("maps section in %s: \"%s\" has unrecognized, non-zero options\n",
						obj->path, map_name);
					if (strict)
						return -EINVAL;
				}
			}
			memcpy(&map->def, def, sizeof(struct bpf_map_def));
		}
	}
	return 0;
}

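/* Follow modifier (const/volatile/restrict) and typedef chains to the
 * underlying BTF type. If res_id is non-NULL, it receives the ID of the
 * final resolved type.
 */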
const struct btf_type *
skip_mods_and_typedefs(const struct btf *btf, __u32 id, __u32 *res_id)
{
	const struct btf_type *t = btf__type_by_id(btf, id);

	if (res_id)
		*res_id = id;

	while (btf_is_mod(t) || btf_is_typedef(t)) {
		if (res_id)
			*res_id = t->type;
		t = btf__type_by_id(btf, t->type);
	}

	return t;
}

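/* Resolve id to a function pointer: returns the pointed-to FUNC_PROTO type
 * (with res_id, if non-NULL, set to its resolved ID), or NULL if id does not
 * name a pointer to a function prototype.
 */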
static const struct btf_type *
resolve_func_ptr(const struct btf *btf, __u32 id, __u32 *res_id)
{
	const struct btf_type *t;

	t = skip_mods_and_typedefs(btf, id, NULL);
	if (!btf_is_ptr(t))
		return NULL;

	t = skip_mods_and_typedefs(btf, t->type, res_id);

	return btf_is_func_proto(t) ? t : NULL;
}

static const char *__btf_kind_str(__u16 kind)
{
	switch (kind) {
	case BTF_KIND_UNKN: return "void";
	case BTF_KIND_INT: return "int";
	case BTF_KIND_PTR: return "ptr";
	case BTF_KIND_ARRAY: return "array";
	case BTF_KIND_STRUCT: return "struct";
	case BTF_KIND_UNION: return "union";
	case BTF_KIND_ENUM: return "enum";
	case BTF_KIND_FWD: return "fwd";
	case BTF_KIND_TYPEDEF: return "typedef";
	case BTF_KIND_VOLATILE: return "volatile";
	case BTF_KIND_CONST: return "const";
	case BTF_KIND_RESTRICT: return "restrict";
	case BTF_KIND_FUNC: return "func";
	case BTF_KIND_FUNC_PROTO: return "func_proto";
	case BTF_KIND_VAR: return "var";
	case BTF_KIND_DATASEC: return "datasec";
	case BTF_KIND_FLOAT: return "float";
	default: return "unknown";
	}
}

const char *btf_kind_str(const struct btf_type *t)
{
	return __btf_kind_str(btf_kind(t));
}

/*
 * Fetch integer attribute of BTF map definition. Such attributes are
 * represented using a pointer to an array, in which dimensionality of array
 * encodes specified integer value. E.g., int (*type)[BPF_MAP_TYPE_ARRAY];
 * encodes `type => BPF_MAP_TYPE_ARRAY` key/value pair completely using BTF
 * type definition, while using only sizeof(void *) space in ELF data section.
 */
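/* Illustrative sketch (added): in BPF-side C this encoding is usually spelled
 * via libbpf's bpf_helpers.h macros, e.g.
 *
 *     __uint(max_entries, 4096);    // expands to: int (*max_entries)[4096];
 *
 * so the value 4096 lives entirely in the BTF array type and is recovered
 * below from the array's nelems.
 */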
static bool get_map_field_int(const char *map_name, const struct btf *btf,
			      const struct btf_member *m, __u32 *res)
{
	const struct btf_type *t = skip_mods_and_typedefs(btf, m->type, NULL);
	const char *name = btf__name_by_offset(btf, m->name_off);
	const struct btf_array *arr_info;
	const struct btf_type *arr_t;

	if (!btf_is_ptr(t)) {
		pr_warn("map '%s': attr '%s': expected PTR, got %s.\n",
			map_name, name, btf_kind_str(t));
		return false;
	}

	arr_t = btf__type_by_id(btf, t->type);
	if (!arr_t) {
		pr_warn("map '%s': attr '%s': type [%u] not found.\n",
			map_name, name, t->type);
		return false;
	}
	if (!btf_is_array(arr_t)) {
		pr_warn("map '%s': attr '%s': expected ARRAY, got %s.\n",
			map_name, name, btf_kind_str(arr_t));
		return false;
	}
	arr_info = btf_array(arr_t);
	*res = arr_info->nelems;
	return true;
}

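/* A minimal sketch of the resulting pin layout (paths are the defaults used
 * below): a map named "my_map" pinned with LIBBPF_PIN_BY_NAME and no explicit
 * pin_root_path gets pin_path "/sys/fs/bpf/my_map"; with pin_root_path set to
 * "/sys/fs/bpf/tc" it would get "/sys/fs/bpf/tc/my_map".
 */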
static int build_map_pin_path(struct bpf_map *map, const char *path)
{
	char buf[PATH_MAX];
	int len;

	if (!path)
		path = "/sys/fs/bpf";

	len = snprintf(buf, PATH_MAX, "%s/%s", path, bpf_map__name(map));
	if (len < 0)
		return -EINVAL;
	else if (len >= PATH_MAX)
		return -ENAMETOOLONG;

	return bpf_map__set_pin_path(map, buf);
}

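/* Illustrative sketch (added; uses bpf_helpers.h conventions): the struct
 * walked by parse_btf_map_def() is produced by a BPF-side definition such as
 *
 *     struct {
 *             __uint(type, BPF_MAP_TYPE_HASH);
 *             __uint(max_entries, 1024);
 *             __type(key, __u32);
 *             __type(value, struct my_val);    // my_val is a made-up type
 *     } my_map SEC(".maps");
 *
 * where each member name handled below ("type", "max_entries", "key", ...)
 * corresponds to one field of that struct.
 */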
int parse_btf_map_def(const char *map_name, struct btf *btf,
		      const struct btf_type *def_t, bool strict,
		      struct btf_map_def *map_def, struct btf_map_def *inner_def)
{
	const struct btf_type *t;
	const struct btf_member *m;
	bool is_inner = inner_def == NULL;
	int vlen, i;

	vlen = btf_vlen(def_t);
	m = btf_members(def_t);
	for (i = 0; i < vlen; i++, m++) {
		const char *name = btf__name_by_offset(btf, m->name_off);

		if (!name) {
			pr_warn("map '%s': invalid field #%d.\n", map_name, i);
			return -EINVAL;
		}
		if (strcmp(name, "type") == 0) {
			if (!get_map_field_int(map_name, btf, m, &map_def->map_type))
				return -EINVAL;
			map_def->parts |= MAP_DEF_MAP_TYPE;
		} else if (strcmp(name, "max_entries") == 0) {
			if (!get_map_field_int(map_name, btf, m, &map_def->max_entries))
				return -EINVAL;
			map_def->parts |= MAP_DEF_MAX_ENTRIES;
		} else if (strcmp(name, "map_flags") == 0) {
			if (!get_map_field_int(map_name, btf, m, &map_def->map_flags))
				return -EINVAL;
			map_def->parts |= MAP_DEF_MAP_FLAGS;
		} else if (strcmp(name, "numa_node") == 0) {
			if (!get_map_field_int(map_name, btf, m, &map_def->numa_node))
				return -EINVAL;
			map_def->parts |= MAP_DEF_NUMA_NODE;
		} else if (strcmp(name, "key_size") == 0) {
			__u32 sz;

			if (!get_map_field_int(map_name, btf, m, &sz))
				return -EINVAL;
			if (map_def->key_size && map_def->key_size != sz) {
				pr_warn("map '%s': conflicting key size %u != %u.\n",
					map_name, map_def->key_size, sz);
				return -EINVAL;
			}
			map_def->key_size = sz;
			map_def->parts |= MAP_DEF_KEY_SIZE;
		} else if (strcmp(name, "key") == 0) {
			__s64 sz;

			t = btf__type_by_id(btf, m->type);
			if (!t) {
				pr_warn("map '%s': key type [%d] not found.\n",
					map_name, m->type);
				return -EINVAL;
			}
			if (!btf_is_ptr(t)) {
				pr_warn("map '%s': key spec is not PTR: %s.\n",
					map_name, btf_kind_str(t));
				return -EINVAL;
			}
			sz = btf__resolve_size(btf, t->type);
			if (sz < 0) {
				pr_warn("map '%s': can't determine key size for type [%u]: %zd.\n",
					map_name, t->type, (ssize_t)sz);
				return sz;
			}
			if (map_def->key_size && map_def->key_size != sz) {
				pr_warn("map '%s': conflicting key size %u != %zd.\n",
					map_name, map_def->key_size, (ssize_t)sz);
				return -EINVAL;
			}
			map_def->key_size = sz;
			map_def->key_type_id = t->type;
			map_def->parts |= MAP_DEF_KEY_SIZE | MAP_DEF_KEY_TYPE;
		} else if (strcmp(name, "value_size") == 0) {
			__u32 sz;

			if (!get_map_field_int(map_name, btf, m, &sz))
				return -EINVAL;
			if (map_def->value_size && map_def->value_size != sz) {
				pr_warn("map '%s': conflicting value size %u != %u.\n",
					map_name, map_def->value_size, sz);
				return -EINVAL;
			}
			map_def->value_size = sz;
			map_def->parts |= MAP_DEF_VALUE_SIZE;
		} else if (strcmp(name, "value") == 0) {
			__s64 sz;

			t = btf__type_by_id(btf, m->type);
			if (!t) {
				pr_warn("map '%s': value type [%d] not found.\n",
					map_name, m->type);
				return -EINVAL;
			}
			if (!btf_is_ptr(t)) {
				pr_warn("map '%s': value spec is not PTR: %s.\n",
					map_name, btf_kind_str(t));
				return -EINVAL;
			}
			sz = btf__resolve_size(btf, t->type);
			if (sz < 0) {
				pr_warn("map '%s': can't determine value size for type [%u]: %zd.\n",
					map_name, t->type, (ssize_t)sz);
				return sz;
			}
			if (map_def->value_size && map_def->value_size != sz) {
				pr_warn("map '%s': conflicting value size %u != %zd.\n",
					map_name, map_def->value_size, (ssize_t)sz);
				return -EINVAL;
			}
			map_def->value_size = sz;
			map_def->value_type_id = t->type;
			map_def->parts |= MAP_DEF_VALUE_SIZE | MAP_DEF_VALUE_TYPE;
		} else if (strcmp(name, "values") == 0) {
			char inner_map_name[128];
			int err;

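			/* Illustrative sketch (added): this branch matches a
			 * declaration like
			 *
			 *     struct {
			 *             __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
			 *             __uint(max_entries, 8);
			 *             __type(key, int);
			 *             __array(values, struct inner_map_def);
			 *     } outer SEC(".maps");
			 *
			 * where "values" is a zero-sized array of pointers to
			 * the (here made-up) inner map definition struct.
			 */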
			if (is_inner) {
				pr_warn("map '%s': multi-level inner maps not supported.\n",
					map_name);
				return -ENOTSUP;
			}
			if (i != vlen - 1) {
				pr_warn("map '%s': '%s' member should be last.\n",
					map_name, name);
				return -EINVAL;
			}
			if (!bpf_map_type__is_map_in_map(map_def->map_type)) {
				pr_warn("map '%s': should be map-in-map.\n",
					map_name);
				return -ENOTSUP;
			}
			if (map_def->value_size && map_def->value_size != 4) {
				pr_warn("map '%s': conflicting value size %u != 4.\n",
					map_name, map_def->value_size);
				return -EINVAL;
			}
			map_def->value_size = 4;
			t = btf__type_by_id(btf, m->type);
			if (!t) {
				pr_warn("map '%s': map-in-map inner type [%d] not found.\n",
					map_name, m->type);
				return -EINVAL;
			}
			if (!btf_is_array(t) || btf_array(t)->nelems) {
				pr_warn("map '%s': map-in-map inner spec is not a zero-sized array.\n",
					map_name);
				return -EINVAL;
			}
			t = skip_mods_and_typedefs(btf, btf_array(t)->type, NULL);
			if (!btf_is_ptr(t)) {
				pr_warn("map '%s': map-in-map inner def is of unexpected kind %s.\n",
					map_name, btf_kind_str(t));
				return -EINVAL;
			}
			t = skip_mods_and_typedefs(btf, t->type, NULL);
			if (!btf_is_struct(t)) {
				pr_warn("map '%s': map-in-map inner def is of unexpected kind %s.\n",
					map_name, btf_kind_str(t));
				return -EINVAL;
			}

			snprintf(inner_map_name, sizeof(inner_map_name), "%s.inner", map_name);
			err = parse_btf_map_def(inner_map_name, btf, t, strict, inner_def, NULL);
			if (err)
				return err;

			map_def->parts |= MAP_DEF_INNER_MAP;
		} else if (strcmp(name, "pinning") == 0) {
			__u32 val;

			if (is_inner) {
				pr_warn("map '%s': inner def can't be pinned.\n", map_name);
				return -EINVAL;
			}
			if (!get_map_field_int(map_name, btf, m, &val))
				return -EINVAL;
			if (val != LIBBPF_PIN_NONE && val != LIBBPF_PIN_BY_NAME) {
				pr_warn("map '%s': invalid pinning value %u.\n",
					map_name, val);
				return -EINVAL;
			}
			map_def->pinning = val;
			map_def->parts |= MAP_DEF_PINNING;
		} else {
			if (strict) {
				pr_warn("map '%s': unknown field '%s'.\n", map_name, name);
				return -ENOTSUP;
			}
			pr_debug("map '%s': ignoring unknown field '%s'.\n", map_name, name);
		}
	}

	if (map_def->map_type == BPF_MAP_TYPE_UNSPEC) {
		pr_warn("map '%s': map type isn't specified.\n", map_name);
		return -EINVAL;
	}

	return 0;
}

static void fill_map_from_def(struct bpf_map *map, const struct btf_map_def *def)
{
	map->def.type = def->map_type;
	map->def.key_size = def->key_size;
	map->def.value_size = def->value_size;
	map->def.max_entries = def->max_entries;
	map->def.map_flags = def->map_flags;

	map->numa_node = def->numa_node;
	map->btf_key_type_id = def->key_type_id;
	map->btf_value_type_id = def->value_type_id;

	if (def->parts & MAP_DEF_MAP_TYPE)
		pr_debug("map '%s': found type = %u.\n", map->name, def->map_type);

	if (def->parts & MAP_DEF_KEY_TYPE)
		pr_debug("map '%s': found key [%u], sz = %u.\n",
			 map->name, def->key_type_id, def->key_size);
	else if (def->parts & MAP_DEF_KEY_SIZE)
		pr_debug("map '%s': found key_size = %u.\n", map->name, def->key_size);

	if (def->parts & MAP_DEF_VALUE_TYPE)
		pr_debug("map '%s': found value [%u], sz = %u.\n",
			 map->name, def->value_type_id, def->value_size);
	else if (def->parts & MAP_DEF_VALUE_SIZE)
		pr_debug("map '%s': found value_size = %u.\n", map->name, def->value_size);

	if (def->parts & MAP_DEF_MAX_ENTRIES)
		pr_debug("map '%s': found max_entries = %u.\n", map->name, def->max_entries);
	if (def->parts & MAP_DEF_MAP_FLAGS)
		pr_debug("map '%s': found map_flags = %u.\n", map->name, def->map_flags);
	if (def->parts & MAP_DEF_PINNING)
		pr_debug("map '%s': found pinning = %u.\n", map->name, def->pinning);
	if (def->parts & MAP_DEF_NUMA_NODE)
		pr_debug("map '%s': found numa_node = %u.\n", map->name, def->numa_node);

	if (def->parts & MAP_DEF_INNER_MAP)
		pr_debug("map '%s': found inner map definition.\n", map->name);
}

static const char *btf_var_linkage_str(__u32 linkage)
{
	switch (linkage) {
	case BTF_VAR_STATIC: return "static";
	case BTF_VAR_GLOBAL_ALLOCATED: return "global";
	case BTF_VAR_GLOBAL_EXTERN: return "extern";
	default: return "unknown";
	}
}

static int bpf_object__init_user_btf_map(struct bpf_object *obj,
					 const struct btf_type *sec,
					 int var_idx, int sec_idx,
					 const Elf_Data *data, bool strict,
					 const char *pin_root_path)
{
	struct btf_map_def map_def = {}, inner_def = {};
	const struct btf_type *var, *def;
	const struct btf_var_secinfo *vi;
	const struct btf_var *var_extra;
	const char *map_name;
	struct bpf_map *map;
	int err;

	vi = btf_var_secinfos(sec) + var_idx;
	var = btf__type_by_id(obj->btf, vi->type);
	var_extra = btf_var(var);
	map_name = btf__name_by_offset(obj->btf, var->name_off);

	if (map_name == NULL || map_name[0] == '\0') {
		pr_warn("map #%d: empty name.\n", var_idx);
		return -EINVAL;
	}
	if ((__u64)vi->offset + vi->size > data->d_size) {
		pr_warn("map '%s' BTF data is corrupted.\n", map_name);
		return -EINVAL;
	}
	if (!btf_is_var(var)) {
		pr_warn("map '%s': unexpected var kind %s.\n",
			map_name, btf_kind_str(var));
		return -EINVAL;
	}
	if (var_extra->linkage != BTF_VAR_GLOBAL_ALLOCATED) {
		pr_warn("map '%s': unsupported map linkage %s.\n",
			map_name, btf_var_linkage_str(var_extra->linkage));
		return -EOPNOTSUPP;
	}

	def = skip_mods_and_typedefs(obj->btf, var->type, NULL);
	if (!btf_is_struct(def)) {
		pr_warn("map '%s': unexpected def kind %s.\n",
			map_name, btf_kind_str(def));
		return -EINVAL;
	}
	if (def->size > vi->size) {
		pr_warn("map '%s': invalid def size.\n", map_name);
		return -EINVAL;
	}

	map = bpf_object__add_map(obj);
	if (IS_ERR(map))
		return PTR_ERR(map);
	map->name = strdup(map_name);
	if (!map->name) {
		pr_warn("map '%s': failed to alloc map name.\n", map_name);
		return -ENOMEM;
	}
	map->libbpf_type = LIBBPF_MAP_UNSPEC;
	map->def.type = BPF_MAP_TYPE_UNSPEC;
	map->sec_idx = sec_idx;
	map->sec_offset = vi->offset;
	map->btf_var_idx = var_idx;
	pr_debug("map '%s': at sec_idx %d, offset %zu.\n",
		 map_name, map->sec_idx, map->sec_offset);

	err = parse_btf_map_def(map->name, obj->btf, def, strict, &map_def, &inner_def);
	if (err)
		return err;

	fill_map_from_def(map, &map_def);

	if (map_def.pinning == LIBBPF_PIN_BY_NAME) {
		err = build_map_pin_path(map, pin_root_path);
		if (err) {
			pr_warn("map '%s': couldn't build pin path.\n", map->name);
			return err;
		}
	}

	if (map_def.parts & MAP_DEF_INNER_MAP) {
		map->inner_map = calloc(1, sizeof(*map->inner_map));
		if (!map->inner_map)
			return -ENOMEM;
		map->inner_map->fd = -1;
		map->inner_map->sec_idx = sec_idx;
		map->inner_map->name = malloc(strlen(map_name) + sizeof(".inner") + 1);
		if (!map->inner_map->name)
			return -ENOMEM;
		sprintf(map->inner_map->name, "%s.inner", map_name);

		fill_map_from_def(map->inner_map, &inner_def);
	}

	return 0;
}

static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict,
					  const char *pin_root_path)
{
	const struct btf_type *sec = NULL;
	int nr_types, i, vlen, err;
	const struct btf_type *t;
	const char *name;
	Elf_Data *data;
	Elf_Scn *scn;

	if (obj->efile.btf_maps_shndx < 0)
		return 0;

	scn = elf_sec_by_idx(obj, obj->efile.btf_maps_shndx);
	data = elf_sec_data(obj, scn);
	if (!scn || !data) {
		pr_warn("elf: failed to get %s map definitions for %s\n",
			MAPS_ELF_SEC, obj->path);
		return -EINVAL;
	}

	nr_types = btf__get_nr_types(obj->btf);
	for (i = 1; i <= nr_types; i++) {
		t = btf__type_by_id(obj->btf, i);
		if (!btf_is_datasec(t))
			continue;
		name = btf__name_by_offset(obj->btf, t->name_off);
		if (strcmp(name, MAPS_ELF_SEC) == 0) {
			sec = t;
			obj->efile.btf_maps_sec_btf_id = i;
			break;
		}
	}

	if (!sec) {
		pr_warn("DATASEC '%s' not found.\n", MAPS_ELF_SEC);
		return -ENOENT;
	}

	vlen = btf_vlen(sec);
	for (i = 0; i < vlen; i++) {
		err = bpf_object__init_user_btf_map(obj, sec, i,
						    obj->efile.btf_maps_shndx,
						    data, strict,
						    pin_root_path);
		if (err)
			return err;
	}

	return 0;
}

static int bpf_object__init_maps(struct bpf_object *obj,
				 const struct bpf_object_open_opts *opts)
{
	const char *pin_root_path;
	bool strict;
	int err;

	strict = !OPTS_GET(opts, relaxed_maps, false);
	pin_root_path = OPTS_GET(opts, pin_root_path, NULL);

	err = bpf_object__init_user_maps(obj, strict);
	err = err ?: bpf_object__init_user_btf_maps(obj, strict, pin_root_path);
	err = err ?: bpf_object__init_global_data_maps(obj);
	err = err ?: bpf_object__init_kconfig_map(obj);
	err = err ?: bpf_object__init_struct_ops_maps(obj);

	return err;
}

static bool section_have_execinstr(struct bpf_object *obj, int idx)
{
	GElf_Shdr sh;

	if (elf_sec_hdr(obj, elf_sec_by_idx(obj, idx), &sh))
		return false;

	return sh.sh_flags & SHF_EXECINSTR;
}

static bool btf_needs_sanitization(struct bpf_object *obj)
{
	bool has_func_global = kernel_supports(obj, FEAT_BTF_GLOBAL_FUNC);
	bool has_datasec = kernel_supports(obj, FEAT_BTF_DATASEC);
	bool has_float = kernel_supports(obj, FEAT_BTF_FLOAT);
	bool has_func = kernel_supports(obj, FEAT_BTF_FUNC);

	return !has_func || !has_datasec || !has_func_global || !has_float;
}

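/* Downgrade BTF kinds the running kernel doesn't understand into older,
 * semantically close kinds, so that the (sanitized copy of the) BTF still
 * loads; the replacements mirror the feature probes in
 * btf_needs_sanitization() above.
 */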
static void bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf)
{
	bool has_func_global = kernel_supports(obj, FEAT_BTF_GLOBAL_FUNC);
	bool has_datasec = kernel_supports(obj, FEAT_BTF_DATASEC);
	bool has_float = kernel_supports(obj, FEAT_BTF_FLOAT);
	bool has_func = kernel_supports(obj, FEAT_BTF_FUNC);
	struct btf_type *t;
	int i, j, vlen;

	for (i = 1; i <= btf__get_nr_types(btf); i++) {
		t = (struct btf_type *)btf__type_by_id(btf, i);

		if (!has_datasec && btf_is_var(t)) {
			/* replace VAR with INT */
			t->info = BTF_INFO_ENC(BTF_KIND_INT, 0, 0);
			/*
			 * using size = 1 is the safest choice, 4 will be too
			 * big and cause kernel BTF validation failure if
			 * original variable took less than 4 bytes
			 */
			t->size = 1;
			*(int *)(t + 1) = BTF_INT_ENC(0, 0, 8);
		} else if (!has_datasec && btf_is_datasec(t)) {
			/* replace DATASEC with STRUCT */
			const struct btf_var_secinfo *v = btf_var_secinfos(t);
			struct btf_member *m = btf_members(t);
			struct btf_type *vt;
			char *name;

			name = (char *)btf__name_by_offset(btf, t->name_off);
			while (*name) {
				if (*name == '.')
					*name = '_';
				name++;
			}

			vlen = btf_vlen(t);
			t->info = BTF_INFO_ENC(BTF_KIND_STRUCT, 0, vlen);
			for (j = 0; j < vlen; j++, v++, m++) {
				/* order of field assignments is important */
				m->offset = v->offset * 8;
				m->type = v->type;
				/* preserve variable name as member name */
				vt = (void *)btf__type_by_id(btf, v->type);
				m->name_off = vt->name_off;
			}
		} else if (!has_func && btf_is_func_proto(t)) {
			/* replace FUNC_PROTO with ENUM */
			vlen = btf_vlen(t);
			t->info = BTF_INFO_ENC(BTF_KIND_ENUM, 0, vlen);
			t->size = sizeof(__u32); /* kernel enforced */
		} else if (!has_func && btf_is_func(t)) {
			/* replace FUNC with TYPEDEF */
			t->info = BTF_INFO_ENC(BTF_KIND_TYPEDEF, 0, 0);
		} else if (!has_func_global && btf_is_func(t)) {
			/* replace BTF_FUNC_GLOBAL with BTF_FUNC_STATIC */
			t->info = BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0);
		} else if (!has_float && btf_is_float(t)) {
			/* replace FLOAT with an equally-sized empty STRUCT;
			 * since C compilers do not accept e.g. "float" as a
			 * valid struct name, make it anonymous
			 */
			t->name_off = 0;
			t->info = BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 0);
		}
	}
}

static bool libbpf_needs_btf(const struct bpf_object *obj)
{
	return obj->efile.btf_maps_shndx >= 0 ||
	       obj->efile.st_ops_shndx >= 0 ||
	       obj->nr_extern > 0;
}

static bool kernel_needs_btf(const struct bpf_object *obj)
{
	return obj->efile.st_ops_shndx >= 0;
}

static int bpf_object__init_btf(struct bpf_object *obj,
				Elf_Data *btf_data,
				Elf_Data *btf_ext_data)
{
	int err = -ENOENT;

	if (btf_data) {
		obj->btf = btf__new(btf_data->d_buf, btf_data->d_size);
		err = libbpf_get_error(obj->btf);
		if (err) {
			obj->btf = NULL;
			pr_warn("Error loading ELF section %s: %d.\n", BTF_ELF_SEC, err);
			goto out;
		}
		/* enforce 8-byte pointers for BPF-targeted BTFs */
		btf__set_pointer_size(obj->btf, 8);
	}
	if (btf_ext_data) {
		if (!obj->btf) {
			pr_debug("Ignoring ELF section %s because the ELF section %s it depends on is missing.\n",
				 BTF_EXT_ELF_SEC, BTF_ELF_SEC);
			goto out;
		}
		obj->btf_ext = btf_ext__new(btf_ext_data->d_buf, btf_ext_data->d_size);
		err = libbpf_get_error(obj->btf_ext);
		if (err) {
			pr_warn("Error loading ELF section %s: %d. Ignoring and continuing.\n",
				BTF_EXT_ELF_SEC, err);
			obj->btf_ext = NULL;
			goto out;
		}
	}
out:
	if (err && libbpf_needs_btf(obj)) {
		pr_warn("BTF is required, but is missing or corrupted.\n");
		return err;
	}
	return 0;
}

static int bpf_object__finalize_btf(struct bpf_object *obj)
{
	int err;

	if (!obj->btf)
		return 0;

	err = btf__finalize_data(obj, obj->btf);
	if (err) {
		pr_warn("Error finalizing %s: %d.\n", BTF_ELF_SEC, err);
		return err;
	}

	return 0;
}

static bool prog_needs_vmlinux_btf(struct bpf_program *prog)
{
	if (prog->type == BPF_PROG_TYPE_STRUCT_OPS ||
	    prog->type == BPF_PROG_TYPE_LSM)
		return true;

	/* BPF_PROG_TYPE_TRACING programs which do not attach to other programs
	 * also need vmlinux BTF
	 */
	if (prog->type == BPF_PROG_TYPE_TRACING && !prog->attach_prog_fd)
		return true;

	return false;
}

static bool obj_needs_vmlinux_btf(const struct bpf_object *obj)
{
	struct bpf_program *prog;
	int i;

	/* CO-RE relocations need kernel BTF, only when btf_custom_path
	 * is not specified
	 */
	if (obj->btf_ext && obj->btf_ext->core_relo_info.len && !obj->btf_custom_path)
		return true;

	/* Support for typed ksyms needs kernel BTF */
	for (i = 0; i < obj->nr_extern; i++) {
		const struct extern_desc *ext;

		ext = &obj->externs[i];
		if (ext->type == EXT_KSYM && ext->ksym.type_id)
			return true;
	}

	bpf_object__for_each_program(prog, obj) {
		if (!prog->load)
			continue;
		if (prog_needs_vmlinux_btf(prog))
			return true;
	}

	return false;
}

static int bpf_object__load_vmlinux_btf(struct bpf_object *obj, bool force)
{
	int err;

	/* btf_vmlinux could be loaded earlier */
	if (obj->btf_vmlinux || obj->gen_loader)
		return 0;

	if (!force && !obj_needs_vmlinux_btf(obj))
		return 0;

	obj->btf_vmlinux = libbpf_find_kernel_btf();
	err = libbpf_get_error(obj->btf_vmlinux);
	if (err) {
		pr_warn("Error loading vmlinux BTF: %d\n", err);
		obj->btf_vmlinux = NULL;
		return err;
	}
	return 0;
}

static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj)
{
	struct btf *kern_btf = obj->btf;
	bool btf_mandatory, sanitize;
	int i, err = 0;

	if (!obj->btf)
		return 0;

	if (!kernel_supports(obj, FEAT_BTF)) {
		if (kernel_needs_btf(obj)) {
			err = -EOPNOTSUPP;
			goto report;
		}
		pr_debug("Kernel doesn't support BTF, skipping uploading it.\n");
		return 0;
	}

	/* Even though some subprogs are global/weak, the user might prefer
	 * the more permissive verification process that the BPF verifier
	 * performs for static functions, taking into account more context
	 * from the caller functions. In that case, such subprogs are marked
	 * with __attribute__((visibility("hidden"))) and libbpf adjusts the
	 * corresponding FUNC BTF type to be static, triggering the more
	 * involved verification process.
	 */
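	/* For example (illustrative, not from the original source), a subprog
	 * defined in BPF C as
	 *
	 *     __attribute__((visibility("hidden")))
	 *     int scale(int x) { return x * 3; }
	 *
	 * keeps a GLOBAL FUNC entry in the object's BTF; the loop below
	 * rewrites its linkage to STATIC before the BTF reaches the kernel.
	 */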
	for (i = 0; i < obj->nr_programs; i++) {
		struct bpf_program *prog = &obj->programs[i];
		struct btf_type *t;
		const char *name;
		int j, n;

		if (!prog->mark_btf_static || !prog_is_subprog(obj, prog))
			continue;

		n = btf__get_nr_types(obj->btf);
		for (j = 1; j <= n; j++) {
			t = btf_type_by_id(obj->btf, j);
			if (!btf_is_func(t) || btf_func_linkage(t) != BTF_FUNC_GLOBAL)
				continue;

			name = btf__str_by_offset(obj->btf, t->name_off);
			if (strcmp(name, prog->name) != 0)
				continue;

			t->info = btf_type_info(BTF_KIND_FUNC, BTF_FUNC_STATIC, 0);
			break;
		}
	}

	sanitize = btf_needs_sanitization(obj);
	if (sanitize) {
		const void *raw_data;
		__u32 sz;

		/* clone BTF to sanitize a copy and leave the original intact */
		raw_data = btf__get_raw_data(obj->btf, &sz);
		kern_btf = btf__new(raw_data, sz);
		err = libbpf_get_error(kern_btf);
		if (err)
			return err;

		/* enforce 8-byte pointers for BPF-targeted BTFs */
		btf__set_pointer_size(obj->btf, 8);
		bpf_object__sanitize_btf(obj, kern_btf);
	}

	if (obj->gen_loader) {
		__u32 raw_size = 0;
		const void *raw_data = btf__get_raw_data(kern_btf, &raw_size);

		if (!raw_data)
			return -ENOMEM;
		bpf_gen__load_btf(obj->gen_loader, raw_data, raw_size);
		/* Pretend to have a valid FD to pass various fd >= 0 checks.
		 * This fd == 0 will not be used with any syscall and will be
		 * reset to -1 eventually.
		 */
		btf__set_fd(kern_btf, 0);
	} else {
		err = btf__load(kern_btf);
	}
	if (sanitize) {
		if (!err) {
			/* move fd to libbpf's BTF */
			btf__set_fd(obj->btf, btf__fd(kern_btf));
			btf__set_fd(kern_btf, -1);
		}
		btf__free(kern_btf);
	}
report:
	if (err) {
		btf_mandatory = kernel_needs_btf(obj);
		pr_warn("Error loading .BTF into kernel: %d. %s\n", err,
			btf_mandatory ? "BTF is mandatory, can't proceed."
				      : "BTF is optional, ignoring.");
		if (!btf_mandatory)
			err = 0;
	}
	return err;
}


static const char *elf_sym_str(const struct bpf_object *obj, size_t off)
{
	const char *name;

	name = elf_strptr(obj->efile.elf, obj->efile.strtabidx, off);
	if (!name) {
		pr_warn("elf: failed to get symbol name string at offset %zu from %s: %s\n",
			off, obj->path, elf_errmsg(-1));
		return NULL;
	}

	return name;
}
2812
2813 static const char *elf_sec_str(const struct bpf_object *obj, size_t off)
2814 {
2815         const char *name;
2816
2817         name = elf_strptr(obj->efile.elf, obj->efile.shstrndx, off);
2818         if (!name) {
2819                 pr_warn("elf: failed to get section name string at offset %zu from %s: %s\n",
2820                         off, obj->path, elf_errmsg(-1));
2821                 return NULL;
2822         }
2823
2824         return name;
2825 }
2826
2827 static Elf_Scn *elf_sec_by_idx(const struct bpf_object *obj, size_t idx)
2828 {
2829         Elf_Scn *scn;
2830
2831         scn = elf_getscn(obj->efile.elf, idx);
2832         if (!scn) {
2833                 pr_warn("elf: failed to get section(%zu) from %s: %s\n",
2834                         idx, obj->path, elf_errmsg(-1));
2835                 return NULL;
2836         }
2837         return scn;
2838 }
2839
2840 static Elf_Scn *elf_sec_by_name(const struct bpf_object *obj, const char *name)
2841 {
2842         Elf_Scn *scn = NULL;
2843         Elf *elf = obj->efile.elf;
2844         const char *sec_name;
2845
2846         while ((scn = elf_nextscn(elf, scn)) != NULL) {
2847                 sec_name = elf_sec_name(obj, scn);
2848                 if (!sec_name)
2849                         return NULL;
2850
2851                 if (strcmp(sec_name, name) != 0)
2852                         continue;
2853
2854                 return scn;
2855         }
2856         return NULL;
2857 }
2858
2859 static int elf_sec_hdr(const struct bpf_object *obj, Elf_Scn *scn, GElf_Shdr *hdr)
2860 {
2861         if (!scn)
2862                 return -EINVAL;
2863
2864         if (gelf_getshdr(scn, hdr) != hdr) {
2865                 pr_warn("elf: failed to get section(%zu) header from %s: %s\n",
2866                         elf_ndxscn(scn), obj->path, elf_errmsg(-1));
2867                 return -EINVAL;
2868         }
2869
2870         return 0;
2871 }
2872
2873 static const char *elf_sec_name(const struct bpf_object *obj, Elf_Scn *scn)
2874 {
2875         const char *name;
2876         GElf_Shdr sh;
2877
2878         if (!scn)
2879                 return NULL;
2880
2881         if (elf_sec_hdr(obj, scn, &sh))
2882                 return NULL;
2883
2884         name = elf_sec_str(obj, sh.sh_name);
2885         if (!name) {
2886                 pr_warn("elf: failed to get section(%zu) name from %s: %s\n",
2887                         elf_ndxscn(scn), obj->path, elf_errmsg(-1));
2888                 return NULL;
2889         }
2890
2891         return name;
2892 }
2893
2894 static Elf_Data *elf_sec_data(const struct bpf_object *obj, Elf_Scn *scn)
2895 {
2896         Elf_Data *data;
2897
2898         if (!scn)
2899                 return NULL;
2900
2901         data = elf_getdata(scn, 0);
2902         if (!data) {
2903                 pr_warn("elf: failed to get section(%zu) %s data from %s: %s\n",
2904                         elf_ndxscn(scn), elf_sec_name(obj, scn) ?: "<?>",
2905                         obj->path, elf_errmsg(-1));
2906                 return NULL;
2907         }
2908
2909         return data;
2910 }
2911
2912 static bool is_sec_name_dwarf(const char *name)
2913 {
2914         /* approximation, but the actual list is too long */
2915         return strncmp(name, ".debug_", sizeof(".debug_") - 1) == 0;
2916 }
2917
2918 static bool ignore_elf_section(GElf_Shdr *hdr, const char *name)
2919 {
2920         /* no special handling of .strtab */
2921         if (hdr->sh_type == SHT_STRTAB)
2922                 return true;
2923
2924         /* ignore .llvm_addrsig section as well */
2925         if (hdr->sh_type == SHT_LLVM_ADDRSIG)
2926                 return true;
2927
2928         /* an object with no subprograms has an empty .text section, ignore it */
2929         if (hdr->sh_type == SHT_PROGBITS && hdr->sh_size == 0 &&
2930             strcmp(name, ".text") == 0)
2931                 return true;
2932
2933         /* DWARF sections */
2934         if (is_sec_name_dwarf(name))
2935                 return true;
2936
2937         if (strncmp(name, ".rel", sizeof(".rel") - 1) == 0) {
2938                 name += sizeof(".rel") - 1;
2939                 /* DWARF section relocations */
2940                 if (is_sec_name_dwarf(name))
2941                         return true;
2942
2943                 /* .BTF and .BTF.ext don't need relocations */
2944                 if (strcmp(name, BTF_ELF_SEC) == 0 ||
2945                     strcmp(name, BTF_EXT_ELF_SEC) == 0)
2946                         return true;
2947         }
2948
2949         return false;
2950 }
2951
2952 static int cmp_progs(const void *_a, const void *_b)
2953 {
2954         const struct bpf_program *a = _a;
2955         const struct bpf_program *b = _b;
2956
2957         if (a->sec_idx != b->sec_idx)
2958                 return a->sec_idx < b->sec_idx ? -1 : 1;
2959
2960         /* sec_insn_off can't be the same within the section */
2961         return a->sec_insn_off < b->sec_insn_off ? -1 : 1;
2962 }
2963
2964 static int bpf_object__elf_collect(struct bpf_object *obj)
2965 {
2966         Elf *elf = obj->efile.elf;
2967         Elf_Data *btf_ext_data = NULL;
2968         Elf_Data *btf_data = NULL;
2969         int idx = 0, err = 0;
2970         const char *name;
2971         Elf_Data *data;
2972         Elf_Scn *scn;
2973         GElf_Shdr sh;
2974
2975         /* a bunch of ELF parsing functionality depends on processing symbols,
2976          * so do the first pass and find the symbol table
2977          */
2978         scn = NULL;
2979         while ((scn = elf_nextscn(elf, scn)) != NULL) {
2980                 if (elf_sec_hdr(obj, scn, &sh))
2981                         return -LIBBPF_ERRNO__FORMAT;
2982
2983                 if (sh.sh_type == SHT_SYMTAB) {
2984                         if (obj->efile.symbols) {
2985                                 pr_warn("elf: multiple symbol tables in %s\n", obj->path);
2986                                 return -LIBBPF_ERRNO__FORMAT;
2987                         }
2988
2989                         data = elf_sec_data(obj, scn);
2990                         if (!data)
2991                                 return -LIBBPF_ERRNO__FORMAT;
2992
2993                         obj->efile.symbols = data;
2994                         obj->efile.symbols_shndx = elf_ndxscn(scn);
2995                         obj->efile.strtabidx = sh.sh_link;
2996                 }
2997         }
2998
2999         scn = NULL;
3000         while ((scn = elf_nextscn(elf, scn)) != NULL) {
3001                 idx++;
3002
3003                 if (elf_sec_hdr(obj, scn, &sh))
3004                         return -LIBBPF_ERRNO__FORMAT;
3005
3006                 name = elf_sec_str(obj, sh.sh_name);
3007                 if (!name)
3008                         return -LIBBPF_ERRNO__FORMAT;
3009
3010                 if (ignore_elf_section(&sh, name))
3011                         continue;
3012
3013                 data = elf_sec_data(obj, scn);
3014                 if (!data)
3015                         return -LIBBPF_ERRNO__FORMAT;
3016
3017                 pr_debug("elf: section(%d) %s, size %ld, link %d, flags %lx, type=%d\n",
3018                          idx, name, (unsigned long)data->d_size,
3019                          (int)sh.sh_link, (unsigned long)sh.sh_flags,
3020                          (int)sh.sh_type);
3021
3022                 if (strcmp(name, "license") == 0) {
3023                         err = bpf_object__init_license(obj, data->d_buf, data->d_size);
3024                         if (err)
3025                                 return err;
3026                 } else if (strcmp(name, "version") == 0) {
3027                         err = bpf_object__init_kversion(obj, data->d_buf, data->d_size);
3028                         if (err)
3029                                 return err;
3030                 } else if (strcmp(name, "maps") == 0) {
3031                         obj->efile.maps_shndx = idx;
3032                 } else if (strcmp(name, MAPS_ELF_SEC) == 0) {
3033                         obj->efile.btf_maps_shndx = idx;
3034                 } else if (strcmp(name, BTF_ELF_SEC) == 0) {
3035                         btf_data = data;
3036                 } else if (strcmp(name, BTF_EXT_ELF_SEC) == 0) {
3037                         btf_ext_data = data;
3038                 } else if (sh.sh_type == SHT_SYMTAB) {
3039                         /* already processed during the first pass above */
3040                 } else if (sh.sh_type == SHT_PROGBITS && data->d_size > 0) {
3041                         if (sh.sh_flags & SHF_EXECINSTR) {
3042                                 if (strcmp(name, ".text") == 0)
3043                                         obj->efile.text_shndx = idx;
3044                                 err = bpf_object__add_programs(obj, data, name, idx);
3045                                 if (err)
3046                                         return err;
3047                         } else if (strcmp(name, DATA_SEC) == 0) {
3048                                 obj->efile.data = data;
3049                                 obj->efile.data_shndx = idx;
3050                         } else if (strcmp(name, RODATA_SEC) == 0) {
3051                                 obj->efile.rodata = data;
3052                                 obj->efile.rodata_shndx = idx;
3053                         } else if (strcmp(name, STRUCT_OPS_SEC) == 0) {
3054                                 obj->efile.st_ops_data = data;
3055                                 obj->efile.st_ops_shndx = idx;
3056                         } else {
3057                                 pr_info("elf: skipping unrecognized data section(%d) %s\n",
3058                                         idx, name);
3059                         }
3060                 } else if (sh.sh_type == SHT_REL) {
3061                         int nr_sects = obj->efile.nr_reloc_sects;
3062                         void *sects = obj->efile.reloc_sects;
3063                         int sec = sh.sh_info; /* points to other section */
3064
3065                         /* Only do relo for section with exec instructions */
3066                         if (!section_have_execinstr(obj, sec) &&
3067                             strcmp(name, ".rel" STRUCT_OPS_SEC) &&
3068                             strcmp(name, ".rel" MAPS_ELF_SEC)) {
3069                                 pr_info("elf: skipping relo section(%d) %s for section(%d) %s\n",
3070                                         idx, name, sec,
3071                                         elf_sec_name(obj, elf_sec_by_idx(obj, sec)) ?: "<?>");
3072                                 continue;
3073                         }
3074
3075                         sects = libbpf_reallocarray(sects, nr_sects + 1,
3076                                                     sizeof(*obj->efile.reloc_sects));
3077                         if (!sects)
3078                                 return -ENOMEM;
3079
3080                         obj->efile.reloc_sects = sects;
3081                         obj->efile.nr_reloc_sects++;
3082
3083                         obj->efile.reloc_sects[nr_sects].shdr = sh;
3084                         obj->efile.reloc_sects[nr_sects].data = data;
3085                 } else if (sh.sh_type == SHT_NOBITS && strcmp(name, BSS_SEC) == 0) {
3086                         obj->efile.bss = data;
3087                         obj->efile.bss_shndx = idx;
3088                 } else {
3089                         pr_info("elf: skipping section(%d) %s (size %zu)\n", idx, name,
3090                                 (size_t)sh.sh_size);
3091                 }
3092         }
3093
3094         if (!obj->efile.strtabidx || obj->efile.strtabidx > idx) {
3095                 pr_warn("elf: symbol strings section missing or invalid in %s\n", obj->path);
3096                 return -LIBBPF_ERRNO__FORMAT;
3097         }
3098
3099         /* sort BPF programs by section index and in-section instruction offset
3100          * for faster search */
3101         qsort(obj->programs, obj->nr_programs, sizeof(*obj->programs), cmp_progs);
3102
3103         return bpf_object__init_btf(obj, btf_data, btf_ext_data);
3104 }
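
/* Illustrative summary only: a typical BPF object handled above contains
 * sections along these lines (names are taken from the checks in this
 * function):
 *
 *   license, version        - license string and optional kernel version
 *   maps / .maps            - legacy and BTF-defined map definitions
 *   .BTF, .BTF.ext          - type info and func/line info
 *   .text + SEC()-named     - executable (SHF_EXECINSTR) program sections
 *   .data, .rodata, .bss    - global data backing internal maps
 *   .rel<section>           - relocations, collected per executable section
 */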
3105
3106 static bool sym_is_extern(const GElf_Sym *sym)
3107 {
3108         int bind = GELF_ST_BIND(sym->st_info);
3109         /* externs are symbols w/ type=NOTYPE, bind=GLOBAL|WEAK, section=UND */
3110         return sym->st_shndx == SHN_UNDEF &&
3111                (bind == STB_GLOBAL || bind == STB_WEAK) &&
3112                GELF_ST_TYPE(sym->st_info) == STT_NOTYPE;
3113 }
3114
3115 static bool sym_is_subprog(const GElf_Sym *sym, int text_shndx)
3116 {
3117         int bind = GELF_ST_BIND(sym->st_info);
3118         int type = GELF_ST_TYPE(sym->st_info);
3119
3120         /* in .text section */
3121         if (sym->st_shndx != text_shndx)
3122                 return false;
3123
3124         /* local function */
3125         if (bind == STB_LOCAL && type == STT_SECTION)
3126                 return true;
3127
3128         /* global function */
3129         return bind == STB_GLOBAL && type == STT_FUNC;
3130 }
3131
3132 static int find_extern_btf_id(const struct btf *btf, const char *ext_name)
3133 {
3134         const struct btf_type *t;
3135         const char *tname;
3136         int i, n;
3137
3138         if (!btf)
3139                 return -ESRCH;
3140
3141         n = btf__get_nr_types(btf);
3142         for (i = 1; i <= n; i++) {
3143                 t = btf__type_by_id(btf, i);
3144
3145                 if (!btf_is_var(t) && !btf_is_func(t))
3146                         continue;
3147
3148                 tname = btf__name_by_offset(btf, t->name_off);
3149                 if (strcmp(tname, ext_name))
3150                         continue;
3151
3152                 if (btf_is_var(t) &&
3153                     btf_var(t)->linkage != BTF_VAR_GLOBAL_EXTERN)
3154                         return -EINVAL;
3155
3156                 if (btf_is_func(t) && btf_func_linkage(t) != BTF_FUNC_EXTERN)
3157                         return -EINVAL;
3158
3159                 return i;
3160         }
3161
3162         return -ENOENT;
3163 }
3164
3165 static int find_extern_sec_btf_id(struct btf *btf, int ext_btf_id)
{
3166         const struct btf_var_secinfo *vs;
3167         const struct btf_type *t;
3168         int i, j, n;
3169
3170         if (!btf)
3171                 return -ESRCH;
3172
3173         n = btf__get_nr_types(btf);
3174         for (i = 1; i <= n; i++) {
3175                 t = btf__type_by_id(btf, i);
3176
3177                 if (!btf_is_datasec(t))
3178                         continue;
3179
3180                 vs = btf_var_secinfos(t);
3181                 for (j = 0; j < btf_vlen(t); j++, vs++) {
3182                         if (vs->type == ext_btf_id)
3183                                 return i;
3184                 }
3185         }
3186
3187         return -ENOENT;
3188 }
3189
3190 static enum kcfg_type find_kcfg_type(const struct btf *btf, int id,
3191                                      bool *is_signed)
3192 {
3193         const struct btf_type *t;
3194         const char *name;
3195
3196         t = skip_mods_and_typedefs(btf, id, NULL);
3197         name = btf__name_by_offset(btf, t->name_off);
3198
3199         if (is_signed)
3200                 *is_signed = false;
3201         switch (btf_kind(t)) {
3202         case BTF_KIND_INT: {
3203                 int enc = btf_int_encoding(t);
3204
3205                 if (enc & BTF_INT_BOOL)
3206                         return t->size == 1 ? KCFG_BOOL : KCFG_UNKNOWN;
3207                 if (is_signed)
3208                         *is_signed = enc & BTF_INT_SIGNED;
3209                 if (t->size == 1)
3210                         return KCFG_CHAR;
3211                 if (t->size < 1 || t->size > 8 || (t->size & (t->size - 1)))
3212                         return KCFG_UNKNOWN;
3213                 return KCFG_INT;
3214         }
3215         case BTF_KIND_ENUM:
3216                 if (t->size != 4)
3217                         return KCFG_UNKNOWN;
3218                 if (strcmp(name, "libbpf_tristate"))
3219                         return KCFG_UNKNOWN;
3220                 return KCFG_TRISTATE;
3221         case BTF_KIND_ARRAY:
3222                 if (btf_array(t)->nelems == 0)
3223                         return KCFG_UNKNOWN;
3224                 if (find_kcfg_type(btf, btf_array(t)->type, NULL) != KCFG_CHAR)
3225                         return KCFG_UNKNOWN;
3226                 return KCFG_CHAR_ARR;
3227         default:
3228                 return KCFG_UNKNOWN;
3229         }
3230 }
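
/* A minimal sketch of BPF-side extern declarations that resolve to the kcfg
 * types above (assumes the __kconfig section attribute from bpf_helpers.h;
 * the CONFIG_* names are just examples):
 *
 *   extern int CONFIG_HZ __kconfig;                       // KCFG_INT
 *   extern _Bool CONFIG_BPF_SYSCALL __kconfig;            // KCFG_BOOL
 *   extern enum libbpf_tristate CONFIG_IPV6 __kconfig;    // KCFG_TRISTATE
 *   extern char CONFIG_LOCALVERSION[32] __kconfig;        // KCFG_CHAR_ARR
 */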
3231
3232 static int cmp_externs(const void *_a, const void *_b)
3233 {
3234         const struct extern_desc *a = _a;
3235         const struct extern_desc *b = _b;
3236
3237         if (a->type != b->type)
3238                 return a->type < b->type ? -1 : 1;
3239
3240         if (a->type == EXT_KCFG) {
3241                 /* descending order by alignment requirements */
3242                 if (a->kcfg.align != b->kcfg.align)
3243                         return a->kcfg.align > b->kcfg.align ? -1 : 1;
3244                 /* ascending order by size, within same alignment class */
3245                 if (a->kcfg.sz != b->kcfg.sz)
3246                         return a->kcfg.sz < b->kcfg.sz ? -1 : 1;
3247         }
3248
3249         /* resolve ties by name */
3250         return strcmp(a->name, b->name);
3251 }
3252
3253 static int find_int_btf_id(const struct btf *btf)
3254 {
3255         const struct btf_type *t;
3256         int i, n;
3257
3258         n = btf__get_nr_types(btf);
3259         for (i = 1; i <= n; i++) {
3260                 t = btf__type_by_id(btf, i);
3261
3262                 if (btf_is_int(t) && btf_int_bits(t) == 32)
3263                         return i;
3264         }
3265
3266         return 0;
3267 }
3268
3269 static int add_dummy_ksym_var(struct btf *btf)
3270 {
3271         int i, int_btf_id, sec_btf_id, dummy_var_btf_id;
3272         const struct btf_var_secinfo *vs;
3273         const struct btf_type *sec;
3274
3275         if (!btf)
3276                 return 0;
3277
3278         sec_btf_id = btf__find_by_name_kind(btf, KSYMS_SEC,
3279                                             BTF_KIND_DATASEC);
3280         if (sec_btf_id < 0)
3281                 return 0;
3282
3283         sec = btf__type_by_id(btf, sec_btf_id);
3284         vs = btf_var_secinfos(sec);
3285         for (i = 0; i < btf_vlen(sec); i++, vs++) {
3286                 const struct btf_type *vt;
3287
3288                 vt = btf__type_by_id(btf, vs->type);
3289                 if (btf_is_func(vt))
3290                         break;
3291         }
3292
3293         /* No func in ksyms sec.  No need to add dummy var. */
3294         if (i == btf_vlen(sec))
3295                 return 0;
3296
3297         int_btf_id = find_int_btf_id(btf);
3298         dummy_var_btf_id = btf__add_var(btf,
3299                                         "dummy_ksym",
3300                                         BTF_VAR_GLOBAL_ALLOCATED,
3301                                         int_btf_id);
3302         if (dummy_var_btf_id < 0)
3303                 pr_warn("cannot create a dummy_ksym var\n");
3304
3305         return dummy_var_btf_id;
3306 }
3307
3308 static int bpf_object__collect_externs(struct bpf_object *obj)
3309 {
3310         struct btf_type *sec, *kcfg_sec = NULL, *ksym_sec = NULL;
3311         const struct btf_type *t;
3312         struct extern_desc *ext;
3313         int i, n, off, dummy_var_btf_id;
3314         const char *ext_name, *sec_name;
3315         Elf_Scn *scn;
3316         GElf_Shdr sh;
3317
3318         if (!obj->efile.symbols)
3319                 return 0;
3320
3321         scn = elf_sec_by_idx(obj, obj->efile.symbols_shndx);
3322         if (elf_sec_hdr(obj, scn, &sh))
3323                 return -LIBBPF_ERRNO__FORMAT;
3324
3325         dummy_var_btf_id = add_dummy_ksym_var(obj->btf);
3326         if (dummy_var_btf_id < 0)
3327                 return dummy_var_btf_id;
3328
3329         n = sh.sh_size / sh.sh_entsize;
3330         pr_debug("looking for externs among %d symbols...\n", n);
3331
3332         for (i = 0; i < n; i++) {
3333                 GElf_Sym sym;
3334
3335                 if (!gelf_getsym(obj->efile.symbols, i, &sym))
3336                         return -LIBBPF_ERRNO__FORMAT;
3337                 if (!sym_is_extern(&sym))
3338                         continue;
3339                 ext_name = elf_sym_str(obj, sym.st_name);
3340                 if (!ext_name || !ext_name[0])
3341                         continue;
3342
3343                 ext = obj->externs;
3344                 ext = libbpf_reallocarray(ext, obj->nr_extern + 1, sizeof(*ext));
3345                 if (!ext)
3346                         return -ENOMEM;
3347                 obj->externs = ext;
3348                 ext = &ext[obj->nr_extern];
3349                 memset(ext, 0, sizeof(*ext));
3350                 obj->nr_extern++;
3351
3352                 ext->btf_id = find_extern_btf_id(obj->btf, ext_name);
3353                 if (ext->btf_id <= 0) {
3354                         pr_warn("failed to find BTF for extern '%s': %d\n",
3355                                 ext_name, ext->btf_id);
3356                         return ext->btf_id;
3357                 }
3358                 t = btf__type_by_id(obj->btf, ext->btf_id);
3359                 ext->name = btf__name_by_offset(obj->btf, t->name_off);
3360                 ext->sym_idx = i;
3361                 ext->is_weak = GELF_ST_BIND(sym.st_info) == STB_WEAK;
3362
3363                 ext->sec_btf_id = find_extern_sec_btf_id(obj->btf, ext->btf_id);
3364                 if (ext->sec_btf_id <= 0) {
3365                         pr_warn("failed to find BTF for extern '%s' [%d] section: %d\n",
3366                                 ext_name, ext->btf_id, ext->sec_btf_id);
3367                         return ext->sec_btf_id;
3368                 }
3369                 sec = (void *)btf__type_by_id(obj->btf, ext->sec_btf_id);
3370                 sec_name = btf__name_by_offset(obj->btf, sec->name_off);
3371
3372                 if (strcmp(sec_name, KCONFIG_SEC) == 0) {
3373                         if (btf_is_func(t)) {
3374                                 pr_warn("extern function %s is unsupported under %s section\n",
3375                                         ext->name, KCONFIG_SEC);
3376                                 return -ENOTSUP;
3377                         }
3378                         kcfg_sec = sec;
3379                         ext->type = EXT_KCFG;
3380                         ext->kcfg.sz = btf__resolve_size(obj->btf, t->type);
3381                         if (ext->kcfg.sz <= 0) {
3382                                 pr_warn("failed to resolve size of extern (kcfg) '%s': %d\n",
3383                                         ext_name, ext->kcfg.sz);
3384                                 return ext->kcfg.sz;
3385                         }
3386                         ext->kcfg.align = btf__align_of(obj->btf, t->type);
3387                         if (ext->kcfg.align <= 0) {
3388                                 pr_warn("failed to determine alignment of extern (kcfg) '%s': %d\n",
3389                                         ext_name, ext->kcfg.align);
3390                                 return -EINVAL;
3391                         }
3392                         ext->kcfg.type = find_kcfg_type(obj->btf, t->type,
3393                                                         &ext->kcfg.is_signed);
3394                         if (ext->kcfg.type == KCFG_UNKNOWN) {
3395                                 pr_warn("extern (kcfg) '%s' type is unsupported\n", ext_name);
3396                                 return -ENOTSUP;
3397                         }
3398                 } else if (strcmp(sec_name, KSYMS_SEC) == 0) {
3399                         if (btf_is_func(t) && ext->is_weak) {
3400                                 pr_warn("extern weak function %s is unsupported\n",
3401                                         ext->name);
3402                                 return -ENOTSUP;
3403                         }
3404                         ksym_sec = sec;
3405                         ext->type = EXT_KSYM;
3406                         skip_mods_and_typedefs(obj->btf, t->type,
3407                                                &ext->ksym.type_id);
3408                 } else {
3409                         pr_warn("unrecognized extern section '%s'\n", sec_name);
3410                         return -ENOTSUP;
3411                 }
3412         }
3413         pr_debug("collected %d externs total\n", obj->nr_extern);
3414
3415         if (!obj->nr_extern)
3416                 return 0;
3417
3418         /* sort externs by type, for kcfg ones also by (align, size, name) */
3419         qsort(obj->externs, obj->nr_extern, sizeof(*ext), cmp_externs);
3420
3421         /* for .ksyms section, we need to turn all externs into allocated
3422          * variables in BTF to pass kernel verification; we do this by
3423          * pretending that each extern is a 4-byte integer variable
3424          */
3425         if (ksym_sec) {
3426                 /* find existing 4-byte integer type in BTF to use for fake
3427                  * extern variables in DATASEC
3428                  */
3429                 int int_btf_id = find_int_btf_id(obj->btf);
3430                 /* For extern functions, the dummy_var added earlier
3431                  * replaces vs->type, and its name string is reused
3432                  * to fill in any missing parameter names in the
3433                  * function prototype.
3434                  */
3435                 const struct btf_type *dummy_var;
3436
3437                 dummy_var = btf__type_by_id(obj->btf, dummy_var_btf_id);
3438                 for (i = 0; i < obj->nr_extern; i++) {
3439                         ext = &obj->externs[i];
3440                         if (ext->type != EXT_KSYM)
3441                                 continue;
3442                         pr_debug("extern (ksym) #%d: symbol %d, name %s\n",
3443                                  i, ext->sym_idx, ext->name);
3444                 }
3445
3446                 sec = ksym_sec;
3447                 n = btf_vlen(sec);
3448                 for (i = 0, off = 0; i < n; i++, off += sizeof(int)) {
3449                         struct btf_var_secinfo *vs = btf_var_secinfos(sec) + i;
3450                         struct btf_type *vt;
3451
3452                         vt = (void *)btf__type_by_id(obj->btf, vs->type);
3453                         ext_name = btf__name_by_offset(obj->btf, vt->name_off);
3454                         ext = find_extern_by_name(obj, ext_name);
3455                         if (!ext) {
3456                                 pr_warn("failed to find extern definition for BTF %s '%s'\n",
3457                                         btf_kind_str(vt), ext_name);
3458                                 return -ESRCH;
3459                         }
3460                         if (btf_is_func(vt)) {
3461                                 const struct btf_type *func_proto;
3462                                 struct btf_param *param;
3463                                 int j;
3464
3465                                 func_proto = btf__type_by_id(obj->btf,
3466                                                              vt->type);
3467                                 param = btf_params(func_proto);
3468                                 /* Reuse the dummy_var string if the
3469                                  * func proto does not have a param name.
3470                                  */
3471                                 for (j = 0; j < btf_vlen(func_proto); j++)
3472                                         if (param[j].type && !param[j].name_off)
3473                                                 param[j].name_off =
3474                                                         dummy_var->name_off;
3475                                 vs->type = dummy_var_btf_id;
3476                                 vt->info &= ~0xffff;
3477                                 vt->info |= BTF_FUNC_GLOBAL;
3478                         } else {
3479                                 btf_var(vt)->linkage = BTF_VAR_GLOBAL_ALLOCATED;
3480                                 vt->type = int_btf_id;
3481                         }
3482                         vs->offset = off;
3483                         vs->size = sizeof(int);
3484                 }
3485                 sec->size = off;
3486         }
3487
3488         if (kcfg_sec) {
3489                 sec = kcfg_sec;
3490                 /* for kcfg externs calculate their offsets within a .kconfig map */
3491                 off = 0;
3492                 for (i = 0; i < obj->nr_extern; i++) {
3493                         ext = &obj->externs[i];
3494                         if (ext->type != EXT_KCFG)
3495                                 continue;
3496
3497                         ext->kcfg.data_off = roundup(off, ext->kcfg.align);
3498                         off = ext->kcfg.data_off + ext->kcfg.sz;
3499                         pr_debug("extern (kcfg) #%d: symbol %d, off %u, name %s\n",
3500                                  i, ext->sym_idx, ext->kcfg.data_off, ext->name);
3501                 }
3502                 sec->size = off;
3503                 n = btf_vlen(sec);
3504                 for (i = 0; i < n; i++) {
3505                         struct btf_var_secinfo *vs = btf_var_secinfos(sec) + i;
3506
3507                         t = btf__type_by_id(obj->btf, vs->type);
3508                         ext_name = btf__name_by_offset(obj->btf, t->name_off);
3509                         ext = find_extern_by_name(obj, ext_name);
3510                         if (!ext) {
3511                                 pr_warn("failed to find extern definition for BTF var '%s'\n",
3512                                         ext_name);
3513                                 return -ESRCH;
3514                         }
3515                         btf_var(t)->linkage = BTF_VAR_GLOBAL_ALLOCATED;
3516                         vs->offset = ext->kcfg.data_off;
3517                 }
3518         }
3519         return 0;
3520 }
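
/* A minimal sketch of the .ksyms flavor handled above, as it would appear in
 * BPF-side C (assumes the __ksym section attribute from bpf_helpers.h; both
 * symbol names below are hypothetical):
 *
 *   extern const int bpf_prog_active __ksym;       // EXT_KSYM variable
 *   extern void some_kernel_func(int i) __ksym;    // EXT_KSYM function
 */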
3521
3522 struct bpf_program *
3523 bpf_object__find_program_by_title(const struct bpf_object *obj,
3524                                   const char *title)
3525 {
3526         struct bpf_program *pos;
3527
3528         bpf_object__for_each_program(pos, obj) {
3529                 if (pos->sec_name && !strcmp(pos->sec_name, title))
3530                         return pos;
3531         }
3532         return errno = ENOENT, NULL;
3533 }
3534
3535 static bool prog_is_subprog(const struct bpf_object *obj,
3536                             const struct bpf_program *prog)
3537 {
3538         /* For legacy reasons, libbpf supports entry-point BPF programs
3539          * without a SEC() attribute, i.e., those in the .text section. But if
3540          * there are 2 or more such programs in the .text section, they all
3541          * must be subprograms called from entry-point BPF programs in
3542          * designated SEC()'tions, otherwise there is no way to distinguish
3543          * which of those programs should be loaded vs which are subprograms.
3544          * Similarly, if there is a function/program in .text and at least one
3545          * other BPF program with a custom SEC() attribute, then we just assume
3546          * .text programs are subprograms (even if they are not called from
3547          * other programs), because libbpf never explicitly supported mixing
3548          * SEC()-designated BPF programs and .text entry-point BPF programs.
3549          */
3550         return prog->sec_idx == obj->efile.text_shndx && obj->nr_programs > 1;
3551 }
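
/* Illustration of the rule above (hypothetical BPF-side C; names are
 * placeholders):
 *
 *   SEC("xdp")
 *   int xdp_entry(struct xdp_md *ctx)    // entry-point, own SEC() section
 *   {
 *           return do_work(ctx);
 *   }
 *
 *   int do_work(struct xdp_md *ctx)      // no SEC() -> lands in .text and
 *   {                                    // is treated as a subprogram here
 *           return 2;                    // XDP_PASS
 *   }
 */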
3552
3553 struct bpf_program *
3554 bpf_object__find_program_by_name(const struct bpf_object *obj,
3555                                  const char *name)
3556 {
3557         struct bpf_program *prog;
3558
3559         bpf_object__for_each_program(prog, obj) {
3560                 if (prog_is_subprog(obj, prog))
3561                         continue;
3562                 if (!strcmp(prog->name, name))
3563                         return prog;
3564         }
3565         return errno = ENOENT, NULL;
3566 }
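
/* Minimal usage sketch (error handling elided; the object path and program
 * name are placeholders):
 *
 *   struct bpf_object *obj = bpf_object__open("prog.bpf.o");
 *   struct bpf_program *p;
 *
 *   p = bpf_object__find_program_by_name(obj, "xdp_entry");
 *   if (!p)  // errno is set to ENOENT, as above
 *           ...;
 */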
3567
3568 static bool bpf_object__shndx_is_data(const struct bpf_object *obj,
3569                                       int shndx)
3570 {
3571         return shndx == obj->efile.data_shndx ||
3572                shndx == obj->efile.bss_shndx ||
3573                shndx == obj->efile.rodata_shndx;
3574 }
3575
3576 static bool bpf_object__shndx_is_maps(const struct bpf_object *obj,
3577                                       int shndx)
3578 {
3579         return shndx == obj->efile.maps_shndx ||
3580                shndx == obj->efile.btf_maps_shndx;
3581 }
3582
3583 static enum libbpf_map_type
3584 bpf_object__section_to_libbpf_map_type(const struct bpf_object *obj, int shndx)
3585 {
3586         if (shndx == obj->efile.data_shndx)
3587                 return LIBBPF_MAP_DATA;
3588         else if (shndx == obj->efile.bss_shndx)
3589                 return LIBBPF_MAP_BSS;
3590         else if (shndx == obj->efile.rodata_shndx)
3591                 return LIBBPF_MAP_RODATA;
3592         else if (shndx == obj->efile.symbols_shndx)
3593                 return LIBBPF_MAP_KCONFIG;
3594         else
3595                 return LIBBPF_MAP_UNSPEC;
3596 }
3597
3598 static int bpf_program__record_reloc(struct bpf_program *prog,
3599                                      struct reloc_desc *reloc_desc,
3600                                      __u32 insn_idx, const char *sym_name,
3601                                      const GElf_Sym *sym, const GElf_Rel *rel)
3602 {
3603         struct bpf_insn *insn = &prog->insns[insn_idx];
3604         size_t map_idx, nr_maps = prog->obj->nr_maps;
3605         struct bpf_object *obj = prog->obj;
3606         __u32 shdr_idx = sym->st_shndx;
3607         enum libbpf_map_type type;
3608         const char *sym_sec_name;
3609         struct bpf_map *map;
3610
3611         if (!is_call_insn(insn) && !is_ldimm64_insn(insn)) {
3612                 pr_warn("prog '%s': invalid relo against '%s' for insns[%d].code 0x%x\n",
3613                         prog->name, sym_name, insn_idx, insn->code);
3614                 return -LIBBPF_ERRNO__RELOC;
3615         }
3616
3617         if (sym_is_extern(sym)) {
3618                 int sym_idx = GELF_R_SYM(rel->r_info);
3619                 int i, n = obj->nr_extern;
3620                 struct extern_desc *ext;
3621
3622                 for (i = 0; i < n; i++) {
3623                         ext = &obj->externs[i];
3624                         if (ext->sym_idx == sym_idx)
3625                                 break;
3626                 }
3627                 if (i >= n) {
3628                         pr_warn("prog '%s': extern relo failed to find extern for '%s' (%d)\n",
3629                                 prog->name, sym_name, sym_idx);
3630                         return -LIBBPF_ERRNO__RELOC;
3631                 }
3632                 pr_debug("prog '%s': found extern #%d '%s' (sym %d) for insn #%u\n",
3633                          prog->name, i, ext->name, ext->sym_idx, insn_idx);
3634                 if (insn->code == (BPF_JMP | BPF_CALL))
3635                         reloc_desc->type = RELO_EXTERN_FUNC;
3636                 else
3637                         reloc_desc->type = RELO_EXTERN_VAR;
3638                 reloc_desc->insn_idx = insn_idx;
3639                 reloc_desc->sym_off = i; /* sym_off stores extern index */
3640                 return 0;
3641         }
3642
3643         /* sub-program call relocation */
3644         if (is_call_insn(insn)) {
3645                 if (insn->src_reg != BPF_PSEUDO_CALL) {
3646                         pr_warn("prog '%s': incorrect bpf_call opcode\n", prog->name);
3647                         return -LIBBPF_ERRNO__RELOC;
3648                 }
3649                 /* text_shndx can be 0, if no default "main" program exists */
3650                 if (!shdr_idx || shdr_idx != obj->efile.text_shndx) {
3651                         sym_sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, shdr_idx));
3652                         pr_warn("prog '%s': bad call relo against '%s' in section '%s'\n",
3653                                 prog->name, sym_name, sym_sec_name);
3654                         return -LIBBPF_ERRNO__RELOC;
3655                 }
3656                 if (sym->st_value % BPF_INSN_SZ) {
3657                         pr_warn("prog '%s': bad call relo against '%s' at offset %zu\n",
3658                                 prog->name, sym_name, (size_t)sym->st_value);
3659                         return -LIBBPF_ERRNO__RELOC;
3660                 }
3661                 reloc_desc->type = RELO_CALL;
3662                 reloc_desc->insn_idx = insn_idx;
3663                 reloc_desc->sym_off = sym->st_value;
3664                 return 0;
3665         }
3666
3667         if (!shdr_idx || shdr_idx >= SHN_LORESERVE) {
3668                 pr_warn("prog '%s': invalid relo against '%s' in special section 0x%x; forgot to initialize global var?..\n",
3669                         prog->name, sym_name, shdr_idx);
3670                 return -LIBBPF_ERRNO__RELOC;
3671         }
3672
3673         /* loading subprog addresses */
3674         if (sym_is_subprog(sym, obj->efile.text_shndx)) {
3675                 /* global_func: sym->st_value = offset in the section, insn->imm = 0.
3676                  * local_func: sym->st_value = 0, insn->imm = offset in the section.
3677                  */
3678                 if ((sym->st_value % BPF_INSN_SZ) || (insn->imm % BPF_INSN_SZ)) {
3679                         pr_warn("prog '%s': bad subprog addr relo against '%s' at offset %zu+%d\n",
3680                                 prog->name, sym_name, (size_t)sym->st_value, insn->imm);
3681                         return -LIBBPF_ERRNO__RELOC;
3682                 }
3683
3684                 reloc_desc->type = RELO_SUBPROG_ADDR;
3685                 reloc_desc->insn_idx = insn_idx;
3686                 reloc_desc->sym_off = sym->st_value;
3687                 return 0;
3688         }
3689
3690         type = bpf_object__section_to_libbpf_map_type(obj, shdr_idx);
3691         sym_sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, shdr_idx));
3692
3693         /* generic map reference relocation */
3694         if (type == LIBBPF_MAP_UNSPEC) {
3695                 if (!bpf_object__shndx_is_maps(obj, shdr_idx)) {
3696                         pr_warn("prog '%s': bad map relo against '%s' in section '%s'\n",
3697                                 prog->name, sym_name, sym_sec_name);
3698                         return -LIBBPF_ERRNO__RELOC;
3699                 }
3700                 for (map_idx = 0; map_idx < nr_maps; map_idx++) {
3701                         map = &obj->maps[map_idx];
3702                         if (map->libbpf_type != type ||
3703                             map->sec_idx != sym->st_shndx ||
3704                             map->sec_offset != sym->st_value)
3705                                 continue;
3706                         pr_debug("prog '%s': found map %zd (%s, sec %d, off %zu) for insn #%u\n",
3707                                  prog->name, map_idx, map->name, map->sec_idx,
3708                                  map->sec_offset, insn_idx);
3709                         break;
3710                 }
3711                 if (map_idx >= nr_maps) {
3712                         pr_warn("prog '%s': map relo failed to find map for section '%s', off %zu\n",
3713                                 prog->name, sym_sec_name, (size_t)sym->st_value);
3714                         return -LIBBPF_ERRNO__RELOC;
3715                 }
3716                 reloc_desc->type = RELO_LD64;
3717                 reloc_desc->insn_idx = insn_idx;
3718                 reloc_desc->map_idx = map_idx;
3719                 reloc_desc->sym_off = 0; /* sym->st_value determines map_idx */
3720                 return 0;
3721         }
3722
3723         /* global data map relocation */
3724         if (!bpf_object__shndx_is_data(obj, shdr_idx)) {
3725                 pr_warn("prog '%s': bad data relo against section '%s'\n",
3726                         prog->name, sym_sec_name);
3727                 return -LIBBPF_ERRNO__RELOC;
3728         }
3729         for (map_idx = 0; map_idx < nr_maps; map_idx++) {
3730                 map = &obj->maps[map_idx];
3731                 if (map->libbpf_type != type)
3732                         continue;
3733                 pr_debug("prog '%s': found data map %zd (%s, sec %d, off %zu) for insn %u\n",
3734                          prog->name, map_idx, map->name, map->sec_idx,
3735                          map->sec_offset, insn_idx);
3736                 break;
3737         }
3738         if (map_idx >= nr_maps) {
3739                 pr_warn("prog '%s': data relo failed to find map for section '%s'\n",
3740                         prog->name, sym_sec_name);
3741                 return -LIBBPF_ERRNO__RELOC;
3742         }
3743
3744         reloc_desc->type = RELO_DATA;
3745         reloc_desc->insn_idx = insn_idx;
3746         reloc_desc->map_idx = map_idx;
3747         reloc_desc->sym_off = sym->st_value;
3748         return 0;
3749 }
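
/* Recap of the relocation kinds recorded above:
 *   RELO_EXTERN_FUNC/VAR - relo against an undefined extern symbol;
 *                          sym_off stores the extern's index
 *   RELO_CALL            - BPF_PSEUDO_CALL into .text; sym_off is the
 *                          callee's byte offset within .text
 *   RELO_SUBPROG_ADDR    - ldimm64 taking the address of a .text subprog
 *   RELO_LD64            - ldimm64 against a map definition section
 *   RELO_DATA            - ldimm64 against .data/.rodata/.bss, resolved to
 *                          the backing internal map
 */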
3750
3751 static bool prog_contains_insn(const struct bpf_program *prog, size_t insn_idx)
3752 {
3753         return insn_idx >= prog->sec_insn_off &&
3754                insn_idx < prog->sec_insn_off + prog->sec_insn_cnt;
3755 }
3756
3757 static struct bpf_program *find_prog_by_sec_insn(const struct bpf_object *obj,
3758                                                  size_t sec_idx, size_t insn_idx)
3759 {
3760         int l = 0, r = obj->nr_programs - 1, m;
3761         struct bpf_program *prog;
3762
3763         while (l < r) {
3764                 m = l + (r - l + 1) / 2;
3765                 prog = &obj->programs[m];
3766
3767                 if (prog->sec_idx < sec_idx ||
3768                     (prog->sec_idx == sec_idx && prog->sec_insn_off <= insn_idx))
3769                         l = m;
3770                 else
3771                         r = m - 1;
3772         }
3773         /* matching program could be at index l, but it still might be the
3774          * wrong one, so we need to double-check the conditions one last time
3775          */
3776         prog = &obj->programs[l];
3777         if (prog->sec_idx == sec_idx && prog_contains_insn(prog, insn_idx))
3778                 return prog;
3779         return NULL;
3780 }
3781
3782 static int
3783 bpf_object__collect_prog_relos(struct bpf_object *obj, GElf_Shdr *shdr, Elf_Data *data)
3784 {
3785         Elf_Data *symbols = obj->efile.symbols;
3786         const char *relo_sec_name, *sec_name;
3787         size_t sec_idx = shdr->sh_info;
3788         struct bpf_program *prog;
3789         struct reloc_desc *relos;
3790         int err, i, nrels;
3791         const char *sym_name;
3792         __u32 insn_idx;
3793         Elf_Scn *scn;
3794         Elf_Data *scn_data;
3795         GElf_Sym sym;
3796         GElf_Rel rel;
3797
3798         scn = elf_sec_by_idx(obj, sec_idx);
3799         scn_data = elf_sec_data(obj, scn);
3800
3801         relo_sec_name = elf_sec_str(obj, shdr->sh_name);
3802         sec_name = elf_sec_name(obj, scn);
3803         if (!relo_sec_name || !sec_name)
3804                 return -EINVAL;
3805
3806         pr_debug("sec '%s': collecting relocation for section(%zu) '%s'\n",
3807                  relo_sec_name, sec_idx, sec_name);
3808         nrels = shdr->sh_size / shdr->sh_entsize;
3809
3810         for (i = 0; i < nrels; i++) {
3811                 if (!gelf_getrel(data, i, &rel)) {
3812                         pr_warn("sec '%s': failed to get relo #%d\n", relo_sec_name, i);
3813                         return -LIBBPF_ERRNO__FORMAT;
3814                 }
3815                 if (!gelf_getsym(symbols, GELF_R_SYM(rel.r_info), &sym)) {
3816                         pr_warn("sec '%s': symbol 0x%zx not found for relo #%d\n",
3817                                 relo_sec_name, (size_t)GELF_R_SYM(rel.r_info), i);
3818                         return -LIBBPF_ERRNO__FORMAT;
3819                 }
3820
3821                 if (rel.r_offset % BPF_INSN_SZ || rel.r_offset >= scn_data->d_size) {
3822                         pr_warn("sec '%s': invalid offset 0x%zx for relo #%d\n",
3823                                 relo_sec_name, (size_t)rel.r_offset, i);
3824                         return -LIBBPF_ERRNO__FORMAT;
3825                 }
3826
3827                 insn_idx = rel.r_offset / BPF_INSN_SZ;
3828                 /* relocations against static functions are recorded as
3829                  * relocations against the section that contains the function;
3830                  * in such cases, the symbol will be STT_SECTION and sym.st_name
3831                  * will point to the empty string (0), so fetch the section
3832                  * name instead
3833                  */
3834                 if (GELF_ST_TYPE(sym.st_info) == STT_SECTION && sym.st_name == 0)
3835                         sym_name = elf_sec_name(obj, elf_sec_by_idx(obj, sym.st_shndx));
3836                 else
3837                         sym_name = elf_sym_str(obj, sym.st_name);
3838                 sym_name = sym_name ?: "<?>";
3839
3840                 pr_debug("sec '%s': relo #%d: insn #%u against '%s'\n",
3841                          relo_sec_name, i, insn_idx, sym_name);
3842
3843                 prog = find_prog_by_sec_insn(obj, sec_idx, insn_idx);
3844                 if (!prog) {
3845                         pr_debug("sec '%s': relo #%d: couldn't find program in section '%s' for insn #%u, probably overridden weak function, skipping...\n",
3846                                 relo_sec_name, i, sec_name, insn_idx);
3847                         continue;
3848                 }
3849
3850                 relos = libbpf_reallocarray(prog->reloc_desc,
3851                                             prog->nr_reloc + 1, sizeof(*relos));
3852                 if (!relos)
3853                         return -ENOMEM;
3854                 prog->reloc_desc = relos;
3855
3856                 /* adjust insn_idx to local BPF program frame of reference */
3857                 insn_idx -= prog->sec_insn_off;
3858                 err = bpf_program__record_reloc(prog, &relos[prog->nr_reloc],
3859                                                 insn_idx, sym_name, &sym, &rel);
3860                 if (err)
3861                         return err;
3862
3863                 prog->nr_reloc++;
3864         }
3865         return 0;
3866 }
3867
3868 static int bpf_map_find_btf_info(struct bpf_object *obj, struct bpf_map *map)
3869 {
3870         struct bpf_map_def *def = &map->def;
3871         __u32 key_type_id = 0, value_type_id = 0;
3872         int ret;
3873
3874         /* if it's a BTF-defined map, we don't need to search for type IDs.
3875          * A struct_ops map does not need btf_key_type_id and
3876          * btf_value_type_id either.
3877          */
3878         if (map->sec_idx == obj->efile.btf_maps_shndx ||
3879             bpf_map__is_struct_ops(map))
3880                 return 0;
3881
3882         if (!bpf_map__is_internal(map)) {
3883                 ret = btf__get_map_kv_tids(obj->btf, map->name, def->key_size,
3884                                            def->value_size, &key_type_id,
3885                                            &value_type_id);
3886         } else {
3887                 /*
3888                  * LLVM annotates global data differently in BTF, that is,
3889                  * only as '.data', '.bss' or '.rodata'.
3890                  */
3891                 ret = btf__find_by_name(obj->btf,
3892                                 libbpf_type_to_btf_name[map->libbpf_type]);
3893         }
3894         if (ret < 0)
3895                 return ret;
3896
3897         map->btf_key_type_id = key_type_id;
3898         map->btf_value_type_id = bpf_map__is_internal(map) ?
3899                                  ret : value_type_id;
3900         return 0;
3901 }
3902
3903 static int bpf_get_map_info_from_fdinfo(int fd, struct bpf_map_info *info)
3904 {
3905         char file[PATH_MAX], buff[4096];
3906         FILE *fp;
3907         __u32 val;
3908         int err;
3909
3910         snprintf(file, sizeof(file), "/proc/%d/fdinfo/%d", getpid(), fd);
3911         memset(info, 0, sizeof(*info));
3912
3913         fp = fopen(file, "r");
3914         if (!fp) {
3915                 err = -errno;
3916                 pr_warn("failed to open %s: %d. No procfs support?\n", file,
3917                         err);
3918                 return err;
3919         }
3920
3921         while (fgets(buff, sizeof(buff), fp)) {
3922                 if (sscanf(buff, "map_type:\t%u", &val) == 1)
3923                         info->type = val;
3924                 else if (sscanf(buff, "key_size:\t%u", &val) == 1)
3925                         info->key_size = val;
3926                 else if (sscanf(buff, "value_size:\t%u", &val) == 1)
3927                         info->value_size = val;
3928                 else if (sscanf(buff, "max_entries:\t%u", &val) == 1)
3929                         info->max_entries = val;
3930                 else if (sscanf(buff, "map_flags:\t%i", &val) == 1)
3931                         info->map_flags = val;
3932         }
3933
3934         fclose(fp);
3935
3936         return 0;
3937 }
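
/* For reference, the fields parsed above appear in /proc/<pid>/fdinfo/<fd>
 * roughly as below (values are illustrative; keys are tab-separated from
 * values, matching the sscanf() patterns):
 *
 *   map_type:	2
 *   key_size:	4
 *   value_size:	8
 *   max_entries:	128
 *   map_flags:	0x0
 */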
3938
3939 int bpf_map__reuse_fd(struct bpf_map *map, int fd)
3940 {
3941         struct bpf_map_info info = {};
3942         __u32 len = sizeof(info);
3943         int new_fd, err;
3944         char *new_name;
3945
3946         err = bpf_obj_get_info_by_fd(fd, &info, &len);
3947         if (err && errno == EINVAL)
3948                 err = bpf_get_map_info_from_fdinfo(fd, &info);
3949         if (err)
3950                 return libbpf_err(err);
3951
3952         new_name = strdup(info.name);
3953         if (!new_name)
3954                 return libbpf_err(-errno);
3955
3956         new_fd = open("/", O_RDONLY | O_CLOEXEC);
3957         if (new_fd < 0) {
3958                 err = -errno;
3959                 goto err_free_new_name;
3960         }
3961
3962         new_fd = dup3(fd, new_fd, O_CLOEXEC);
3963         if (new_fd < 0) {
3964                 err = -errno;
3965                 goto err_close_new_fd;
3966         }
3967
3968         err = zclose(map->fd);
3969         if (err) {
3970                 err = -errno;
3971                 goto err_close_new_fd;
3972         }
3973         free(map->name);
3974
3975         map->fd = new_fd;
3976         map->name = new_name;
3977         map->def.type = info.type;
3978         map->def.key_size = info.key_size;
3979         map->def.value_size = info.value_size;
3980         map->def.max_entries = info.max_entries;
3981         map->def.map_flags = info.map_flags;
3982         map->btf_key_type_id = info.btf_key_type_id;
3983         map->btf_value_type_id = info.btf_value_type_id;
3984         map->reused = true;
3985
3986         return 0;
3987
3988 err_close_new_fd:
3989         close(new_fd);
3990 err_free_new_name:
3991         free(new_name);
3992         return libbpf_err(err);
3993 }
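
/* Minimal usage sketch for fd reuse (the pin path and map name are
 * placeholders; error handling elided). Reuse must happen before the object
 * is loaded:
 *
 *   int pinned_fd = bpf_obj_get("/sys/fs/bpf/my_map");
 *   struct bpf_map *map = bpf_object__find_map_by_name(obj, "my_map");
 *
 *   if (bpf_map__reuse_fd(map, pinned_fd))
 *           ...;
 *   bpf_object__load(obj);
 */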
3994
3995 __u32 bpf_map__max_entries(const struct bpf_map *map)
3996 {
3997         return map->def.max_entries;
3998 }
3999
4000 struct bpf_map *bpf_map__inner_map(struct bpf_map *map)
4001 {
4002         if (!bpf_map_type__is_map_in_map(map->def.type))
4003                 return errno = EINVAL, NULL;
4004
4005         return map->inner_map;
4006 }
4007
4008 int bpf_map__set_max_entries(struct bpf_map *map, __u32 max_entries)
4009 {
4010         if (map->fd >= 0)
4011                 return libbpf_err(-EBUSY);
4012         map->def.max_entries = max_entries;
4013         return 0;
4014 }
4015
4016 int bpf_map__resize(struct bpf_map *map, __u32 max_entries)
4017 {
4018         if (!map || !max_entries)
4019                 return libbpf_err(-EINVAL);
4020
4021         return bpf_map__set_max_entries(map, max_entries);
4022 }
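
/* Sketch: adjusting map size before load (map name is a placeholder). Once
 * the map has an FD, bpf_map__set_max_entries() returns -EBUSY, so this must
 * run before bpf_object__load():
 *
 *   struct bpf_map *map = bpf_object__find_map_by_name(obj, "events");
 *
 *   bpf_map__set_max_entries(map, 4096);
 *   bpf_object__load(obj);
 */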
4023
4024 static int
4025 bpf_object__probe_loading(struct bpf_object *obj)
4026 {
4027         struct bpf_load_program_attr attr;
4028         char *cp, errmsg[STRERR_BUFSIZE];
4029         struct bpf_insn insns[] = {
4030                 BPF_MOV64_IMM(BPF_REG_0, 0),
4031                 BPF_EXIT_INSN(),
4032         };
4033         int ret;
4034
4035         if (obj->gen_loader)
4036                 return 0;
4037
4038         /* make sure basic loading works */
4039
4040         memset(&attr, 0, sizeof(attr));
4041         attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
4042         attr.insns = insns;
4043         attr.insns_cnt = ARRAY_SIZE(insns);
4044         attr.license = "GPL";
4045
4046         ret = bpf_load_program_xattr(&attr, NULL, 0);
4047         if (ret < 0) {
4048                 attr.prog_type = BPF_PROG_TYPE_TRACEPOINT;
4049                 ret = bpf_load_program_xattr(&attr, NULL, 0);
4050         }
4051         if (ret < 0) {
4052                 ret = errno;
4053                 cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg));
4054                 pr_warn("Error in %s():%s(%d). Couldn't load trivial BPF "
4055                         "program. Make sure your kernel supports BPF "
4056                         "(CONFIG_BPF_SYSCALL=y) and/or that RLIMIT_MEMLOCK is "
4057                         "set to a big enough value.\n", __func__, cp, ret);
4058                 return -ret;
4059         }
4060         close(ret);
4061
4062         return 0;
4063 }
4064
4065 static int probe_fd(int fd)
4066 {
4067         if (fd >= 0)
4068                 close(fd);
4069         return fd >= 0;
4070 }
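
/* Each probe_kern_*() below constructs the smallest program, map or raw BTF
 * blob that exercises exactly one kernel feature and funnels the resulting
 * fd through probe_fd(): 1 means the kernel accepted it (feature present),
 * 0 means it was rejected.
 */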
4071
4072 static int probe_kern_prog_name(void)
4073 {
4074         struct bpf_load_program_attr attr;
4075         struct bpf_insn insns[] = {
4076                 BPF_MOV64_IMM(BPF_REG_0, 0),
4077                 BPF_EXIT_INSN(),
4078         };
4079         int ret;
4080
4081         /* make sure loading with name works */
4082
4083         memset(&attr, 0, sizeof(attr));
4084         attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
4085         attr.insns = insns;
4086         attr.insns_cnt = ARRAY_SIZE(insns);
4087         attr.license = "GPL";
4088         attr.name = "test";
4089         ret = bpf_load_program_xattr(&attr, NULL, 0);
4090         return probe_fd(ret);
4091 }
4092
4093 static int probe_kern_global_data(void)
4094 {
4095         struct bpf_load_program_attr prg_attr;
4096         struct bpf_create_map_attr map_attr;
4097         char *cp, errmsg[STRERR_BUFSIZE];
4098         struct bpf_insn insns[] = {
4099                 BPF_LD_MAP_VALUE(BPF_REG_1, 0, 16),
4100                 BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 42),
4101                 BPF_MOV64_IMM(BPF_REG_0, 0),
4102                 BPF_EXIT_INSN(),
4103         };
4104         int ret, map;
4105
4106         memset(&map_attr, 0, sizeof(map_attr));
4107         map_attr.map_type = BPF_MAP_TYPE_ARRAY;
4108         map_attr.key_size = sizeof(int);
4109         map_attr.value_size = 32;
4110         map_attr.max_entries = 1;
4111
4112         map = bpf_create_map_xattr(&map_attr);
4113         if (map < 0) {
4114                 ret = -errno;
4115                 cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg));
4116                 pr_warn("Error in %s():%s(%d). Couldn't create simple array map.\n",
4117                         __func__, cp, -ret);
4118                 return ret;
4119         }
4120
4121         insns[0].imm = map;
4122
4123         memset(&prg_attr, 0, sizeof(prg_attr));
4124         prg_attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
4125         prg_attr.insns = insns;
4126         prg_attr.insns_cnt = ARRAY_SIZE(insns);
4127         prg_attr.license = "GPL";
4128
4129         ret = bpf_load_program_xattr(&prg_attr, NULL, 0);
4130         close(map);
4131         return probe_fd(ret);
4132 }
4133
4134 static int probe_kern_btf(void)
4135 {
4136         static const char strs[] = "\0int";
4137         __u32 types[] = {
4138                 /* int */
4139                 BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4),
4140         };
4141
4142         return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
4143                                              strs, sizeof(strs)));
4144 }
4145
4146 static int probe_kern_btf_func(void)
4147 {
4148         static const char strs[] = "\0int\0x\0a";
4149         /* void x(int a) {} */
4150         __u32 types[] = {
4151                 /* int */
4152                 BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4),  /* [1] */
4153                 /* FUNC_PROTO */                                /* [2] */
4154                 BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 0),
4155                 BTF_PARAM_ENC(7, 1),
4156                 /* FUNC x */                                    /* [3] */
4157                 BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0), 2),
4158         };
4159
4160         return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
4161                                              strs, sizeof(strs)));
4162 }
4163
4164 static int probe_kern_btf_func_global(void)
4165 {
4166         static const char strs[] = "\0int\0x\0a";
4167         /* void x(int a) {} with global (BTF_FUNC_GLOBAL) linkage */
4168         __u32 types[] = {
4169                 /* int */
4170                 BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4),  /* [1] */
4171                 /* FUNC_PROTO */                                /* [2] */
4172                 BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 0),
4173                 BTF_PARAM_ENC(7, 1),
4174                 /* FUNC x BTF_FUNC_GLOBAL */                    /* [3] */
4175                 BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, BTF_FUNC_GLOBAL), 2),
4176         };
4177
4178         return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
4179                                              strs, sizeof(strs)));
4180 }
4181
4182 static int probe_kern_btf_datasec(void)
4183 {
4184         static const char strs[] = "\0x\0.data";
4185         /* static int x; */
4186         __u32 types[] = {
4187                 /* int */
4188                 BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),  /* [1] */
4189                 /* VAR x */                                     /* [2] */
4190                 BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1),
4191                 BTF_VAR_STATIC,
4192                 /* DATASEC .data */                             /* [3] */
4193                 BTF_TYPE_ENC(3, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4),
4194                 BTF_VAR_SECINFO_ENC(2, 0, 4),
4195         };
4196
4197         return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
4198                                              strs, sizeof(strs)));
4199 }
4200
4201 static int probe_kern_btf_float(void)
4202 {
4203         static const char strs[] = "\0float";
4204         __u32 types[] = {
4205                 /* float */
4206                 BTF_TYPE_FLOAT_ENC(1, 4),
4207         };
4208
4209         return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
4210                                              strs, sizeof(strs)));
4211 }
4212
4213 static int probe_kern_array_mmap(void)
4214 {
4215         struct bpf_create_map_attr attr = {
4216                 .map_type = BPF_MAP_TYPE_ARRAY,
4217                 .map_flags = BPF_F_MMAPABLE,
4218                 .key_size = sizeof(int),
4219                 .value_size = sizeof(int),
4220                 .max_entries = 1,
4221         };
4222
4223         return probe_fd(bpf_create_map_xattr(&attr));
4224 }
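
/* Editor's note: illustrative userspace flow (not part of libbpf) showing
 * what BPF_F_MMAPABLE buys: the array's value memory can be mmap()'ed and
 * accessed directly, bypassing bpf_map_lookup_elem()/update_elem() syscalls.
 * The function name is made up for illustration.
 */
static void array_mmap_usage_sketch(void)
{
	struct bpf_create_map_attr attr = {
		.map_type = BPF_MAP_TYPE_ARRAY,
		.map_flags = BPF_F_MMAPABLE,
		.key_size = sizeof(int),
		.value_size = sizeof(int),
		.max_entries = 1,
	};
	int fd = bpf_create_map_xattr(&attr);
	void *mem;

	if (fd < 0)
		return;
	mem = mmap(NULL, sizeof(int), PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (mem != MAP_FAILED) {
		*(int *)mem = 42;	/* lands directly in the map's value */
		munmap(mem, sizeof(int));
	}
	close(fd);
}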
4225
4226 static int probe_kern_exp_attach_type(void)
4227 {
4228         struct bpf_load_program_attr attr;
4229         struct bpf_insn insns[] = {
4230                 BPF_MOV64_IMM(BPF_REG_0, 0),
4231                 BPF_EXIT_INSN(),
4232         };
4233
4234         memset(&attr, 0, sizeof(attr));
4235         /* use any valid combination of program type and (optional)
4236          * non-zero expected attach type (i.e., not BPF_CGROUP_INET_INGRESS,
4237          * which is zero) to see if the kernel supports the
4238          * expected_attach_type field for the BPF_PROG_LOAD command
4239          */
4240         attr.prog_type = BPF_PROG_TYPE_CGROUP_SOCK;
4241         attr.expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE;
4242         attr.insns = insns;
4243         attr.insns_cnt = ARRAY_SIZE(insns);
4244         attr.license = "GPL";
4245
4246         return probe_fd(bpf_load_program_xattr(&attr, NULL, 0));
4247 }
4248
4249 static int probe_kern_probe_read_kernel(void)
4250 {
4251         struct bpf_load_program_attr attr;
4252         struct bpf_insn insns[] = {
4253                 BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),   /* r1 = r10 (fp) */
4254                 BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),  /* r1 += -8 */
4255                 BPF_MOV64_IMM(BPF_REG_2, 8),            /* r2 = 8 */
4256                 BPF_MOV64_IMM(BPF_REG_3, 0),            /* r3 = 0 */
4257                 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_probe_read_kernel),
4258                 BPF_EXIT_INSN(),
4259         };
4260
4261         memset(&attr, 0, sizeof(attr));
4262         attr.prog_type = BPF_PROG_TYPE_KPROBE;
4263         attr.insns = insns;
4264         attr.insns_cnt = ARRAY_SIZE(insns);
4265         attr.license = "GPL";
4266
4267         return probe_fd(bpf_load_program_xattr(&attr, NULL, 0));
4268 }
4269
4270 static int probe_prog_bind_map(void)
4271 {
4272         struct bpf_load_program_attr prg_attr;
4273         struct bpf_create_map_attr map_attr;
4274         char *cp, errmsg[STRERR_BUFSIZE];
4275         struct bpf_insn insns[] = {
4276                 BPF_MOV64_IMM(BPF_REG_0, 0),
4277                 BPF_EXIT_INSN(),
4278         };
4279         int ret, map, prog;
4280
4281         memset(&map_attr, 0, sizeof(map_attr));
4282         map_attr.map_type = BPF_MAP_TYPE_ARRAY;
4283         map_attr.key_size = sizeof(int);
4284         map_attr.value_size = 32;
4285         map_attr.max_entries = 1;
4286
4287         map = bpf_create_map_xattr(&map_attr);
4288         if (map < 0) {
4289                 ret = -errno;
4290                 cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg));
4291                 pr_warn("Error in %s():%s(%d). Couldn't create simple array map.\n",
4292                         __func__, cp, -ret);
4293                 return ret;
4294         }
4295
4296         memset(&prg_attr, 0, sizeof(prg_attr));
4297         prg_attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
4298         prg_attr.insns = insns;
4299         prg_attr.insns_cnt = ARRAY_SIZE(insns);
4300         prg_attr.license = "GPL";
4301
4302         prog = bpf_load_program_xattr(&prg_attr, NULL, 0);
4303         if (prog < 0) {
4304                 close(map);
4305                 return 0;
4306         }
4307
4308         ret = bpf_prog_bind_map(prog, map, NULL);
4309
4310         close(map);
4311         close(prog);
4312
4313         return ret >= 0;
4314 }
4315
4316 static int probe_module_btf(void)
4317 {
4318         static const char strs[] = "\0int";
4319         __u32 types[] = {
4320                 /* int */
4321                 BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4),
4322         };
4323         struct bpf_btf_info info;
4324         __u32 len = sizeof(info);
4325         char name[16];
4326         int fd, err;
4327
4328         fd = libbpf__load_raw_btf((char *)types, sizeof(types), strs, sizeof(strs));
4329         if (fd < 0)
4330                 return 0; /* BTF not supported at all */
4331
4332         memset(&info, 0, sizeof(info));
4333         info.name = ptr_to_u64(name);
4334         info.name_len = sizeof(name);
4335
4336         /* check that BPF_OBJ_GET_INFO_BY_FD supports specifying name pointer;
4337          * kernel's module BTF support coincides with support for
4338          * name/name_len fields in struct bpf_btf_info.
4339          */
4340         err = bpf_obj_get_info_by_fd(fd, &info, &len);
4341         close(fd);
4342         return !err;
4343 }
4344
4345 enum kern_feature_result {
4346         FEAT_UNKNOWN = 0,
4347         FEAT_SUPPORTED = 1,
4348         FEAT_MISSING = 2,
4349 };
4350
4351 typedef int (*feature_probe_fn)(void);
4352
4353 static struct kern_feature_desc {
4354         const char *desc;
4355         feature_probe_fn probe;
4356         enum kern_feature_result res;
4357 } feature_probes[__FEAT_CNT] = {
4358         [FEAT_PROG_NAME] = {
4359                 "BPF program name", probe_kern_prog_name,
4360         },
4361         [FEAT_GLOBAL_DATA] = {
4362                 "global variables", probe_kern_global_data,
4363         },
4364         [FEAT_BTF] = {
4365                 "minimal BTF", probe_kern_btf,
4366         },
4367         [FEAT_BTF_FUNC] = {
4368                 "BTF functions", probe_kern_btf_func,
4369         },
4370         [FEAT_BTF_GLOBAL_FUNC] = {
4371                 "BTF global function", probe_kern_btf_func_global,
4372         },
4373         [FEAT_BTF_DATASEC] = {
4374                 "BTF data section and variable", probe_kern_btf_datasec,
4375         },
4376         [FEAT_ARRAY_MMAP] = {
4377                 "ARRAY map mmap()", probe_kern_array_mmap,
4378         },
4379         [FEAT_EXP_ATTACH_TYPE] = {
4380                 "BPF_PROG_LOAD expected_attach_type attribute",
4381                 probe_kern_exp_attach_type,
4382         },
4383         [FEAT_PROBE_READ_KERN] = {
4384                 "bpf_probe_read_kernel() helper", probe_kern_probe_read_kernel,
4385         },
4386         [FEAT_PROG_BIND_MAP] = {
4387                 "BPF_PROG_BIND_MAP support", probe_prog_bind_map,
4388         },
4389         [FEAT_MODULE_BTF] = {
4390                 "module BTF support", probe_module_btf,
4391         },
4392         [FEAT_BTF_FLOAT] = {
4393                 "BTF_KIND_FLOAT support", probe_kern_btf_float,
4394         },
4395 };
4396
4397 static bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id)
4398 {
4399         struct kern_feature_desc *feat = &feature_probes[feat_id];
4400         int ret;
4401
4402         if (obj->gen_loader)
4403                 /* When generating a loader program, assume the latest
4404                  * kernel to avoid extra prog_load and map_create syscalls.
4405                  */
4406                 return true;
4407
4408         if (READ_ONCE(feat->res) == FEAT_UNKNOWN) {
4409                 ret = feat->probe();
4410                 if (ret > 0) {
4411                         WRITE_ONCE(feat->res, FEAT_SUPPORTED);
4412                 } else if (ret == 0) {
4413                         WRITE_ONCE(feat->res, FEAT_MISSING);
4414                 } else {
4415                         pr_warn("Detection of kernel %s support failed: %d\n", feat->desc, ret);
4416                         WRITE_ONCE(feat->res, FEAT_MISSING);
4417                 }
4418         }
4419
4420         return READ_ONCE(feat->res) == FEAT_SUPPORTED;
4421 }
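
/* Editor's note: hypothetical caller sketch. Probes run lazily, exactly once
 * per feature, and the verdict is cached in feature_probes[], so callers can
 * query kernel_supports() freely on hot paths. The function name below is
 * made up for illustration.
 */
static void kernel_supports_usage_sketch(const struct bpf_object *obj)
{
	/* first call triggers probe_kern_btf() and caches the result */
	if (!kernel_supports(obj, FEAT_BTF))
		pr_debug("kernel has no BTF support, skipping BTF upload\n");

	/* subsequent calls are a cached enum comparison, no syscalls */
	if (kernel_supports(obj, FEAT_BTF_FLOAT))
		pr_debug("BTF_KIND_FLOAT is usable\n");
}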
4422
4423 static bool map_is_reuse_compat(const struct bpf_map *map, int map_fd)
4424 {
4425         struct bpf_map_info map_info = {};
4426         char msg[STRERR_BUFSIZE];
4427         __u32 map_info_len;
4428         int err;
4429
4430         map_info_len = sizeof(map_info);
4431
4432         err = bpf_obj_get_info_by_fd(map_fd, &map_info, &map_info_len);
4433         if (err && errno == EINVAL)
4434                 err = bpf_get_map_info_from_fdinfo(map_fd, &map_info);
4435         if (err) {
4436                 pr_warn("failed to get map info for map FD %d: %s\n", map_fd,
4437                         libbpf_strerror_r(errno, msg, sizeof(msg)));
4438                 return false;
4439         }
4440
4441         return (map_info.type == map->def.type &&
4442                 map_info.key_size == map->def.key_size &&
4443                 map_info.value_size == map->def.value_size &&
4444                 map_info.max_entries == map->def.max_entries &&
4445                 map_info.map_flags == map->def.map_flags);
4446 }
4447
4448 static int
4449 bpf_object__reuse_map(struct bpf_map *map)
4450 {
4451         char *cp, errmsg[STRERR_BUFSIZE];
4452         int err, pin_fd;
4453
4454         pin_fd = bpf_obj_get(map->pin_path);
4455         if (pin_fd < 0) {
4456                 err = -errno;
4457                 if (err == -ENOENT) {
4458                         pr_debug("found no pinned map to reuse at '%s'\n",
4459                                  map->pin_path);
4460                         return 0;
4461                 }
4462
4463                 cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg));
4464                 pr_warn("couldn't retrieve pinned map '%s': %s\n",
4465                         map->pin_path, cp);
4466                 return err;
4467         }
4468
4469         if (!map_is_reuse_compat(map, pin_fd)) {
4470                 pr_warn("couldn't reuse pinned map at '%s': parameter mismatch\n",
4471                         map->pin_path);
4472                 close(pin_fd);
4473                 return -EINVAL;
4474         }
4475
4476         err = bpf_map__reuse_fd(map, pin_fd);
4477         if (err) {
4478                 close(pin_fd);
4479                 return err;
4480         }
4481         map->pinned = true;
4482         pr_debug("reused pinned map at '%s'\n", map->pin_path);
4483
4484         return 0;
4485 }
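
/* Editor's note: illustrative user-side flow (a sketch using libbpf's public
 * API; the map name and pin path are hypothetical). Setting a pin path before
 * load is what makes the reuse logic above kick in, picking up a compatible
 * pinned map instead of creating a fresh one.
 */
static int pin_path_reuse_sketch(struct bpf_object *obj)
{
	struct bpf_map *map = bpf_object__find_map_by_name(obj, "my_map");

	if (!map)
		return -ENOENT;
	/* with a pin path set, bpf_object__load() goes through
	 * bpf_object__reuse_map() above before creating anything new
	 */
	if (bpf_map__set_pin_path(map, "/sys/fs/bpf/my_map"))
		return -EINVAL;
	return bpf_object__load(obj);
}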
4486
4487 static int
4488 bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map)
4489 {
4490         enum libbpf_map_type map_type = map->libbpf_type;
4491         char *cp, errmsg[STRERR_BUFSIZE];
4492         int err, zero = 0;
4493
4494         if (obj->gen_loader) {
4495                 bpf_gen__map_update_elem(obj->gen_loader, map - obj->maps,
4496                                          map->mmaped, map->def.value_size);
4497                 if (map_type == LIBBPF_MAP_RODATA || map_type == LIBBPF_MAP_KCONFIG)
4498                         bpf_gen__map_freeze(obj->gen_loader, map - obj->maps);
4499                 return 0;
4500         }
4501         err = bpf_map_update_elem(map->fd, &zero, map->mmaped, 0);
4502         if (err) {
4503                 err = -errno;
4504                 cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
4505                 pr_warn("Error setting initial map(%s) contents: %s\n",
4506                         map->name, cp);
4507                 return err;
4508         }
4509
4510         /* Freeze .rodata and .kconfig map as read-only from syscall side. */
4511         if (map_type == LIBBPF_MAP_RODATA || map_type == LIBBPF_MAP_KCONFIG) {
4512                 err = bpf_map_freeze(map->fd);
4513                 if (err) {
4514                         err = -errno;
4515                         cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
4516                         pr_warn("Error freezing map(%s) as read-only: %s\n",
4517                                 map->name, cp);
4518                         return err;
4519                 }
4520         }
4521         return 0;
4522 }
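
/* Editor's note: sketch of what the freeze means in practice (the fd is
 * hypothetical): once bpf_map_freeze() succeeds, userspace writes are
 * rejected with -EPERM, while lookups and BPF-program-side reads keep
 * working.
 */
static void frozen_map_behavior_sketch(int frozen_rodata_fd)
{
	int zero = 0, val = 1;

	/* expected to fail: map was frozen after initial population */
	if (bpf_map_update_elem(frozen_rodata_fd, &zero, &val, 0))
		pr_debug(".rodata map is read-only from userspace, as expected\n");
}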
4523
4524 static void bpf_map__destroy(struct bpf_map *map);
4525
4526 static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, bool is_inner)
4527 {
4528         struct bpf_create_map_attr create_attr;
4529         struct bpf_map_def *def = &map->def;
4530         int err = 0;
4531
4532         memset(&create_attr, 0, sizeof(create_attr));
4533
4534         if (kernel_supports(obj, FEAT_PROG_NAME))
4535                 create_attr.name = map->name;
4536         create_attr.map_ifindex = map->map_ifindex;
4537         create_attr.map_type = def->type;
4538         create_attr.map_flags = def->map_flags;
4539         create_attr.key_size = def->key_size;
4540         create_attr.value_size = def->value_size;
4541         create_attr.numa_node = map->numa_node;
4542
4543         if (def->type == BPF_MAP_TYPE_PERF_EVENT_ARRAY && !def->max_entries) {
4544                 int nr_cpus;
4545
4546                 nr_cpus = libbpf_num_possible_cpus();
4547                 if (nr_cpus < 0) {
4548                         pr_warn("map '%s': failed to determine number of system CPUs: %d\n",
4549                                 map->name, nr_cpus);
4550                         return nr_cpus;
4551                 }
4552                 pr_debug("map '%s': setting size to %d\n", map->name, nr_cpus);
4553                 create_attr.max_entries = nr_cpus;
4554         } else {
4555                 create_attr.max_entries = def->max_entries;
4556         }
4557
4558         if (bpf_map__is_struct_ops(map))
4559                 create_attr.btf_vmlinux_value_type_id =
4560                         map->btf_vmlinux_value_type_id;
4561
4562         create_attr.btf_fd = 0;
4563         create_attr.btf_key_type_id = 0;
4564         create_attr.btf_value_type_id = 0;
4565         if (obj->btf && btf__fd(obj->btf) >= 0 && !bpf_map_find_btf_info(obj, map)) {
4566                 create_attr.btf_fd = btf__fd(obj->btf);
4567                 create_attr.btf_key_type_id = map->btf_key_type_id;
4568                 create_attr.btf_value_type_id = map->btf_value_type_id;
4569         }
4570
4571         if (bpf_map_type__is_map_in_map(def->type)) {
4572                 if (map->inner_map) {
4573                         err = bpf_object__create_map(obj, map->inner_map, true);
4574                         if (err) {
4575                                 pr_warn("map '%s': failed to create inner map: %d\n",
4576                                         map->name, err);
4577                                 return err;
4578                         }
4579                         map->inner_map_fd = bpf_map__fd(map->inner_map);
4580                 }
4581                 if (map->inner_map_fd >= 0)
4582                         create_attr.inner_map_fd = map->inner_map_fd;
4583         }
4584
4585         if (obj->gen_loader) {
4586                 bpf_gen__map_create(obj->gen_loader, &create_attr, is_inner ? -1 : map - obj->maps);
4587                 /* Pretend to have a valid FD to pass various fd >= 0 checks.
4588                  * This fd == 0 will not be used with any syscall and will be reset to -1 eventually.
4589                  */
4590                 map->fd = 0;
4591         } else {
4592                 map->fd = bpf_create_map_xattr(&create_attr);
4593         }
4594         if (map->fd < 0 && (create_attr.btf_key_type_id ||
4595                             create_attr.btf_value_type_id)) {
4596                 char *cp, errmsg[STRERR_BUFSIZE];
4597
4598                 err = -errno;
4599                 cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
4600                 pr_warn("Error in bpf_create_map_xattr(%s):%s(%d). Retrying without BTF.\n",
4601                         map->name, cp, err);
4602                 create_attr.btf_fd = 0;
4603                 create_attr.btf_key_type_id = 0;
4604                 create_attr.btf_value_type_id = 0;
4605                 map->btf_key_type_id = 0;
4606                 map->btf_value_type_id = 0;
4607                 map->fd = bpf_create_map_xattr(&create_attr);
4608         }
4609
4610         err = map->fd < 0 ? -errno : 0;
4611
4612         if (bpf_map_type__is_map_in_map(def->type) && map->inner_map) {
4613                 if (obj->gen_loader)
4614                         map->inner_map->fd = -1;
4615                 bpf_map__destroy(map->inner_map);
4616                 zfree(&map->inner_map);
4617         }
4618
4619         return err;
4620 }
4621
4622 static int init_map_slots(struct bpf_object *obj, struct bpf_map *map)
4623 {
4624         const struct bpf_map *targ_map;
4625         unsigned int i;
4626         int fd, err = 0;
4627
4628         for (i = 0; i < map->init_slots_sz; i++) {
4629                 if (!map->init_slots[i])
4630                         continue;
4631
4632                 targ_map = map->init_slots[i];
4633                 fd = bpf_map__fd(targ_map);
4634                 if (obj->gen_loader) {
4635                         pr_warn("// TODO map_update_elem: idx %td key %d value==map_idx %td\n",
4636                                 map - obj->maps, i, targ_map - obj->maps);
4637                         return -ENOTSUP;
4638                 } else {
4639                         err = bpf_map_update_elem(map->fd, &i, &fd, 0);
4640                 }
4641                 if (err) {
4642                         err = -errno;
4643                         pr_warn("map '%s': failed to initialize slot [%d] to map '%s' fd=%d: %d\n",
4644                                 map->name, i, targ_map->name,
4645                                 fd, err);
4646                         return err;
4647                 }
4648                 pr_debug("map '%s': slot [%d] set to map '%s' fd=%d\n",
4649                          map->name, i, targ_map->name, fd);
4650         }
4651
4652         zfree(&map->init_slots);
4653         map->init_slots_sz = 0;
4654
4655         return 0;
4656 }
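
/* Editor's note: the init_slots filled in above come from a declarative
 * BPF-side map-in-map definition. A sketch of the BPF C source that produces
 * them (map names hypothetical, pattern as in kernel BPF selftests):
 *
 *   struct inner_map {
 *       __uint(type, BPF_MAP_TYPE_ARRAY);
 *       __uint(max_entries, 1);
 *       __type(key, int);
 *       __type(value, int);
 *   } inner_map1 SEC(".maps"), inner_map2 SEC(".maps");
 *
 *   struct {
 *       __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
 *       __uint(max_entries, 2);
 *       __array(values, struct inner_map);
 *   } outer_map SEC(".maps") = {
 *       .values = { [0] = &inner_map1, [1] = &inner_map2 },
 *   };
 *
 * Each .values entry becomes one init_slots[i], written out by the
 * bpf_map_update_elem() loop above once the inner maps have FDs.
 */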
4657
4658 static int
4659 bpf_object__create_maps(struct bpf_object *obj)
4660 {
4661         struct bpf_map *map;
4662         char *cp, errmsg[STRERR_BUFSIZE];
4663         unsigned int i, j;
4664         int err;
4665
4666         for (i = 0; i < obj->nr_maps; i++) {
4667                 map = &obj->maps[i];
4668
4669                 if (map->pin_path) {
4670                         err = bpf_object__reuse_map(map);
4671                         if (err) {
4672                                 pr_warn("map '%s': error reusing pinned map\n",
4673                                         map->name);
4674                                 goto err_out;
4675                         }
4676                 }
4677
4678                 if (map->fd >= 0) {
4679                         pr_debug("map '%s': skipping creation (preset fd=%d)\n",
4680                                  map->name, map->fd);
4681                 } else {
4682                         err = bpf_object__create_map(obj, map, false);
4683                         if (err)
4684                                 goto err_out;
4685
4686                         pr_debug("map '%s': created successfully, fd=%d\n",
4687                                  map->name, map->fd);
4688
4689                         if (bpf_map__is_internal(map)) {
4690                                 err = bpf_object__populate_internal_map(obj, map);
4691                                 if (err < 0) {
4692                                         zclose(map->fd);
4693                                         goto err_out;
4694                                 }
4695                         }
4696
4697                         if (map->init_slots_sz) {
4698                                 err = init_map_slots(obj, map);
4699                                 if (err < 0) {
4700                                         zclose(map->fd);
4701                                         goto err_out;
4702                                 }
4703                         }
4704                 }
4705
4706                 if (map->pin_path && !map->pinned) {
4707                         err = bpf_map__pin(map, NULL);
4708                         if (err) {
4709                                 pr_warn("map '%s': failed to auto-pin at '%s': %d\n",
4710                                         map->name, map->pin_path, err);
4711                                 zclose(map->fd);
4712                                 goto err_out;
4713                         }
4714                 }
4715         }
4716
4717         return 0;
4718
4719 err_out:
4720         cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
4721         pr_warn("map '%s': failed to create: %s(%d)\n", map->name, cp, err);
4722         pr_perm_msg(err);
4723         for (j = 0; j < i; j++)
4724                 zclose(obj->maps[j].fd);
4725         return err;
4726 }
4727
4728 #define BPF_CORE_SPEC_MAX_LEN 64
4729
4730 /* represents BPF CO-RE field or array element accessor */
4731 struct bpf_core_accessor {
4732         __u32 type_id;          /* struct/union type or array element type */
4733         __u32 idx;              /* field index or array index */
4734         const char *name;       /* field name or NULL for array accessor */
4735 };
4736
4737 struct bpf_core_spec {
4738         const struct btf *btf;
4739         /* high-level spec: named fields and array indices only */
4740         struct bpf_core_accessor spec[BPF_CORE_SPEC_MAX_LEN];
4741         /* original unresolved (no skip_mods_or_typedefs) root type ID */
4742         __u32 root_type_id;
4743         /* CO-RE relocation kind */
4744         enum bpf_core_relo_kind relo_kind;
4745         /* high-level spec length */
4746         int len;
4747         /* raw, low-level spec: 1-to-1 with accessor spec string */
4748         int raw_spec[BPF_CORE_SPEC_MAX_LEN];
4749         /* raw spec length */
4750         int raw_len;
4751         /* field bit offset represented by spec */
4752         __u32 bit_offset;
4753 };
4754
4755 static bool str_is_empty(const char *s)
4756 {
4757         return !s || !s[0];
4758 }
4759
4760 static bool is_flex_arr(const struct btf *btf,
4761                         const struct bpf_core_accessor *acc,
4762                         const struct btf_array *arr)
4763 {
4764         const struct btf_type *t;
4765
4766         /* not a flexible array if it's not inside a struct or has a non-zero size */
4767         if (!acc->name || arr->nelems > 0)
4768                 return false;
4769
4770         /* has to be the last member of enclosing struct */
4771         t = btf__type_by_id(btf, acc->type_id);
4772         return acc->idx == btf_vlen(t) - 1;
4773 }
4774
4775 static const char *core_relo_kind_str(enum bpf_core_relo_kind kind)
4776 {
4777         switch (kind) {
4778         case BPF_FIELD_BYTE_OFFSET: return "byte_off";
4779         case BPF_FIELD_BYTE_SIZE: return "byte_sz";
4780         case BPF_FIELD_EXISTS: return "field_exists";
4781         case BPF_FIELD_SIGNED: return "signed";
4782         case BPF_FIELD_LSHIFT_U64: return "lshift_u64";
4783         case BPF_FIELD_RSHIFT_U64: return "rshift_u64";
4784         case BPF_TYPE_ID_LOCAL: return "local_type_id";
4785         case BPF_TYPE_ID_TARGET: return "target_type_id";
4786         case BPF_TYPE_EXISTS: return "type_exists";
4787         case BPF_TYPE_SIZE: return "type_size";
4788         case BPF_ENUMVAL_EXISTS: return "enumval_exists";
4789         case BPF_ENUMVAL_VALUE: return "enumval_value";
4790         default: return "unknown";
4791         }
4792 }
4793
4794 static bool core_relo_is_field_based(enum bpf_core_relo_kind kind)
4795 {
4796         switch (kind) {
4797         case BPF_FIELD_BYTE_OFFSET:
4798         case BPF_FIELD_BYTE_SIZE:
4799         case BPF_FIELD_EXISTS:
4800         case BPF_FIELD_SIGNED:
4801         case BPF_FIELD_LSHIFT_U64:
4802         case BPF_FIELD_RSHIFT_U64:
4803                 return true;
4804         default:
4805                 return false;
4806         }
4807 }
4808
4809 static bool core_relo_is_type_based(enum bpf_core_relo_kind kind)
4810 {
4811         switch (kind) {
4812         case BPF_TYPE_ID_LOCAL:
4813         case BPF_TYPE_ID_TARGET:
4814         case BPF_TYPE_EXISTS:
4815         case BPF_TYPE_SIZE:
4816                 return true;
4817         default:
4818                 return false;
4819         }
4820 }
4821
4822 static bool core_relo_is_enumval_based(enum bpf_core_relo_kind kind)
4823 {
4824         switch (kind) {
4825         case BPF_ENUMVAL_EXISTS:
4826         case BPF_ENUMVAL_VALUE:
4827                 return true;
4828         default:
4829                 return false;
4830         }
4831 }
4832
4833 /*
4834  * Turn bpf_core_relo into a low- and high-level spec representation,
4835  * validating correctness along the way and calculating the resulting
4836  * field bit offset specified by the accessor string. The low-level spec
4837  * captures every single level of nestedness, including traversal of
4838  * anonymous struct/union members. The high-level one captures only the
4839  * semantically meaningful "turning points": named fields and array indices.
4840  * E.g., for this case:
4841  *
4842  *   struct sample {
4843  *       int __unimportant;
4844  *       struct {
4845  *           int __1;
4846  *           int __2;
4847  *           int a[7];
4848  *       };
4849  *   };
4850  *
4851  *   struct sample *s = ...;
4852  *
4853  *   int *x = &s->a[3]; // access string = '0:1:2:3'
4854  *
4855  * Low-level spec has 1:1 mapping with each element of access string (it's
4856  * just a parsed access string representation): [0, 1, 2, 3].
4857  *
4858  * High-level spec will capture only 3 points:
4859  *   - initial zero-index access by pointer (&s->... is the same as &s[0]...);
4860  *   - field 'a' access (corresponds to '2' in low-level spec);
4861  *   - array element #3 access (corresponds to '3' in low-level spec).
4862  *
4863  * Type-based relocations (TYPE_EXISTS/TYPE_SIZE,
4864  * TYPE_ID_LOCAL/TYPE_ID_TARGET) don't capture any field information. Their
4865  * spec and raw_spec are kept empty.
4866  *
4867  * Enum value-based relocations (ENUMVAL_EXISTS/ENUMVAL_VALUE) use the access
4868  * string to specify the enumerator value index that needs to be relocated.
4869  */
4870 static int bpf_core_parse_spec(const struct btf *btf,
4871                                __u32 type_id,
4872                                const char *spec_str,
4873                                enum bpf_core_relo_kind relo_kind,
4874                                struct bpf_core_spec *spec)
4875 {
4876         int access_idx, parsed_len, i;
4877         struct bpf_core_accessor *acc;
4878         const struct btf_type *t;
4879         const char *name;
4880         __u32 id;
4881         __s64 sz;
4882
4883         if (str_is_empty(spec_str) || *spec_str == ':')
4884                 return -EINVAL;
4885
4886         memset(spec, 0, sizeof(*spec));
4887         spec->btf = btf;
4888         spec->root_type_id = type_id;
4889         spec->relo_kind = relo_kind;
4890
4891         /* type-based relocations don't have a field access string */
4892         if (core_relo_is_type_based(relo_kind)) {
4893                 if (strcmp(spec_str, "0"))
4894                         return -EINVAL;
4895                 return 0;
4896         }
4897
4898         /* parse spec_str="0:1:2:3:4" into array raw_spec=[0, 1, 2, 3, 4] */
4899         while (*spec_str) {
4900                 if (*spec_str == ':')
4901                         ++spec_str;
4902                 if (sscanf(spec_str, "%d%n", &access_idx, &parsed_len) != 1)
4903                         return -EINVAL;
4904                 if (spec->raw_len == BPF_CORE_SPEC_MAX_LEN)
4905                         return -E2BIG;
4906                 spec_str += parsed_len;
4907                 spec->raw_spec[spec->raw_len++] = access_idx;
4908         }
4909
4910         if (spec->raw_len == 0)
4911                 return -EINVAL;
4912
4913         t = skip_mods_and_typedefs(btf, type_id, &id);
4914         if (!t)
4915                 return -EINVAL;
4916
4917         access_idx = spec->raw_spec[0];
4918         acc = &spec->spec[0];
4919         acc->type_id = id;
4920         acc->idx = access_idx;
4921         spec->len++;
4922
4923         if (core_relo_is_enumval_based(relo_kind)) {
4924                 if (!btf_is_enum(t) || spec->raw_len > 1 || access_idx >= btf_vlen(t))
4925                         return -EINVAL;
4926
4927                 /* record enumerator name in the first accessor */
4928                 acc->name = btf__name_by_offset(btf, btf_enum(t)[access_idx].name_off);
4929                 return 0;
4930         }
4931
4932         if (!core_relo_is_field_based(relo_kind))
4933                 return -EINVAL;
4934
4935         sz = btf__resolve_size(btf, id);
4936         if (sz < 0)
4937                 return sz;
4938         spec->bit_offset = access_idx * sz * 8;
4939
4940         for (i = 1; i < spec->raw_len; i++) {
4941                 t = skip_mods_and_typedefs(btf, id, &id);
4942                 if (!t)
4943                         return -EINVAL;
4944
4945                 access_idx = spec->raw_spec[i];
4946                 acc = &spec->spec[spec->len];
4947
4948                 if (btf_is_composite(t)) {
4949                         const struct btf_member *m;
4950                         __u32 bit_offset;
4951
4952                         if (access_idx >= btf_vlen(t))
4953                                 return -EINVAL;
4954
4955                         bit_offset = btf_member_bit_offset(t, access_idx);
4956                         spec->bit_offset += bit_offset;
4957
4958                         m = btf_members(t) + access_idx;
4959                         if (m->name_off) {
4960                                 name = btf__name_by_offset(btf, m->name_off);
4961                                 if (str_is_empty(name))
4962                                         return -EINVAL;
4963
4964                                 acc->type_id = id;
4965                                 acc->idx = access_idx;
4966                                 acc->name = name;
4967                                 spec->len++;
4968                         }
4969
4970                         id = m->type;
4971                 } else if (btf_is_array(t)) {
4972                         const struct btf_array *a = btf_array(t);
4973                         bool flex;
4974
4975                         t = skip_mods_and_typedefs(btf, a->type, &id);
4976                         if (!t)
4977                                 return -EINVAL;
4978
4979                         flex = is_flex_arr(btf, acc - 1, a);
4980                         if (!flex && access_idx >= a->nelems)
4981                                 return -EINVAL;
4982
4983                         spec->spec[spec->len].type_id = id;
4984                         spec->spec[spec->len].idx = access_idx;
4985                         spec->len++;
4986
4987                         sz = btf__resolve_size(btf, id);
4988                         if (sz < 0)
4989                                 return sz;
4990                         spec->bit_offset += access_idx * sz * 8;
4991                 } else {
4992                         pr_warn("relo for [%u] %s (at idx %d) captures type [%d] of unexpected kind %s\n",
4993                                 type_id, spec_str, i, id, btf_kind_str(t));
4994                         return -EINVAL;
4995                 }
4996         }
4997
4998         return 0;
4999 }
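
/* Editor's note: a worked check (not part of libbpf; assumes C11
 * _Static_assert and offsetof from <stddef.h>) of the arithmetic in the
 * 'struct sample' comment above: access string "0:1:2:3" yields
 * raw_spec = [0, 1, 2, 3], a high-level spec of length 3, and
 * bit_offset = 0 + 32 + 64 + 3*32 = 192, i.e. byte 24.
 */
struct sample_sketch {
	int __unimportant;		/* bytes 0-3 */
	struct {
		int __1;		/* bytes 4-7 */
		int __2;		/* bytes 8-11 */
		int a[7];		/* a[3] starts at byte 12 + 3*4 = 24 */
	};
};
_Static_assert(offsetof(struct sample_sketch, a[3]) * 8 == 192,
	       "spec \"0:1:2:3\" resolves to bit offset 192");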
5000
5001 static bool bpf_core_is_flavor_sep(const char *s)
5002 {
5003         /* check X___Y name pattern, where X and Y are not underscores */
5004         return s[0] != '_' &&                                 /* X */
5005                s[1] == '_' && s[2] == '_' && s[3] == '_' &&   /* ___ */
5006                s[4] != '_';                                   /* Y */
5007 }
5008
5009 /* Given 'some_struct_name___with_flavor', return the length of the name
5010  * prefix before the last triple underscore. The struct name part after the
5011  * last triple underscore is ignored during BPF CO-RE relocation matching.
5012  */
5013 static size_t bpf_core_essential_name_len(const char *name)
5014 {
5015         size_t n = strlen(name);
5016         int i;
5017
5018         for (i = n - 5; i >= 0; i--) {
5019                 if (bpf_core_is_flavor_sep(name + i))
5020                         return i + 1;
5021         }
5022         return n;
5023 }
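
/* Editor's note: illustrative values (type names hypothetical):
 *
 *   bpf_core_essential_name_len("task_struct___old")   == 11 ("task_struct")
 *   bpf_core_essential_name_len("thread_state___v5_8") == 12 ("thread_state")
 *   bpf_core_essential_name_len("task_struct")         == 11 (no flavor)
 *
 * so a 'struct task_struct___old' flavor in BPF-side BTF is matched against
 * the kernel's plain 'struct task_struct' during candidate search below.
 */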
5024
5025 struct core_cand
5026 {
5027         const struct btf *btf;
5028         const struct btf_type *t;
5029         const char *name;
5030         __u32 id;
5031 };
5032
5033 /* dynamically sized list of type IDs and its associated struct btf */
5034 struct core_cand_list {
5035         struct core_cand *cands;
5036         int len;
5037 };
5038
5039 static void bpf_core_free_cands(struct core_cand_list *cands)
5040 {
5041         free(cands->cands);
5042         free(cands);
5043 }
5044
5045 static int bpf_core_add_cands(struct core_cand *local_cand,
5046                               size_t local_essent_len,
5047                               const struct btf *targ_btf,
5048                               const char *targ_btf_name,
5049                               int targ_start_id,
5050                               struct core_cand_list *cands)
5051 {
5052         struct core_cand *new_cands, *cand;
5053         const struct btf_type *t;
5054         const char *targ_name;
5055         size_t targ_essent_len;
5056         int n, i;
5057
5058         n = btf__get_nr_types(targ_btf);
5059         for (i = targ_start_id; i <= n; i++) {
5060                 t = btf__type_by_id(targ_btf, i);
5061                 if (btf_kind(t) != btf_kind(local_cand->t))
5062                         continue;
5063
5064                 targ_name = btf__name_by_offset(targ_btf, t->name_off);
5065                 if (str_is_empty(targ_name))
5066                         continue;
5067
5068                 targ_essent_len = bpf_core_essential_name_len(targ_name);
5069                 if (targ_essent_len != local_essent_len)
5070                         continue;
5071
5072                 if (strncmp(local_cand->name, targ_name, local_essent_len) != 0)
5073                         continue;
5074
5075                 pr_debug("CO-RE relocating [%d] %s %s: found target candidate [%d] %s %s in [%s]\n",
5076                          local_cand->id, btf_kind_str(local_cand->t),
5077                          local_cand->name, i, btf_kind_str(t), targ_name,
5078                          targ_btf_name);
5079                 new_cands = libbpf_reallocarray(cands->cands, cands->len + 1,
5080                                               sizeof(*cands->cands));
5081                 if (!new_cands)
5082                         return -ENOMEM;
5083
5084                 cand = &new_cands[cands->len];
5085                 cand->btf = targ_btf;
5086                 cand->t = t;
5087                 cand->name = targ_name;
5088                 cand->id = i;
5089
5090                 cands->cands = new_cands;
5091                 cands->len++;
5092         }
5093         return 0;
5094 }
5095
5096 static int load_module_btfs(struct bpf_object *obj)
5097 {
5098         struct bpf_btf_info info;
5099         struct module_btf *mod_btf;
5100         struct btf *btf;
5101         char name[64];
5102         __u32 id = 0, len;
5103         int err, fd;
5104
5105         if (obj->btf_modules_loaded)
5106                 return 0;
5107
5108         if (obj->gen_loader)
5109                 return 0;
5110
5111         /* don't do this again, even if we find no module BTFs */
5112         obj->btf_modules_loaded = true;
5113
5114         /* kernel too old to support module BTFs */
5115         if (!kernel_supports(obj, FEAT_MODULE_BTF))
5116                 return 0;
5117
5118         while (true) {
5119                 err = bpf_btf_get_next_id(id, &id);
5120                 if (err && errno == ENOENT)
5121                         return 0;
5122                 if (err) {
5123                         err = -errno;
5124                         pr_warn("failed to iterate BTF objects: %d\n", err);
5125                         return err;
5126                 }
5127
5128                 fd = bpf_btf_get_fd_by_id(id);
5129                 if (fd < 0) {
5130                         if (errno == ENOENT)
5131                                 continue; /* expected race: BTF was unloaded */
5132                         err = -errno;
5133                         pr_warn("failed to get BTF object #%d FD: %d\n", id, err);
5134                         return err;
5135                 }
5136
5137                 len = sizeof(info);
5138                 memset(&info, 0, sizeof(info));
5139                 info.name = ptr_to_u64(name);
5140                 info.name_len = sizeof(name);
5141
5142                 err = bpf_obj_get_info_by_fd(fd, &info, &len);
5143                 if (err) {
5144                         err = -errno;
5145                         pr_warn("failed to get BTF object #%d info: %d\n", id, err);
5146                         goto err_out;
5147                 }
5148
5149                 /* ignore non-module BTFs */
5150                 if (!info.kernel_btf || strcmp(name, "vmlinux") == 0) {
5151                         close(fd);
5152                         continue;
5153                 }
5154
5155                 btf = btf_get_from_fd(fd, obj->btf_vmlinux);
5156                 err = libbpf_get_error(btf);
5157                 if (err) {
5158                         pr_warn("failed to load module [%s]'s BTF object #%d: %d\n",
5159                                 name, id, err);
5160                         goto err_out;
5161                 }
5162
5163                 err = libbpf_ensure_mem((void **)&obj->btf_modules, &obj->btf_module_cap,
5164                                         sizeof(*obj->btf_modules), obj->btf_module_cnt + 1);
5165                 if (err)
5166                         goto err_out;
5167
5168                 mod_btf = &obj->btf_modules[obj->btf_module_cnt++];
5169
5170                 mod_btf->btf = btf;
5171                 mod_btf->id = id;
5172                 mod_btf->fd = fd;
5173                 mod_btf->name = strdup(name);
5174                 if (!mod_btf->name) {
5175                         err = -ENOMEM;
5176                         goto err_out;
5177                 }
5178                 continue;
5179
5180 err_out:
5181                 close(fd);
5182                 return err;
5183         }
5184
5185         return 0;
5186 }
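
/* Editor's note: the same enumerate-all-BTF-objects pattern, reduced to a
 * standalone sketch (error handling trimmed; needs the same privileges as
 * the loop above; the function name is made up):
 */
static void btf_id_iteration_sketch(void)
{
	__u32 id = 0;
	int fd;

	while (!bpf_btf_get_next_id(id, &id)) {
		fd = bpf_btf_get_fd_by_id(id);
		if (fd < 0)
			continue;	/* object may have been unloaded */
		/* ... inspect via bpf_obj_get_info_by_fd(fd, ...) ... */
		close(fd);
	}
}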
5187
5188 static struct core_cand_list *
5189 bpf_core_find_cands(struct bpf_object *obj, const struct btf *local_btf, __u32 local_type_id)
5190 {
5191         struct core_cand local_cand = {};
5192         struct core_cand_list *cands;
5193         const struct btf *main_btf;
5194         size_t local_essent_len;
5195         int err, i;
5196
5197         local_cand.btf = local_btf;
5198         local_cand.t = btf__type_by_id(local_btf, local_type_id);
5199         if (!local_cand.t)
5200                 return ERR_PTR(-EINVAL);
5201
5202         local_cand.name = btf__name_by_offset(local_btf, local_cand.t->name_off);
5203         if (str_is_empty(local_cand.name))
5204                 return ERR_PTR(-EINVAL);
5205         local_essent_len = bpf_core_essential_name_len(local_cand.name);
5206
5207         cands = calloc(1, sizeof(*cands));
5208         if (!cands)
5209                 return ERR_PTR(-ENOMEM);
5210
5211         /* Attempt to find target candidates in vmlinux BTF first */
5212         main_btf = obj->btf_vmlinux_override ?: obj->btf_vmlinux;
5213         err = bpf_core_add_cands(&local_cand, local_essent_len, main_btf, "vmlinux", 1, cands);
5214         if (err)
5215                 goto err_out;
5216
5217         /* if vmlinux BTF has any candidate, don't go for module BTFs */
5218         if (cands->len)
5219                 return cands;
5220
5221         /* if vmlinux BTF was overridden, don't attempt to load module BTFs */
5222         if (obj->btf_vmlinux_override)
5223                 return cands;
5224
5225         /* now look through module BTFs, trying to still find candidates */
5226         err = load_module_btfs(obj);
5227         if (err)
5228                 goto err_out;
5229
5230         for (i = 0; i < obj->btf_module_cnt; i++) {
5231                 err = bpf_core_add_cands(&local_cand, local_essent_len,
5232                                          obj->btf_modules[i].btf,
5233                                          obj->btf_modules[i].name,
5234                                          btf__get_nr_types(obj->btf_vmlinux) + 1,
5235                                          cands);
5236                 if (err)
5237                         goto err_out;
5238         }
5239
5240         return cands;
5241 err_out:
5242         bpf_core_free_cands(cands);
5243         return ERR_PTR(err);
5244 }
5245
5246 /* Check two types for compatibility for the purpose of field access
5247  * relocation. const/volatile/restrict and typedefs are skipped to ensure we
5248  * are relocating semantically compatible entities:
5249  *   - any two STRUCTs/UNIONs are compatible and can be mixed;
5250  *   - any two FWDs are compatible, if their names match (modulo flavor suffix);
5251  *   - any two PTRs are always compatible;
5252  *   - for ENUMs, names should be the same (ignoring flavor suffix) or at
5253  *     least one of the enums should be anonymous;
5254  *   - for ENUMs, sizes are ignored;
5255  *   - for INT, size and signedness are ignored;
5256  *   - any two FLOATs are always compatible;
5257  *   - for ARRAY, dimensionality is ignored, element types are checked for
5258  *     compatibility recursively;
5259  *   - everything else shouldn't be ever a target of relocation.
5260  * These rules are not set in stone and probably will be adjusted as we get
5261  * more experience with using BPF CO-RE relocations.
5262  */
5263 static int bpf_core_fields_are_compat(const struct btf *local_btf,
5264                                       __u32 local_id,
5265                                       const struct btf *targ_btf,
5266                                       __u32 targ_id)
5267 {
5268         const struct btf_type *local_type, *targ_type;
5269
5270 recur:
5271         local_type = skip_mods_and_typedefs(local_btf, local_id, &local_id);
5272         targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id);
5273         if (!local_type || !targ_type)
5274                 return -EINVAL;
5275
5276         if (btf_is_composite(local_type) && btf_is_composite(targ_type))
5277                 return 1;
5278         if (btf_kind(local_type) != btf_kind(targ_type))
5279                 return 0;
5280
5281         switch (btf_kind(local_type)) {
5282         case BTF_KIND_PTR:
5283         case BTF_KIND_FLOAT:
5284                 return 1;
5285         case BTF_KIND_FWD:
5286         case BTF_KIND_ENUM: {
5287                 const char *local_name, *targ_name;
5288                 size_t local_len, targ_len;
5289
5290                 local_name = btf__name_by_offset(local_btf,
5291                                                  local_type->name_off);
5292                 targ_name = btf__name_by_offset(targ_btf, targ_type->name_off);
5293                 local_len = bpf_core_essential_name_len(local_name);
5294                 targ_len = bpf_core_essential_name_len(targ_name);
5295                 /* one of them is anonymous or both have the same flavor-less names */
5296                 return local_len == 0 || targ_len == 0 ||
5297                        (local_len == targ_len &&
5298                         strncmp(local_name, targ_name, local_len) == 0);
5299         }
5300         case BTF_KIND_INT:
5301                 /* just reject deprecated bitfield-like integers; all other
5302                  * integers are compatible with each other by default
5303                  */
5304                 return btf_int_offset(local_type) == 0 &&
5305                        btf_int_offset(targ_type) == 0;
5306         case BTF_KIND_ARRAY:
5307                 local_id = btf_array(local_type)->type;
5308                 targ_id = btf_array(targ_type)->type;
5309                 goto recur;
5310         default:
5311                 pr_warn("unexpected kind %d relocated, local [%d], target [%d]\n",
5312                         btf_kind(local_type), local_id, targ_id);
5313                 return 0;
5314         }
5315 }
5316
5317 /*
5318  * Given single high-level named field accessor in local type, find
5319  * corresponding high-level accessor for a target type. Along the way,
5320  * maintain low-level spec for target as well. Also keep updating target
5321  * bit offset.
5322  *
5323  * Searching is performed through recursive exhaustive enumeration of all
5324  * fields of a struct/union. If there are any anonymous (embedded)
5325  * structs/unions, they are recursively searched as well. If field with
5326  * desired name is found, check compatibility between local and target types,
5327  * before returning result.
5328  *
5329  * 1 is returned, if field is found.
5330  * 0 is returned if no compatible field is found.
5331  * <0 is returned on error.
5332  */
5333 static int bpf_core_match_member(const struct btf *local_btf,
5334                                  const struct bpf_core_accessor *local_acc,
5335                                  const struct btf *targ_btf,
5336                                  __u32 targ_id,
5337                                  struct bpf_core_spec *spec,
5338                                  __u32 *next_targ_id)
5339 {
5340         const struct btf_type *local_type, *targ_type;
5341         const struct btf_member *local_member, *m;
5342         const char *local_name, *targ_name;
5343         __u32 local_id;
5344         int i, n, found;
5345
5346         targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id);
5347         if (!targ_type)
5348                 return -EINVAL;
5349         if (!btf_is_composite(targ_type))
5350                 return 0;
5351
5352         local_id = local_acc->type_id;
5353         local_type = btf__type_by_id(local_btf, local_id);
5354         local_member = btf_members(local_type) + local_acc->idx;
5355         local_name = btf__name_by_offset(local_btf, local_member->name_off);
5356
5357         n = btf_vlen(targ_type);
5358         m = btf_members(targ_type);
5359         for (i = 0; i < n; i++, m++) {
5360                 __u32 bit_offset;
5361
5362                 bit_offset = btf_member_bit_offset(targ_type, i);
5363
5364                 /* too deep struct/union/array nesting */
5365                 if (spec->raw_len == BPF_CORE_SPEC_MAX_LEN)
5366                         return -E2BIG;
5367
5368                 /* tentatively assume this member is the one we want */
5369                 spec->bit_offset += bit_offset;
5370                 spec->raw_spec[spec->raw_len++] = i;
5371
5372                 targ_name = btf__name_by_offset(targ_btf, m->name_off);
5373                 if (str_is_empty(targ_name)) {
5374                         /* embedded struct/union, we need to go deeper */
5375                         found = bpf_core_match_member(local_btf, local_acc,
5376                                                       targ_btf, m->type,
5377                                                       spec, next_targ_id);
5378                         if (found) /* either found or error */
5379                                 return found;
5380                 } else if (strcmp(local_name, targ_name) == 0) {
5381                         /* matching named field */
5382                         struct bpf_core_accessor *targ_acc;
5383
5384                         targ_acc = &spec->spec[spec->len++];
5385                         targ_acc->type_id = targ_id;
5386                         targ_acc->idx = i;
5387                         targ_acc->name = targ_name;
5388
5389                         *next_targ_id = m->type;
5390                         found = bpf_core_fields_are_compat(local_btf,
5391                                                            local_member->type,
5392                                                            targ_btf, m->type);
5393                         if (!found)
5394                                 spec->len--; /* pop accessor */
5395                         return found;
5396                 }
5397                 /* member turned out not to be what we looked for */
5398                 spec->bit_offset -= bit_offset;
5399                 spec->raw_len--;
5400         }
5401
5402         return 0;
5403 }
5404
5405 /* Check local and target types for compatibility. This check is used for
5406  * type-based CO-RE relocations and follows slightly different rules than
5407  * field-based relocations. This function assumes that root types were already
5408  * checked for name match. Beyond that initial root-level name check, names
5409  * are completely ignored. Compatibility rules are as follows:
5410  *   - any two STRUCTs/UNIONs/FWDs/ENUMs/INTs are considered compatible, but
5411  *     kind should match for local and target types (i.e., STRUCT is not
5412  *     compatible with UNION);
5413  *   - for ENUMs, the size is ignored;
5414  *   - for INT, size and signedness are ignored;
5415  *   - for ARRAY, dimensionality is ignored, element types are checked for
5416  *     compatibility recursively;
5417  *   - CONST/VOLATILE/RESTRICT modifiers are ignored;
5418  *   - TYPEDEFs/PTRs are compatible if the types they point to are compatible;
5419  *   - FUNC_PROTOs are compatible if they have compatible signature: same
5420  *     number of input args and compatible return and argument types.
5421  * These rules are not set in stone and probably will be adjusted as we get
5422  * more experience with using BPF CO-RE relocations.
5423  */
5424 static int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id,
5425                                      const struct btf *targ_btf, __u32 targ_id)
5426 {
5427         const struct btf_type *local_type, *targ_type;
5428         int depth = 32; /* max recursion depth */
5429
5430         /* caller made sure that names match (ignoring flavor suffix) */
5431         local_type = btf__type_by_id(local_btf, local_id);
5432         targ_type = btf__type_by_id(targ_btf, targ_id);
5433         if (btf_kind(local_type) != btf_kind(targ_type))
5434                 return 0;
5435
5436 recur:
5437         depth--;
5438         if (depth < 0)
5439                 return -EINVAL;
5440
5441         local_type = skip_mods_and_typedefs(local_btf, local_id, &local_id);
5442         targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id);
5443         if (!local_type || !targ_type)
5444                 return -EINVAL;
5445
5446         if (btf_kind(local_type) != btf_kind(targ_type))
5447                 return 0;
5448
5449         switch (btf_kind(local_type)) {
5450         case BTF_KIND_UNKN:
5451         case BTF_KIND_STRUCT:
5452         case BTF_KIND_UNION:
5453         case BTF_KIND_ENUM:
5454         case BTF_KIND_FWD:
5455                 return 1;
5456         case BTF_KIND_INT:
5457                 /* just reject deprecated bitfield-like integers; all other
5458                  * integers are compatible with each other by default
5459                  */
5460                 return btf_int_offset(local_type) == 0 && btf_int_offset(targ_type) == 0;
5461         case BTF_KIND_PTR:
5462                 local_id = local_type->type;
5463                 targ_id = targ_type->type;
5464                 goto recur;
5465         case BTF_KIND_ARRAY:
5466                 local_id = btf_array(local_type)->type;
5467                 targ_id = btf_array(targ_type)->type;
5468                 goto recur;
5469         case BTF_KIND_FUNC_PROTO: {
5470                 struct btf_param *local_p = btf_params(local_type);
5471                 struct btf_param *targ_p = btf_params(targ_type);
5472                 __u16 local_vlen = btf_vlen(local_type);
5473                 __u16 targ_vlen = btf_vlen(targ_type);
5474                 int i, err;
5475
5476                 if (local_vlen != targ_vlen)
5477                         return 0;
5478
5479                 for (i = 0; i < local_vlen; i++, local_p++, targ_p++) {
5480                         skip_mods_and_typedefs(local_btf, local_p->type, &local_id);
5481                         skip_mods_and_typedefs(targ_btf, targ_p->type, &targ_id);
5482                         err = bpf_core_types_are_compat(local_btf, local_id, targ_btf, targ_id);
5483                         if (err <= 0)
5484                                 return err;
5485                 }
5486
5487                 /* tail recurse for return type check */
5488                 skip_mods_and_typedefs(local_btf, local_type->type, &local_id);
5489                 skip_mods_and_typedefs(targ_btf, targ_type->type, &targ_id);
5490                 goto recur;
5491         }
5492         default:
5493                 pr_warn("unexpected kind %s relocated, local [%d], target [%d]\n",
5494                         btf_kind_str(local_type), local_id, targ_id);
5495                 return 0;
5496         }
5497 }
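
/* Editor's note: illustrative case (typedefs hypothetical). Under the rules
 * above, these two would be considered compatible:
 *
 *   typedef int (*fn_a)(struct s *p, unsigned long x);
 *   typedef int (*fn_b)(struct s___flavor *p, long x);
 *
 * TYPEDEFs are skipped, PTRs recurse, the FUNC_PROTOs have equal arity,
 * STRUCT matches STRUCT (names ignored below the root), and the INT args
 * differ only in signedness, which is ignored.
 */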
5498
5499 /*
5500  * Try to match local spec to a target type and, if successful, produce full
5501  * target spec (high-level, low-level + bit offset).
5502  */
5503 static int bpf_core_spec_match(struct bpf_core_spec *local_spec,
5504                                const struct btf *targ_btf, __u32 targ_id,
5505                                struct bpf_core_spec *targ_spec)
5506 {
5507         const struct btf_type *targ_type;
5508         const struct bpf_core_accessor *local_acc;
5509         struct bpf_core_accessor *targ_acc;
5510         int i, sz, matched;
5511
5512         memset(targ_spec, 0, sizeof(*targ_spec));
5513         targ_spec->btf = targ_btf;
5514         targ_spec->root_type_id = targ_id;
5515         targ_spec->relo_kind = local_spec->relo_kind;
5516
5517         if (core_relo_is_type_based(local_spec->relo_kind)) {
5518                 return bpf_core_types_are_compat(local_spec->btf,
5519                                                  local_spec->root_type_id,
5520                                                  targ_btf, targ_id);
5521         }
5522
5523         local_acc = &local_spec->spec[0];
5524         targ_acc = &targ_spec->spec[0];
5525
5526         if (core_relo_is_enumval_based(local_spec->relo_kind)) {
5527                 size_t local_essent_len, targ_essent_len;
5528                 const struct btf_enum *e;
5529                 const char *targ_name;
5530
5531                 /* has to resolve to an enum */
5532                 targ_type = skip_mods_and_typedefs(targ_spec->btf, targ_id, &targ_id);
5533                 if (!btf_is_enum(targ_type))
5534                         return 0;
5535
5536                 local_essent_len = bpf_core_essential_name_len(local_acc->name);
5537
5538                 for (i = 0, e = btf_enum(targ_type); i < btf_vlen(targ_type); i++, e++) {
5539                         targ_name = btf__name_by_offset(targ_spec->btf, e->name_off);
5540                         targ_essent_len = bpf_core_essential_name_len(targ_name);
5541                         if (targ_essent_len != local_essent_len)
5542                                 continue;
5543                         if (strncmp(local_acc->name, targ_name, local_essent_len) == 0) {
5544                                 targ_acc->type_id = targ_id;
5545                                 targ_acc->idx = i;
5546                                 targ_acc->name = targ_name;
5547                                 targ_spec->len++;
5548                                 targ_spec->raw_spec[targ_spec->raw_len] = targ_acc->idx;
5549                                 targ_spec->raw_len++;
5550                                 return 1;
5551                         }
5552                 }
5553                 return 0;
5554         }
5555
5556         if (!core_relo_is_field_based(local_spec->relo_kind))
5557                 return -EINVAL;
5558
5559         for (i = 0; i < local_spec->len; i++, local_acc++, targ_acc++) {
5560                 targ_type = skip_mods_and_typedefs(targ_spec->btf, targ_id,
5561                                                    &targ_id);
5562                 if (!targ_type)
5563                         return -EINVAL;
5564
5565                 if (local_acc->name) {
5566                         matched = bpf_core_match_member(local_spec->btf,
5567                                                         local_acc,
5568                                                         targ_btf, targ_id,
5569                                                         targ_spec, &targ_id);
5570                         if (matched <= 0)
5571                                 return matched;
5572                 } else {
5573                         /* for i=0, targ_id is already treated as array element
5574                          * type (because it's the original struct), for others
5575                          * we should find array element type first
5576                          */
5577                         if (i > 0) {
5578                                 const struct btf_array *a;
5579                                 bool flex;
5580
5581                                 if (!btf_is_array(targ_type))
5582                                         return 0;
5583
5584                                 a = btf_array(targ_type);
5585                                 flex = is_flex_arr(targ_btf, targ_acc - 1, a);
5586                                 if (!flex && local_acc->idx >= a->nelems)
5587                                         return 0;
5588                                 if (!skip_mods_and_typedefs(targ_btf, a->type,
5589                                                             &targ_id))
5590                                         return -EINVAL;
5591                         }
5592
5593                         /* too deep struct/union/array nesting */
5594                         if (targ_spec->raw_len == BPF_CORE_SPEC_MAX_LEN)
5595                                 return -E2BIG;
5596
5597                         targ_acc->type_id = targ_id;
5598                         targ_acc->idx = local_acc->idx;
5599                         targ_acc->name = NULL;
5600                         targ_spec->len++;
5601                         targ_spec->raw_spec[targ_spec->raw_len] = targ_acc->idx;
5602                         targ_spec->raw_len++;
5603
5604                         sz = btf__resolve_size(targ_btf, targ_id);
5605                         if (sz < 0)
5606                                 return sz;
5607                         targ_spec->bit_offset += local_acc->idx * sz * 8;
5608                 }
5609         }
5610
5611         return 1;
5612 }
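
/* Worked example of spec matching (editor's sketch; the type layout is
 * hypothetical). For a local accessor chain x.a[3] over
 *
 *   struct sample { int x; int a[10]; };    // raw spec "0:1:3"
 *
 * matched against an identical target type, accessor "a" is resolved via
 * bpf_core_match_member() (bit_offset += 32), and the unnamed array accessor
 * adds local_acc->idx * sizeof(int) * 8 = 96 bits, for a final bit_offset of
 * 128, i.e. byte offset 16.
 */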
5613
5614 static int bpf_core_calc_field_relo(const struct bpf_program *prog,
5615                                     const struct bpf_core_relo *relo,
5616                                     const struct bpf_core_spec *spec,
5617                                     __u32 *val, __u32 *field_sz, __u32 *type_id,
5618                                     bool *validate)
5619 {
5620         const struct bpf_core_accessor *acc;
5621         const struct btf_type *t;
5622         __u32 byte_off, byte_sz, bit_off, bit_sz, field_type_id;
5623         const struct btf_member *m;
5624         const struct btf_type *mt;
5625         bool bitfield;
5626         __s64 sz;
5627
5628         *field_sz = 0;
5629
5630         if (relo->kind == BPF_FIELD_EXISTS) {
5631                 *val = spec ? 1 : 0;
5632                 return 0;
5633         }
5634
5635         if (!spec)
5636                 return -EUCLEAN; /* request instruction poisoning */
5637
5638         acc = &spec->spec[spec->len - 1];
5639         t = btf__type_by_id(spec->btf, acc->type_id);
5640
5641         /* a[n] accessor needs special handling */
5642         if (!acc->name) {
5643                 if (relo->kind == BPF_FIELD_BYTE_OFFSET) {
5644                         *val = spec->bit_offset / 8;
5645                         /* remember field size for load/store mem size */
5646                         sz = btf__resolve_size(spec->btf, acc->type_id);
5647                         if (sz < 0)
5648                                 return -EINVAL;
5649                         *field_sz = sz;
5650                         *type_id = acc->type_id;
5651                 } else if (relo->kind == BPF_FIELD_BYTE_SIZE) {
5652                         sz = btf__resolve_size(spec->btf, acc->type_id);
5653                         if (sz < 0)
5654                                 return -EINVAL;
5655                         *val = sz;
5656                 } else {
5657                         pr_warn("prog '%s': relo %d at insn #%d can't be applied to array access\n",
5658                                 prog->name, relo->kind, relo->insn_off / 8);
5659                         return -EINVAL;
5660                 }
5661                 if (validate)
5662                         *validate = true;
5663                 return 0;
5664         }
5665
5666         m = btf_members(t) + acc->idx;
5667         mt = skip_mods_and_typedefs(spec->btf, m->type, &field_type_id);
5668         bit_off = spec->bit_offset;
5669         bit_sz = btf_member_bitfield_size(t, acc->idx);
5670
5671         bitfield = bit_sz > 0;
5672         if (bitfield) {
5673                 byte_sz = mt->size;
5674                 byte_off = bit_off / 8 / byte_sz * byte_sz;
5675                 /* figure out smallest int size necessary for bitfield load */
5676                 while (bit_off + bit_sz - byte_off * 8 > byte_sz * 8) {
5677                         if (byte_sz >= 8) {
5678                                 /* bitfield can't be read with 64-bit read */
5679                                 pr_warn("prog '%s': relo %d at insn #%d can't be satisfied for bitfield\n",
5680                                         prog->name, relo->kind, relo->insn_off / 8);
5681                                 return -E2BIG;
5682                         }
5683                         byte_sz *= 2;
5684                         byte_off = bit_off / 8 / byte_sz * byte_sz;
5685                 }
5686         } else {
5687                 sz = btf__resolve_size(spec->btf, field_type_id);
5688                 if (sz < 0)
5689                         return -EINVAL;
5690                 byte_sz = sz;
5691                 byte_off = spec->bit_offset / 8;
5692                 bit_sz = byte_sz * 8;
5693         }
5694
5695         /* for bitfields, all the relocatable aspects are ambiguous and we
5696          * might disagree with the compiler, so turn off validation of the
5697          * expected value, except for signedness
5698          */
5699         if (validate)
5700                 *validate = !bitfield;
5701
5702         switch (relo->kind) {
5703         case BPF_FIELD_BYTE_OFFSET:
5704                 *val = byte_off;
5705                 if (!bitfield) {
5706                         *field_sz = byte_sz;
5707                         *type_id = field_type_id;
5708                 }
5709                 break;
5710         case BPF_FIELD_BYTE_SIZE:
5711                 *val = byte_sz;
5712                 break;
5713         case BPF_FIELD_SIGNED:
5714                 /* enums will be assumed unsigned */
5715                 *val = btf_is_enum(mt) ||
5716                        (btf_int_encoding(mt) & BTF_INT_SIGNED);
5717                 if (validate)
5718                         *validate = true; /* signedness is never ambiguous */
5719                 break;
5720         case BPF_FIELD_LSHIFT_U64:
5721 #if __BYTE_ORDER == __LITTLE_ENDIAN
5722                 *val = 64 - (bit_off + bit_sz - byte_off * 8);
5723 #else
5724                 *val = (8 - byte_sz) * 8 + (bit_off - byte_off * 8);
5725 #endif
5726                 break;
5727         case BPF_FIELD_RSHIFT_U64:
5728                 *val = 64 - bit_sz;
5729                 if (validate)
5730                         *validate = true; /* right shift is never ambiguous */
5731                 break;
5732         case BPF_FIELD_EXISTS:
5733         default:
5734                 return -EOPNOTSUPP;
5735         }
5736
5737         return 0;
5738 }
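
/* Worked example of the bitfield sizing loop above (editor's numbers): for
 * a bitfield with bit_off = 30 and bit_sz = 10 over a 4-byte underlying int,
 * the first guess is byte_off = 30/8/4*4 = 0, but the field ends at bit 40,
 * which exceeds 4 * 8 = 32 readable bits, so byte_sz doubles to 8 (byte_off
 * stays 0); 40 <= 64, so the field is read with a single 8-byte load at
 * byte offset 0.
 */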
5739
5740 static int bpf_core_calc_type_relo(const struct bpf_core_relo *relo,
5741                                    const struct bpf_core_spec *spec,
5742                                    __u32 *val)
5743 {
5744         __s64 sz;
5745
5746         /* type-based relos return zero when target type is not found */
5747         if (!spec) {
5748                 *val = 0;
5749                 return 0;
5750         }
5751
5752         switch (relo->kind) {
5753         case BPF_TYPE_ID_TARGET:
5754                 *val = spec->root_type_id;
5755                 break;
5756         case BPF_TYPE_EXISTS:
5757                 *val = 1;
5758                 break;
5759         case BPF_TYPE_SIZE:
5760                 sz = btf__resolve_size(spec->btf, spec->root_type_id);
5761                 if (sz < 0)
5762                         return -EINVAL;
5763                 *val = sz;
5764                 break;
5765         case BPF_TYPE_ID_LOCAL:
5766         /* BPF_TYPE_ID_LOCAL is handled specially and shouldn't get here */
5767         default:
5768                 return -EOPNOTSUPP;
5769         }
5770
5771         return 0;
5772 }
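
/* These type-based relo kinds back the bpf_core_type_exists(),
 * bpf_core_type_size() and bpf_core_type_id_kernel() macros from
 * bpf_core_read.h. A sketch of typical BPF-program-side usage (the struct
 * is just an example):
 *
 *   if (bpf_core_type_exists(struct tcp_sock))
 *       sz = bpf_core_type_size(struct tcp_sock);
 */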
5773
5774 static int bpf_core_calc_enumval_relo(const struct bpf_core_relo *relo,
5775                                       const struct bpf_core_spec *spec,
5776                                       __u32 *val)
5777 {
5778         const struct btf_type *t;
5779         const struct btf_enum *e;
5780
5781         switch (relo->kind) {
5782         case BPF_ENUMVAL_EXISTS:
5783                 *val = spec ? 1 : 0;
5784                 break;
5785         case BPF_ENUMVAL_VALUE:
5786                 if (!spec)
5787                         return -EUCLEAN; /* request instruction poisoning */
5788                 t = btf__type_by_id(spec->btf, spec->spec[0].type_id);
5789                 e = btf_enum(t) + spec->spec[0].idx;
5790                 *val = e->val;
5791                 break;
5792         default:
5793                 return -EOPNOTSUPP;
5794         }
5795
5796         return 0;
5797 }
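
/* Likewise, the enum-value relo kinds back bpf_core_enum_value_exists() and
 * bpf_core_enum_value() from bpf_core_read.h. A hedged usage sketch (enum
 * and enumerator chosen for illustration):
 *
 *   if (bpf_core_enum_value_exists(enum pid_type, PIDTYPE_TGID))
 *       t = bpf_core_enum_value(enum pid_type, PIDTYPE_TGID);
 */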
5798
5799 struct bpf_core_relo_res {
5801         /* expected value in the instruction, unless validate == false */
5802         __u32 orig_val;
5803         /* new value that needs to be patched up to */
5804         __u32 new_val;
5805         /* relocation unsuccessful, poison instruction, but don't fail load */
5806         bool poison;
5807         /* some relocations can't be validated against orig_val */
5808         bool validate;
5809         /* for field byte offset relocations of the forms:
5810          *     *(T *)(rX + <off>) = rY
5811          *     rX = *(T *)(rY + <off>),
5812          * we remember the original and resolved field sizes to adjust direct
5813          * memory loads of pointers and integers; this is necessary for 32-bit
5814          * host kernel architectures, but it also allows automatically
5815          * relocating fields that were resized, e.g., from u32 to u64.
5816          */
5817         bool fail_memsz_adjust;
5818         __u32 orig_sz;
5819         __u32 orig_type_id;
5820         __u32 new_sz;
5821         __u32 new_type_id;
5822 };
5823
5824 /* Calculate original and target relocation values, given local and target
5825  * specs and relocation kind. These values are calculated for each candidate.
5826  * If there are multiple candidates, resulting values should all be consistent
5827  * with each other. Otherwise, libbpf will refuse to proceed due to ambiguity.
5828  * If the instruction has to be poisoned, res->poison will be set to true.
5829  */
5830 static int bpf_core_calc_relo(const struct bpf_program *prog,
5831                               const struct bpf_core_relo *relo,
5832                               int relo_idx,
5833                               const struct bpf_core_spec *local_spec,
5834                               const struct bpf_core_spec *targ_spec,
5835                               struct bpf_core_relo_res *res)
5836 {
5837         int err = -EOPNOTSUPP;
5838
5839         res->orig_val = 0;
5840         res->new_val = 0;
5841         res->poison = false;
5842         res->validate = true;
5843         res->fail_memsz_adjust = false;
5844         res->orig_sz = res->new_sz = 0;
5845         res->orig_type_id = res->new_type_id = 0;
5846
5847         if (core_relo_is_field_based(relo->kind)) {
5848                 err = bpf_core_calc_field_relo(prog, relo, local_spec,
5849                                                &res->orig_val, &res->orig_sz,
5850                                                &res->orig_type_id, &res->validate);
5851                 err = err ?: bpf_core_calc_field_relo(prog, relo, targ_spec,
5852                                                       &res->new_val, &res->new_sz,
5853                                                       &res->new_type_id, NULL);
5854                 if (err)
5855                         goto done;
5856                 /* Validate if it's safe to adjust load/store memory size.
5857                  * Adjustments are performed only if original and new memory
5858                  * sizes differ.
5859                  */
5860                 res->fail_memsz_adjust = false;
5861                 if (res->orig_sz != res->new_sz) {
5862                         const struct btf_type *orig_t, *new_t;
5863
5864                         orig_t = btf__type_by_id(local_spec->btf, res->orig_type_id);
5865                         new_t = btf__type_by_id(targ_spec->btf, res->new_type_id);
5866
5867                         /* There are two use cases in which it's safe to
5868                          * adjust load/store's mem size:
5869                          *   - reading a 32-bit kernel pointer, while on the BPF
5870                          *   side pointers are always 64-bit; in this case
5871                          *   it's safe to "downsize" the instruction size because
5872                          *   the pointer is treated as an unsigned integer with
5873                          *   zero-extended upper 32 bits;
5874                          *   - reading unsigned integers, again because
5875                          *   zero-extension preserves the value correctly.
5876                          *
5877                          * In all other cases it's incorrect to attempt to
5878                          * load/store field because read value will be
5879                          * incorrect, so we poison relocated instruction.
5880                          */
5881                         if (btf_is_ptr(orig_t) && btf_is_ptr(new_t))
5882                                 goto done;
5883                         if (btf_is_int(orig_t) && btf_is_int(new_t) &&
5884                             btf_int_encoding(orig_t) != BTF_INT_SIGNED &&
5885                             btf_int_encoding(new_t) != BTF_INT_SIGNED)
5886                                 goto done;
5887
5888                         /* mark as invalid mem size adjustment, but this will
5889                          * only be checked for LDX/STX/ST insns
5890                          */
5891                         res->fail_memsz_adjust = true;
5892                 }
5893         } else if (core_relo_is_type_based(relo->kind)) {
5894                 err = bpf_core_calc_type_relo(relo, local_spec, &res->orig_val);
5895                 err = err ?: bpf_core_calc_type_relo(relo, targ_spec, &res->new_val);
5896         } else if (core_relo_is_enumval_based(relo->kind)) {
5897                 err = bpf_core_calc_enumval_relo(relo, local_spec, &res->orig_val);
5898                 err = err ?: bpf_core_calc_enumval_relo(relo, targ_spec, &res->new_val);
5899         }
5900
5901 done:
5902         if (err == -EUCLEAN) {
5903                 /* EUCLEAN is used to signal instruction poisoning request */
5904                 res->poison = true;
5905                 err = 0;
5906         } else if (err == -EOPNOTSUPP) {
5907                 /* EOPNOTSUPP means unknown/unsupported relocation */
5908                 pr_warn("prog '%s': relo #%d: unrecognized CO-RE relocation %s (%d) at insn #%d\n",
5909                         prog->name, relo_idx, core_relo_kind_str(relo->kind),
5910                         relo->kind, relo->insn_off / 8);
5911         }
5912
5913         return err;
5914 }
5915
5916 /*
5917  * Turn an instruction for which CO-RE relocation failed into an invalid one
5918  * with a distinct signature.
5919  */
5920 static void bpf_core_poison_insn(struct bpf_program *prog, int relo_idx,
5921                                  int insn_idx, struct bpf_insn *insn)
5922 {
5923         pr_debug("prog '%s': relo #%d: substituting insn #%d w/ invalid insn\n",
5924                  prog->name, relo_idx, insn_idx);
5925         insn->code = BPF_JMP | BPF_CALL;
5926         insn->dst_reg = 0;
5927         insn->src_reg = 0;
5928         insn->off = 0;
5929         /* if this instruction is reachable (not dead code), the
5930          * verifier will complain with the following message:
5931          * invalid func unknown#195896080
5932          */
5933         insn->imm = 195896080; /* => 0xbad2310 => "bad relo" */
5934 }
5935
5936 static int insn_bpf_size_to_bytes(struct bpf_insn *insn)
5937 {
5938         switch (BPF_SIZE(insn->code)) {
5939         case BPF_DW: return 8;
5940         case BPF_W: return 4;
5941         case BPF_H: return 2;
5942         case BPF_B: return 1;
5943         default: return -1;
5944         }
5945 }
5946
5947 static int insn_bytes_to_bpf_size(__u32 sz)
5948 {
5949         switch (sz) {
5950         case 8: return BPF_DW;
5951         case 4: return BPF_W;
5952         case 2: return BPF_H;
5953         case 1: return BPF_B;
5954         default: return -1;
5955         }
5956 }
5957
5958 /*
5959  * Patch relocatable BPF instruction.
5960  *
5961  * Patched value is determined by relocation kind and target specification.
5962  * For existence relocations target spec will be NULL if field/type is not found.
5963  * Expected insn->imm value is determined using relocation kind and local
5964  * spec, and is checked before patching instruction. If actual insn->imm value
5965  * is wrong, bail out with error.
5966  *
5967  * Currently supported classes of BPF instruction are:
5968  * 1. rX = <imm> (assignment with immediate operand);
5969  * 2. rX += <imm> (arithmetic operations with immediate operand);
5970  * 3. rX = <imm64> (load with 64-bit immediate value);
5971  * 4. rX = *(T *)(rY + <off>), where T is one of {u8, u16, u32, u64};
5972  * 5. *(T *)(rX + <off>) = rY, where T is one of {u8, u16, u32, u64};
5973  * 6. *(T *)(rX + <off>) = <imm>, where T is one of {u8, u16, u32, u64}.
5974  */
5975 static int bpf_core_patch_insn(struct bpf_program *prog,
5976                                const struct bpf_core_relo *relo,
5977                                int relo_idx,
5978                                const struct bpf_core_relo_res *res)
5979 {
5980         __u32 orig_val, new_val;
5981         struct bpf_insn *insn;
5982         int insn_idx;
5983         __u8 class;
5984
5985         if (relo->insn_off % BPF_INSN_SZ)
5986                 return -EINVAL;
5987         insn_idx = relo->insn_off / BPF_INSN_SZ;
5988         /* adjust insn_idx from section frame of reference to the local
5989          * program's frame of reference; (sub-)program code is not yet
5990          * relocated, so it's enough to just subtract in-section offset
5991          */
5992         insn_idx = insn_idx - prog->sec_insn_off;
5993         insn = &prog->insns[insn_idx];
5994         class = BPF_CLASS(insn->code);
5995
5996         if (res->poison) {
5997 poison:
5998                 /* poison second part of ldimm64 to avoid confusing error from
5999                  * verifier about "unknown opcode 00"
6000                  */
6001                 if (is_ldimm64_insn(insn))
6002                         bpf_core_poison_insn(prog, relo_idx, insn_idx + 1, insn + 1);
6003                 bpf_core_poison_insn(prog, relo_idx, insn_idx, insn);
6004                 return 0;
6005         }
6006
6007         orig_val = res->orig_val;
6008         new_val = res->new_val;
6009
6010         switch (class) {
6011         case BPF_ALU:
6012         case BPF_ALU64:
6013                 if (BPF_SRC(insn->code) != BPF_K)
6014                         return -EINVAL;
6015                 if (res->validate && insn->imm != orig_val) {
6016                         pr_warn("prog '%s': relo #%d: unexpected insn #%d (ALU/ALU64) value: got %u, exp %u -> %u\n",
6017                                 prog->name, relo_idx,
6018                                 insn_idx, insn->imm, orig_val, new_val);
6019                         return -EINVAL;
6020                 }
6021                 orig_val = insn->imm;
6022                 insn->imm = new_val;
6023                 pr_debug("prog '%s': relo #%d: patched insn #%d (ALU/ALU64) imm %u -> %u\n",
6024                          prog->name, relo_idx, insn_idx,
6025                          orig_val, new_val);
6026                 break;
6027         case BPF_LDX:
6028         case BPF_ST:
6029         case BPF_STX:
6030                 if (res->validate && insn->off != orig_val) {
6031                         pr_warn("prog '%s': relo #%d: unexpected insn #%d (LDX/ST/STX) value: got %u, exp %u -> %u\n",
6032                                 prog->name, relo_idx, insn_idx, insn->off, orig_val, new_val);
6033                         return -EINVAL;
6034                 }
6035                 if (new_val > SHRT_MAX) {
6036                         pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) value too big: %u\n",
6037                                 prog->name, relo_idx, insn_idx, new_val);
6038                         return -ERANGE;
6039                 }
6040                 if (res->fail_memsz_adjust) {
6041                         pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) accesses field incorrectly. "
6042                                 "Make sure you are accessing pointers, unsigned integers, or fields of matching type and size.\n",
6043                                 prog->name, relo_idx, insn_idx);
6044                         goto poison;
6045                 }
6046
6047                 orig_val = insn->off;
6048                 insn->off = new_val;
6049                 pr_debug("prog '%s': relo #%d: patched insn #%d (LDX/ST/STX) off %u -> %u\n",
6050                          prog->name, relo_idx, insn_idx, orig_val, new_val);
6051
6052                 if (res->new_sz != res->orig_sz) {
6053                         int insn_bytes_sz, insn_bpf_sz;
6054
6055                         insn_bytes_sz = insn_bpf_size_to_bytes(insn);
6056                         if (insn_bytes_sz != res->orig_sz) {
6057                                 pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) unexpected mem size: got %d, exp %u\n",
6058                                         prog->name, relo_idx, insn_idx, insn_bytes_sz, res->orig_sz);
6059                                 return -EINVAL;
6060                         }
6061
6062                         insn_bpf_sz = insn_bytes_to_bpf_size(res->new_sz);
6063                         if (insn_bpf_sz < 0) {
6064                                 pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) invalid new mem size: %u\n",
6065                                         prog->name, relo_idx, insn_idx, res->new_sz);
6066                                 return -EINVAL;
6067                         }
6068
6069                         insn->code = BPF_MODE(insn->code) | insn_bpf_sz | BPF_CLASS(insn->code);
6070                         pr_debug("prog '%s': relo #%d: patched insn #%d (LDX/ST/STX) mem_sz %u -> %u\n",
6071                                  prog->name, relo_idx, insn_idx, res->orig_sz, res->new_sz);
6072                 }
6073                 break;
6074         case BPF_LD: {
6075                 __u64 imm;
6076
6077                 if (!is_ldimm64_insn(insn) ||
6078                     insn[0].src_reg != 0 || insn[0].off != 0 ||
6079                     insn_idx + 1 >= prog->insns_cnt ||
6080                     insn[1].code != 0 || insn[1].dst_reg != 0 ||
6081                     insn[1].src_reg != 0 || insn[1].off != 0) {
6082                         pr_warn("prog '%s': relo #%d: insn #%d (LDIMM64) has unexpected form\n",
6083                                 prog->name, relo_idx, insn_idx);
6084                         return -EINVAL;
6085                 }
6086
6087                 imm = insn[0].imm + ((__u64)insn[1].imm << 32);
6088                 if (res->validate && imm != orig_val) {
6089                         pr_warn("prog '%s': relo #%d: unexpected insn #%d (LDIMM64) value: got %llu, exp %u -> %u\n",
6090                                 prog->name, relo_idx,
6091                                 insn_idx, (unsigned long long)imm,
6092                                 orig_val, new_val);
6093                         return -EINVAL;
6094                 }
6095
6096                 insn[0].imm = new_val;
6097                 insn[1].imm = 0; /* currently only 32-bit values are supported */
6098                 pr_debug("prog '%s': relo #%d: patched insn #%d (LDIMM64) imm64 %llu -> %u\n",
6099                          prog->name, relo_idx, insn_idx,
6100                          (unsigned long long)imm, new_val);
6101                 break;
6102         }
6103         default:
6104                 pr_warn("prog '%s': relo #%d: trying to relocate unrecognized insn #%d, code:0x%x, src:0x%x, dst:0x%x, off:0x%x, imm:0x%x\n",
6105                         prog->name, relo_idx, insn_idx, insn->code,
6106                         insn->src_reg, insn->dst_reg, insn->off, insn->imm);
6107                 return -EINVAL;
6108         }
6109
6110         return 0;
6111 }
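
/* Editor's illustration of a typical field byte offset patch (offsets are
 * hypothetical). For a BPF_FIELD_BYTE_OFFSET relo on a BPF_LDX instruction
 *
 *   r2 = *(u32 *)(r1 + 104)    // offset per local (compile-time) BTF
 *
 * with res->new_val == 112, only the off field is rewritten in place:
 *
 *   r2 = *(u32 *)(r1 + 112)    // offset per target (kernel) BTF
 */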
6112
6113 /* Output spec definition in the format:
6114  * [<type-id>] (<type-name>) + <raw-spec> => <offset>@<spec>,
6115  * where <spec> is a C-syntax view of recorded field access, e.g.: x.a[3].b
6116  */
6117 static void bpf_core_dump_spec(int level, const struct bpf_core_spec *spec)
6118 {
6119         const struct btf_type *t;
6120         const struct btf_enum *e;
6121         const char *s;
6122         __u32 type_id;
6123         int i;
6124
6125         type_id = spec->root_type_id;
6126         t = btf__type_by_id(spec->btf, type_id);
6127         s = btf__name_by_offset(spec->btf, t->name_off);
6128
6129         libbpf_print(level, "[%u] %s %s", type_id, btf_kind_str(t), str_is_empty(s) ? "<anon>" : s);
6130
6131         if (core_relo_is_type_based(spec->relo_kind))
6132                 return;
6133
6134         if (core_relo_is_enumval_based(spec->relo_kind)) {
6135                 t = skip_mods_and_typedefs(spec->btf, type_id, NULL);
6136                 e = btf_enum(t) + spec->raw_spec[0];
6137                 s = btf__name_by_offset(spec->btf, e->name_off);
6138
6139                 libbpf_print(level, "::%s = %u", s, e->val);
6140                 return;
6141         }
6142
6143         if (core_relo_is_field_based(spec->relo_kind)) {
6144                 for (i = 0; i < spec->len; i++) {
6145                         if (spec->spec[i].name)
6146                                 libbpf_print(level, ".%s", spec->spec[i].name);
6147                         else if (i > 0 || spec->spec[i].idx > 0)
6148                                 libbpf_print(level, "[%u]", spec->spec[i].idx);
6149                 }
6150
6151                 libbpf_print(level, " (");
6152                 for (i = 0; i < spec->raw_len; i++)
6153                         libbpf_print(level, "%s%d", i == 0 ? "" : ":", spec->raw_spec[i]);
6154
6155                 if (spec->bit_offset % 8)
6156                         libbpf_print(level, " @ offset %u.%u)",
6157                                      spec->bit_offset / 8, spec->bit_offset % 8);
6158                 else
6159                         libbpf_print(level, " @ offset %u)", spec->bit_offset / 8);
6160                 return;
6161         }
6162 }
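
/* Example of the resulting debug output (editor's sketch; the type ID and
 * offset are illustrative):
 *
 *   [138] struct task_struct.pid (0:24 @ offset 2288)
 *
 * i.e. root type ID 138, field access .pid, raw spec 0:24, byte offset 2288.
 */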
6163
6164 static size_t bpf_core_hash_fn(const void *key, void *ctx)
6165 {
6166         return (size_t)key;
6167 }
6168
6169 static bool bpf_core_equal_fn(const void *k1, const void *k2, void *ctx)
6170 {
6171         return k1 == k2;
6172 }
6173
6174 static void *u32_as_hash_key(__u32 x)
6175 {
6176         return (void *)(uintptr_t)x;
6177 }
6178
6179 /*
6180  * CO-RE relocate single instruction.
6181  *
6182  * The outline and important points of the algorithm:
6183  * 1. For given local type, find corresponding candidate target types.
6184  *    Candidate type is a type with the same "essential" name, ignoring
6185  *    everything after last triple underscore (___). E.g., `sample`,
6186  *    `sample___flavor_one`, `sample___flavor_another_one`, are all candidates
6187  *    for each other. Names with triple underscore are referred to as
6188  *    "flavors" and are useful, among other things, to allow to
6189  *    specify/support incompatible variations of the same kernel struct, which
6190  *    might differ between different kernel versions and/or build
6191  *    configurations.
6192  *
6193  *    N.B. Struct "flavors" could be generated by bpftool's BTF-to-C
6194  *    converter, when deduplicated BTF of a kernel still contains more than
6195  *    one distinct type with the same name. In that case, ___2, ___3, etc.
6196  *    are appended starting from the second name conflict. But struct flavors
6197  *    are also useful when defined "locally", in the BPF program, to extract
6198  *    the same data despite incompatible changes between different kernel
6199  *    versions/configurations. For instance, to handle field renames between
6200  *    kernel versions, one can use two flavors of the struct with the same
6201  *    common name and use conditional relocations to extract that field,
6202  *    depending on the target kernel version (see the sketch below).
6203  * 2. For each candidate type, try to match local specification to this
6204  *    candidate target type. Matching involves finding corresponding
6205  *    high-level spec accessors, meaning that all named fields should match,
6206  *    as well as all array accesses should be within the actual bounds. Also,
6207  *    types should be compatible (see bpf_core_fields_are_compat for details).
6208  * 3. It is supported and expected that there might be multiple flavors
6209  *    matching the spec. As long as all the specs resolve to the same set of
6210  *    offsets across all candidates, there is no error. If there is any
6211  *    ambiguity, CO-RE relocation will fail. This is necessary to accommodate
6212  *    imperfections of BTF deduplication, which can cause slight duplication of
6213  *    the same BTF type, if some directly or indirectly referenced (by
6214  *    pointer) type gets resolved to different actual types in different
6215  *    object files. If such a situation occurs, deduplicated BTF will end up
6216  *    with two (or more) structurally identical types, which differ only in
6217  *    types they refer to through pointer. This should be OK in most cases and
6218  *    is not an error.
6219  * 4. Candidate types search is performed by linearly scanning through all
6220  *    types in target BTF. It is anticipated that this is overall more
6221  *    efficient memory-wise and not significantly worse (if not better)
6222  *    CPU-wise compared to prebuilding a map from all local type names to
6223  *    a list of candidate type names. It's also sped up by caching the
6224  *    resolved list of matching candidates for each local "root" type ID that has at
6225  *    least one bpf_core_relo associated with it. This list is shared
6226  *    between multiple relocations for the same type ID and is updated as some
6227  *    of the candidates are pruned due to structural incompatibility.
6228  */
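
/* A sketch of the struct "flavor" technique from point 1 above (editor's
 * illustration; struct and field names are hypothetical). Both flavors
 * resolve against the same kernel struct fwd_info, whichever variant the
 * target BTF actually has:
 *
 *   struct fwd_info___old { int dest; };
 *   struct fwd_info___new { int target; };
 *
 *   if (bpf_core_field_exists(((struct fwd_info___new *)fi)->target))
 *       d = BPF_CORE_READ((struct fwd_info___new *)fi, target);
 *   else
 *       d = BPF_CORE_READ((struct fwd_info___old *)fi, dest);
 */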
6229 static int bpf_core_apply_relo(struct bpf_program *prog,
6230                                const struct bpf_core_relo *relo,
6231                                int relo_idx,
6232                                const struct btf *local_btf,
6233                                struct hashmap *cand_cache)
6234 {
6235         struct bpf_core_spec local_spec, cand_spec, targ_spec = {};
6236         const void *type_key = u32_as_hash_key(relo->type_id);
6237         struct bpf_core_relo_res cand_res, targ_res;
6238         const struct btf_type *local_type;
6239         const char *local_name;
6240         struct core_cand_list *cands = NULL;
6241         __u32 local_id;
6242         const char *spec_str;
6243         int i, j, err;
6244
6245         local_id = relo->type_id;
6246         local_type = btf__type_by_id(local_btf, local_id);
6247         if (!local_type)
6248                 return -EINVAL;
6249
6250         local_name = btf__name_by_offset(local_btf, local_type->name_off);
6251         if (!local_name)
6252                 return -EINVAL;
6253
6254         spec_str = btf__name_by_offset(local_btf, relo->access_str_off);
6255         if (str_is_empty(spec_str))
6256                 return -EINVAL;
6257
6258         if (prog->obj->gen_loader) {
6259                 pr_warn("// TODO core_relo: prog %td insn[%d] %s %s kind %d\n",
6260                         prog - prog->obj->programs, relo->insn_off / 8,
6261                         local_name, spec_str, relo->kind);
6262                 return -ENOTSUP;
6263         }
6264         err = bpf_core_parse_spec(local_btf, local_id, spec_str, relo->kind, &local_spec);
6265         if (err) {
6266                 pr_warn("prog '%s': relo #%d: parsing [%d] %s %s + %s failed: %d\n",
6267                         prog->name, relo_idx, local_id, btf_kind_str(local_type),
6268                         str_is_empty(local_name) ? "<anon>" : local_name,
6269                         spec_str, err);
6270                 return -EINVAL;
6271         }
6272
6273         pr_debug("prog '%s': relo #%d: kind <%s> (%d), spec is ", prog->name,
6274                  relo_idx, core_relo_kind_str(relo->kind), relo->kind);
6275         bpf_core_dump_spec(LIBBPF_DEBUG, &local_spec);
6276         libbpf_print(LIBBPF_DEBUG, "\n");
6277
6278         /* TYPE_ID_LOCAL relo is special and doesn't need candidate search */
6279         if (relo->kind == BPF_TYPE_ID_LOCAL) {
6280                 targ_res.validate = true;
6281                 targ_res.poison = false;
6282                 targ_res.orig_val = local_spec.root_type_id;
6283                 targ_res.new_val = local_spec.root_type_id;
6284                 goto patch_insn;
6285         }
6286
6287         /* libbpf doesn't support candidate search for anonymous types */
6288         if (str_is_empty(local_name)) {
6289                 pr_warn("prog '%s': relo #%d: <%s> (%d) relocation doesn't support anonymous types\n",
6290                         prog->name, relo_idx, core_relo_kind_str(relo->kind), relo->kind);
6291                 return -EOPNOTSUPP;
6292         }
6293
6294         if (!hashmap__find(cand_cache, type_key, (void **)&cands)) {
6295                 cands = bpf_core_find_cands(prog->obj, local_btf, local_id);
6296                 if (IS_ERR(cands)) {
6297                         pr_warn("prog '%s': relo #%d: target candidate search failed for [%d] %s %s: %ld\n",
6298                                 prog->name, relo_idx, local_id, btf_kind_str(local_type),
6299                                 local_name, PTR_ERR(cands));
6300                         return PTR_ERR(cands);
6301                 }
6302                 err = hashmap__set(cand_cache, type_key, cands, NULL, NULL);
6303                 if (err) {
6304                         bpf_core_free_cands(cands);
6305                         return err;
6306                 }
6307         }
6308
6309         for (i = 0, j = 0; i < cands->len; i++) {
6310                 err = bpf_core_spec_match(&local_spec, cands->cands[i].btf,
6311                                           cands->cands[i].id, &cand_spec);
6312                 if (err < 0) {
6313                         pr_warn("prog '%s': relo #%d: error matching candidate #%d ",
6314                                 prog->name, relo_idx, i);
6315                         bpf_core_dump_spec(LIBBPF_WARN, &cand_spec);
6316                         libbpf_print(LIBBPF_WARN, ": %d\n", err);
6317                         return err;
6318                 }
6319
6320                 pr_debug("prog '%s': relo #%d: %s candidate #%d ", prog->name,
6321                          relo_idx, err == 0 ? "non-matching" : "matching", i);
6322                 bpf_core_dump_spec(LIBBPF_DEBUG, &cand_spec);
6323                 libbpf_print(LIBBPF_DEBUG, "\n");
6324
6325                 if (err == 0)
6326                         continue;
6327
6328                 err = bpf_core_calc_relo(prog, relo, relo_idx, &local_spec, &cand_spec, &cand_res);
6329                 if (err)
6330                         return err;
6331
6332                 if (j == 0) {
6333                         targ_res = cand_res;
6334                         targ_spec = cand_spec;
6335                 } else if (cand_spec.bit_offset != targ_spec.bit_offset) {
6336                         /* if there are many field relo candidates, they
6337                          * should all resolve to the same bit offset
6338                          */
6339                         pr_warn("prog '%s': relo #%d: field offset ambiguity: %u != %u\n",
6340                                 prog->name, relo_idx, cand_spec.bit_offset,
6341                                 targ_spec.bit_offset);
6342                         return -EINVAL;
6343                 } else if (cand_res.poison != targ_res.poison || cand_res.new_val != targ_res.new_val) {
6344                         /* all candidates should result in the same relocation
6345                          * decision and value, otherwise it's dangerous to
6346                          * proceed due to ambiguity
6347                          */
6348                         pr_warn("prog '%s': relo #%d: relocation decision ambiguity: %s %u != %s %u\n",
6349                                 prog->name, relo_idx,
6350                                 cand_res.poison ? "failure" : "success", cand_res.new_val,
6351                                 targ_res.poison ? "failure" : "success", targ_res.new_val);
6352                         return -EINVAL;
6353                 }
6354
6355                 cands->cands[j++] = cands->cands[i];
6356         }
6357
6358         /*
6359          * For a BPF_FIELD_EXISTS relo, or when the BPF program has field
6360          * existence checks or kernel version/config checks, it's expected
6361          * that we might not find any candidates. In this case, if the field
6362          * wasn't found in any candidate, the list of candidates shouldn't
6363          * change at all; we'll just handle the relocation appropriately,
6364          * depending on the relo's kind.
6365          */
6366         if (j > 0)
6367                 cands->len = j;
6368
6369         /*
6370          * If no candidates were found, it might be either a programmer error
6371          * or an expected case, depending on whether the instruction with the
6372          * relocation is guarded in some way that makes it unreachable (dead
6373          * code) if the relocation can't be resolved. This is handled in
6374          * bpf_core_patch_insn() uniformly by replacing that instruction with
6375          * a BPF helper call insn (using an invalid helper ID). If that
6376          * instruction is indeed unreachable, it will be ignored and eliminated
6377          * by the verifier. If it was an error, the verifier will complain and
6378          * point to the specific instruction number in its log.
6379          */
6380         if (j == 0) {
6381                 pr_debug("prog '%s': relo #%d: no matching targets found\n",
6382                          prog->name, relo_idx);
6383
6384                 /* calculate single target relo result explicitly */
6385                 err = bpf_core_calc_relo(prog, relo, relo_idx, &local_spec, NULL, &targ_res);
6386                 if (err)
6387                         return err;
6388         }
6389
6390 patch_insn:
6391         /* bpf_core_patch_insn() should know how to handle missing targ_spec */
6392         err = bpf_core_patch_insn(prog, relo, relo_idx, &targ_res);
6393         if (err) {
6394                 pr_warn("prog '%s': relo #%d: failed to patch insn #%zu: %d\n",
6395                         prog->name, relo_idx, relo->insn_off / BPF_INSN_SZ, err);
6396                 return -EINVAL;
6397         }
6398
6399         return 0;
6400 }
6401
6402 static int
6403 bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path)
6404 {
6405         const struct btf_ext_info_sec *sec;
6406         const struct bpf_core_relo *rec;
6407         const struct btf_ext_info *seg;
6408         struct hashmap_entry *entry;
6409         struct hashmap *cand_cache = NULL;
6410         struct bpf_program *prog;
6411         const char *sec_name;
6412         int i, err = 0, insn_idx, sec_idx;
6413
6414         if (obj->btf_ext->core_relo_info.len == 0)
6415                 return 0;
6416
6417         if (targ_btf_path) {
6418                 obj->btf_vmlinux_override = btf__parse(targ_btf_path, NULL);
6419                 err = libbpf_get_error(obj->btf_vmlinux_override);
6420                 if (err) {
6421                         pr_warn("failed to parse target BTF: %d\n", err);
6422                         return err;
6423                 }
6424         }
6425
6426         cand_cache = hashmap__new(bpf_core_hash_fn, bpf_core_equal_fn, NULL);
6427         if (IS_ERR(cand_cache)) {
6428                 err = PTR_ERR(cand_cache);
6429                 goto out;
6430         }
6431
6432         seg = &obj->btf_ext->core_relo_info;
6433         for_each_btf_ext_sec(seg, sec) {
6434                 sec_name = btf__name_by_offset(obj->btf, sec->sec_name_off);
6435                 if (str_is_empty(sec_name)) {
6436                         err = -EINVAL;
6437                         goto out;
6438                 }
6439                 /* bpf_object's ELF is gone by now so it's not easy to find
6440                  * a section index by section name, but we can find *any*
6441                  * bpf_program within the desired section and use its
6442                  * prog->sec_idx to do a proper search by section index and
6443                  * instruction offset
6444                  */
6445                 prog = NULL;
6446                 for (i = 0; i < obj->nr_programs; i++) {
6447                         prog = &obj->programs[i];
6448                         if (strcmp(prog->sec_name, sec_name) == 0)
6449                                 break;
6450                 }
6451                 if (!prog) {
6452                         pr_warn("sec '%s': failed to find a BPF program\n", sec_name);
6453                         return -ENOENT;
6454                 }
6455                 sec_idx = prog->sec_idx;
6456
6457                 pr_debug("sec '%s': found %d CO-RE relocations\n",
6458                          sec_name, sec->num_info);
6459
6460                 for_each_btf_ext_rec(seg, sec, i, rec) {
6461                         insn_idx = rec->insn_off / BPF_INSN_SZ;
6462                         prog = find_prog_by_sec_insn(obj, sec_idx, insn_idx);
6463                         if (!prog) {
6464                                 pr_warn("sec '%s': failed to find program at insn #%d for CO-RE offset relocation #%d\n",
6465                                         sec_name, insn_idx, i);
6466                                 err = -EINVAL;
6467                                 goto out;
6468                         }
6469                         /* no need to apply CO-RE relocation if the program is
6470                          * not going to be loaded
6471                          */
6472                         if (!prog->load)
6473                                 continue;
6474
6475                         err = bpf_core_apply_relo(prog, rec, i, obj->btf, cand_cache);
6476                         if (err) {
6477                                 pr_warn("prog '%s': relo #%d: failed to relocate: %d\n",
6478                                         prog->name, i, err);
6479                                 goto out;
6480                         }
6481                 }
6482         }
6483
6484 out:
6485         /* obj->btf_vmlinux and module BTFs are freed after object load */
6486         btf__free(obj->btf_vmlinux_override);
6487         obj->btf_vmlinux_override = NULL;
6488
6489         if (!IS_ERR_OR_NULL(cand_cache)) {
6490                 hashmap__for_each_entry(cand_cache, entry, i) {
6491                         bpf_core_free_cands(entry->value);
6492                 }
6493                 hashmap__free(cand_cache);
6494         }
6495         return err;
6496 }
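
/* A hedged sketch of how a custom target BTF reaches this function from user
 * code (the .btf file path is hypothetical): targ_btf_path is plumbed in
 * through bpf_object_load_attr::target_btf_path, e.g. for kernels that don't
 * expose /sys/kernel/btf/vmlinux:
 *
 *   struct bpf_object_load_attr attr = {
 *       .obj = obj,
 *       .target_btf_path = "/srv/btf/vmlinux-5.4.btf",
 *   };
 *   err = bpf_object__load_xattr(&attr);
 */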
6497
6498 /* Relocate data references within program code:
6499  *  - map references;
6500  *  - global variable references;
6501  *  - extern references.
6502  */
6503 static int
6504 bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog)
6505 {
6506         int i;
6507
6508         for (i = 0; i < prog->nr_reloc; i++) {
6509                 struct reloc_desc *relo = &prog->reloc_desc[i];
6510                 struct bpf_insn *insn = &prog->insns[relo->insn_idx];
6511                 struct extern_desc *ext;
6512
6513                 switch (relo->type) {
6514                 case RELO_LD64:
6515                         if (obj->gen_loader) {
6516                                 insn[0].src_reg = BPF_PSEUDO_MAP_IDX;
6517                                 insn[0].imm = relo->map_idx;
6518                         } else {
6519                                 insn[0].src_reg = BPF_PSEUDO_MAP_FD;
6520                                 insn[0].imm = obj->maps[relo->map_idx].fd;
6521                         }
6522                         break;
6523                 case RELO_DATA:
6524                         insn[1].imm = insn[0].imm + relo->sym_off;
6525                         if (obj->gen_loader) {
6526                                 insn[0].src_reg = BPF_PSEUDO_MAP_IDX_VALUE;
6527                                 insn[0].imm = relo->map_idx;
6528                         } else {
6529                                 insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
6530                                 insn[0].imm = obj->maps[relo->map_idx].fd;
6531                         }
6532                         break;
6533                 case RELO_EXTERN_VAR:
6534                         ext = &obj->externs[relo->sym_off];
6535                         if (ext->type == EXT_KCFG) {
6536                                 if (obj->gen_loader) {
6537                                         insn[0].src_reg = BPF_PSEUDO_MAP_IDX_VALUE;
6538                                         insn[0].imm = obj->kconfig_map_idx;
6539                                 } else {
6540                                         insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
6541                                         insn[0].imm = obj->maps[obj->kconfig_map_idx].fd;
6542                                 }
6543                                 insn[1].imm = ext->kcfg.data_off;
6544                         } else /* EXT_KSYM */ {
6545                                 if (ext->ksym.type_id) { /* typed ksyms */
6546                                         insn[0].src_reg = BPF_PSEUDO_BTF_ID;
6547                                         insn[0].imm = ext->ksym.kernel_btf_id;
6548                                         insn[1].imm = ext->ksym.kernel_btf_obj_fd;
6549                                 } else { /* typeless ksyms */
6550                                         insn[0].imm = (__u32)ext->ksym.addr;
6551                                         insn[1].imm = ext->ksym.addr >> 32;
6552                                 }
6553                         }
6554                         break;
6555                 case RELO_EXTERN_FUNC:
6556                         ext = &obj->externs[relo->sym_off];
6557                         insn[0].src_reg = BPF_PSEUDO_KFUNC_CALL;
6558                         insn[0].imm = ext->ksym.kernel_btf_id;
6559                         break;
6560                 case RELO_SUBPROG_ADDR:
6561                         if (insn[0].src_reg != BPF_PSEUDO_FUNC) {
6562                                 pr_warn("prog '%s': relo #%d: bad insn\n",
6563                                         prog->name, i);
6564                                 return -EINVAL;
6565                         }
6566                         /* handled already */
6567                         break;
6568                 case RELO_CALL:
6569                         /* handled already */
6570                         break;
6571                 default:
6572                         pr_warn("prog '%s': relo #%d: bad relo type %d\n",
6573                                 prog->name, i, relo->type);
6574                         return -EINVAL;
6575                 }
6576         }
6577
6578         return 0;
6579 }
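
/* Editor's illustration of the RELO_DATA case above (values hypothetical):
 * for a global variable 16 bytes into the .data map's value, the compiler
 * emits an ldimm64 whose imm pair is patched as follows (non-gen_loader
 * path):
 *
 *   insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
 *   insn[0].imm     = <.data map fd>;
 *   insn[1].imm     = 16;    // offset within the map value
 */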
6580
6581 static int adjust_prog_btf_ext_info(const struct bpf_object *obj,
6582                                     const struct bpf_program *prog,
6583                                     const struct btf_ext_info *ext_info,
6584                                     void **prog_info, __u32 *prog_rec_cnt,
6585                                     __u32 *prog_rec_sz)
6586 {
6587         void *copy_start = NULL, *copy_end = NULL;
6588         void *rec, *rec_end, *new_prog_info;
6589         const struct btf_ext_info_sec *sec;
6590         size_t old_sz, new_sz;
6591         const char *sec_name;
6592         int i, off_adj;
6593
6594         for_each_btf_ext_sec(ext_info, sec) {
6595                 sec_name = btf__name_by_offset(obj->btf, sec->sec_name_off);
6596                 if (!sec_name)
6597                         return -EINVAL;
6598                 if (strcmp(sec_name, prog->sec_name) != 0)
6599                         continue;
6600
6601                 for_each_btf_ext_rec(ext_info, sec, i, rec) {
6602                         __u32 insn_off = *(__u32 *)rec / BPF_INSN_SZ;
6603
6604                         if (insn_off < prog->sec_insn_off)
6605                                 continue;
6606                         if (insn_off >= prog->sec_insn_off + prog->sec_insn_cnt)
6607                                 break;
6608
6609                         if (!copy_start)
6610                                 copy_start = rec;
6611                         copy_end = rec + ext_info->rec_size;
6612                 }
6613
6614                 if (!copy_start)
6615                         return -ENOENT;
6616
6617                 /* append func/line info of a given (sub-)program to the main
6618                  * program func/line info
6619                  */
6620                 old_sz = (size_t)(*prog_rec_cnt) * ext_info->rec_size;
6621                 new_sz = old_sz + (copy_end - copy_start);
6622                 new_prog_info = realloc(*prog_info, new_sz);
6623                 if (!new_prog_info)
6624                         return -ENOMEM;
6625                 *prog_info = new_prog_info;
6626                 *prog_rec_cnt = new_sz / ext_info->rec_size;
6627                 memcpy(new_prog_info + old_sz, copy_start, copy_end - copy_start);
6628
6629                 /* Kernel instruction offsets are in units of 8-byte
6630                  * instructions, while .BTF.ext instruction offsets generated
6631                  * by Clang are in units of bytes. So convert Clang offsets
6632                  * into kernel offsets and adjust offset according to program
6633                  * relocated position.
6634                  */
6635                 off_adj = prog->sub_insn_off - prog->sec_insn_off;
6636                 rec = new_prog_info + old_sz;
6637                 rec_end = new_prog_info + new_sz;
6638                 for (; rec < rec_end; rec += ext_info->rec_size) {
6639                         __u32 *insn_off = rec;
6640
6641                         *insn_off = *insn_off / BPF_INSN_SZ + off_adj;
6642                 }
6643                 *prog_rec_sz = ext_info->rec_size;
6644                 return 0;
6645         }
6646
6647         return -ENOENT;
6648 }
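
/* Worked example of the offset adjustment above (editor's numbers): a
 * line_info record at byte offset 128 within its section is insn #16
 * (128 / BPF_INSN_SZ). If the subprog started at sec_insn_off 8 and was
 * placed at sub_insn_off 100 in the main program, off_adj = 100 - 8 = 92
 * and the final record offset becomes 16 + 92 = 108.
 */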
6649
6650 static int
6651 reloc_prog_func_and_line_info(const struct bpf_object *obj,
6652                               struct bpf_program *main_prog,
6653                               const struct bpf_program *prog)
6654 {
6655         int err;
6656
6657         /* no .BTF.ext relocation if .BTF.ext is missing or the kernel doesn't
6658          * support func/line info
6659          */
6660         if (!obj->btf_ext || !kernel_supports(obj, FEAT_BTF_FUNC))
6661                 return 0;
6662
6663         /* only attempt func info relocation if main program's func_info
6664          * relocation was successful
6665          */
6666         if (main_prog != prog && !main_prog->func_info)
6667                 goto line_info;
6668
6669         err = adjust_prog_btf_ext_info(obj, prog, &obj->btf_ext->func_info,
6670                                        &main_prog->func_info,
6671                                        &main_prog->func_info_cnt,
6672                                        &main_prog->func_info_rec_size);
6673         if (err) {
6674                 if (err != -ENOENT) {
6675                         pr_warn("prog '%s': error relocating .BTF.ext function info: %d\n",
6676                                 prog->name, err);
6677                         return err;
6678                 }
6679                 if (main_prog->func_info) {
6680                         /*
6681                          * Some info has already been found, but the last
6682                          * btf_ext reloc had a problem; we have to error out.
6683                          */
6684                         pr_warn("prog '%s': missing .BTF.ext function info.\n", prog->name);
6685                         return err;
6686                 }
6687                 /* Failed to load the very first info; ignore the rest. */
6688                 pr_warn("prog '%s': missing .BTF.ext function info for the main program, skipping all of .BTF.ext func info.\n",
6689                         prog->name);
6690         }
6691
6692 line_info:
6693         /* don't relocate line info if main program's relocation failed */
6694         if (main_prog != prog && !main_prog->line_info)
6695                 return 0;
6696
6697         err = adjust_prog_btf_ext_info(obj, prog, &obj->btf_ext->line_info,
6698                                        &main_prog->line_info,
6699                                        &main_prog->line_info_cnt,
6700                                        &main_prog->line_info_rec_size);
6701         if (err) {
6702                 if (err != -ENOENT) {
6703                         pr_warn("prog '%s': error relocating .BTF.ext line info: %d\n",
6704                                 prog->name, err);
6705                         return err;
6706                 }
6707                 if (main_prog->line_info) {
6708                         /*
6709                          * Some info has already been found, but the last
6710                          * btf_ext reloc had a problem; we have to error out.
6711                          */
6712                         pr_warn("prog '%s': missing .BTF.ext line info.\n", prog->name);
6713                         return err;
6714                 }
6715                 /* Failed to load the very first info; ignore the rest. */
6716                 pr_warn("prog '%s': missing .BTF.ext line info for the main program, skipping all of .BTF.ext line info.\n",
6717                         prog->name);
6718         }
6719         return 0;
6720 }
6721
6722 static int cmp_relo_by_insn_idx(const void *key, const void *elem)
6723 {
6724         size_t insn_idx = *(const size_t *)key;
6725         const struct reloc_desc *relo = elem;
6726
6727         if (insn_idx == relo->insn_idx)
6728                 return 0;
6729         return insn_idx < relo->insn_idx ? -1 : 1;
6730 }
6731
6732 static struct reloc_desc *find_prog_insn_relo(const struct bpf_program *prog, size_t insn_idx)
6733 {
6734         return bsearch(&insn_idx, prog->reloc_desc, prog->nr_reloc,
6735                        sizeof(*prog->reloc_desc), cmp_relo_by_insn_idx);
6736 }
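
/* Illustrative sketch (not part of libbpf): the bsearch() pattern above on a
 * small, already-sorted array with made-up values:
 *
 *     struct reloc_desc relos[3] = {
 *             { .insn_idx = 1 }, { .insn_idx = 5 }, { .insn_idx = 9 },
 *     };
 *     size_t key = 5;
 *     struct reloc_desc *r = bsearch(&key, relos, 3, sizeof(relos[0]),
 *                                    cmp_relo_by_insn_idx);
 *
 * Here r == &relos[1]; bsearch() returns NULL when no relocation targets the
 * given instruction index, which callers of find_prog_insn_relo() rely on.
 */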
6737
6738 static int append_subprog_relos(struct bpf_program *main_prog, struct bpf_program *subprog)
6739 {
6740         int new_cnt = main_prog->nr_reloc + subprog->nr_reloc;
6741         struct reloc_desc *relos;
6742         int i;
6743
6744         if (main_prog == subprog)
6745                 return 0;
6746         relos = libbpf_reallocarray(main_prog->reloc_desc, new_cnt, sizeof(*relos));
6747         if (!relos)
6748                 return -ENOMEM;
6749         memcpy(relos + main_prog->nr_reloc, subprog->reloc_desc,
6750                sizeof(*relos) * subprog->nr_reloc);
6751
6752         for (i = main_prog->nr_reloc; i < new_cnt; i++)
6753                 relos[i].insn_idx += subprog->sub_insn_off;
6754         /* After insn_idx adjustment the 'relos' array is still sorted
6755          * by insn_idx and doesn't break bsearch.
6756          */
6757         main_prog->reloc_desc = relos;
6758         main_prog->nr_reloc = new_cnt;
6759         return 0;
6760 }
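
/* A worked example of the insn_idx adjustment above (hypothetical numbers):
 * if the main prog has relos at insn_idx 2 and 7, and a subprog with a relo
 * at its local insn_idx 3 is appended at sub_insn_off 10, the copied relo
 * gets insn_idx 3 + 10 = 13. The combined sequence {2, 7, 13} is still
 * sorted, so find_prog_insn_relo()'s bsearch() keeps working on the merged
 * program.
 */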
6761
6762 static int
6763 bpf_object__reloc_code(struct bpf_object *obj, struct bpf_program *main_prog,
6764                        struct bpf_program *prog)
6765 {
6766         size_t sub_insn_idx, insn_idx, new_cnt;
6767         struct bpf_program *subprog;
6768         struct bpf_insn *insns, *insn;
6769         struct reloc_desc *relo;
6770         int err;
6771
6772         err = reloc_prog_func_and_line_info(obj, main_prog, prog);
6773         if (err)
6774                 return err;
6775
6776         for (insn_idx = 0; insn_idx < prog->sec_insn_cnt; insn_idx++) {
6777                 insn = &main_prog->insns[prog->sub_insn_off + insn_idx];
6778                 if (!insn_is_subprog_call(insn) && !insn_is_pseudo_func(insn))
6779                         continue;
6780
6781                 relo = find_prog_insn_relo(prog, insn_idx);
6782                 if (relo && relo->type == RELO_EXTERN_FUNC)
6783                         /* kfunc relocations will be handled later
6784                          * in bpf_object__relocate_data()
6785                          */
6786                         continue;
6787                 if (relo && relo->type != RELO_CALL && relo->type != RELO_SUBPROG_ADDR) {
6788                         pr_warn("prog '%s': unexpected relo for insn #%zu, type %d\n",
6789                                 prog->name, insn_idx, relo->type);
6790                         return -LIBBPF_ERRNO__RELOC;
6791                 }
6792                 if (relo) {
6793                         /* sub-program instruction index is a combination of
6794                          * an offset of a symbol pointed to by relocation and
6795                          * call instruction's imm field; for global functions,
6796                          * call always has imm = -1, but for static functions
6797                          * relocation is against STT_SECTION and insn->imm
6798                          * points to a start of a static function
6799                          *
6800                          * for subprog addr relocation, the relo->sym_off + insn->imm is
6801                          * the byte offset in the corresponding section.
6802                          */
6803                         if (relo->type == RELO_CALL)
6804                                 sub_insn_idx = relo->sym_off / BPF_INSN_SZ + insn->imm + 1;
6805                         else
6806                                 sub_insn_idx = (relo->sym_off + insn->imm) / BPF_INSN_SZ;
6807                 } else if (insn_is_pseudo_func(insn)) {
6808                         /*
6809                          * RELO_SUBPROG_ADDR relo is always emitted even if both
6810                          * functions are in the same section, so it shouldn't reach here.
6811                          */
6812                         pr_warn("prog '%s': missing subprog addr relo for insn #%zu\n",
6813                                 prog->name, insn_idx);
6814                         return -LIBBPF_ERRNO__RELOC;
6815                 } else {
6816                         /* if subprogram call is to a static function within
6817                          * the same ELF section, there won't be any relocation
6818                          * emitted, but it also means there is no additional
6819                  * offset necessary, insn->imm is relative to
6820                          * instruction's original position within the section
6821                          */
6822                         sub_insn_idx = prog->sec_insn_off + insn_idx + insn->imm + 1;
6823                 }
6824
6825                 /* we enforce that sub-programs should be in .text section */
6826                 subprog = find_prog_by_sec_insn(obj, obj->efile.text_shndx, sub_insn_idx);
6827                 if (!subprog) {
6828                         pr_warn("prog '%s': no .text section found yet sub-program call exists\n",
6829                                 prog->name);
6830                         return -LIBBPF_ERRNO__RELOC;
6831                 }
6832
6833                 /* if it's the first call instruction calling into this
6834                  * subprogram (meaning this subprog hasn't been processed
6835                  * yet) within the context of current main program:
6836                  *   - append it at the end of main program's instruction block;
6837                  *   - process it recursively, while current program is put on hold;
6838                  *   - if that subprogram calls some other not-yet-processed
6839                  *     subprogram, same thing will happen recursively until
6840                  *     there are no more unprocessed subprograms left to append
6841                  *     and relocate.
6842                  */
6843                 if (subprog->sub_insn_off == 0) {
6844                         subprog->sub_insn_off = main_prog->insns_cnt;
6845
6846                         new_cnt = main_prog->insns_cnt + subprog->insns_cnt;
6847                         insns = libbpf_reallocarray(main_prog->insns, new_cnt, sizeof(*insns));
6848                         if (!insns) {
6849                                 pr_warn("prog '%s': failed to realloc prog code\n", main_prog->name);
6850                                 return -ENOMEM;
6851                         }
6852                         main_prog->insns = insns;
6853                         main_prog->insns_cnt = new_cnt;
6854
6855                         memcpy(main_prog->insns + subprog->sub_insn_off, subprog->insns,
6856                                subprog->insns_cnt * sizeof(*insns));
6857
6858                         pr_debug("prog '%s': added %zu insns from sub-prog '%s'\n",
6859                                  main_prog->name, subprog->insns_cnt, subprog->name);
6860
6861                         /* The subprog insns are now appended. Append its relos too. */
6862                         err = append_subprog_relos(main_prog, subprog);
6863                         if (err)
6864                                 return err;
6865                         err = bpf_object__reloc_code(obj, main_prog, subprog);
6866                         if (err)
6867                                 return err;
6868                 }
6869
6870                 /* main_prog->insns memory could have been re-allocated, so
6871                  * calculate pointer again
6872                  */
6873                 insn = &main_prog->insns[prog->sub_insn_off + insn_idx];
6874                 /* calculate correct instruction position within current main
6875                  * prog; each main prog can have a different set of
6876                  * subprograms appended (potentially in a different order
6877                  * as well), so the position of any subprog can differ
6878                  * between main programs */
6879                 insn->imm = subprog->sub_insn_off - (prog->sub_insn_off + insn_idx) - 1;
6880
6881                 pr_debug("prog '%s': insn #%zu relocated, imm %d points to subprog '%s' (now at %zu offset)\n",
6882                          prog->name, insn_idx, insn->imm, subprog->name, subprog->sub_insn_off);
6883         }
6884
6885         return 0;
6886 }
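
/* A worked example of the imm math above (hypothetical layout): say the call
 * instruction sits at index 4 of the final image (prog->sub_insn_off +
 * insn_idx == 4) and the callee was appended at subprog->sub_insn_off == 10.
 * Then imm = 10 - 4 - 1 = 5: BPF call offsets are relative to the
 * instruction *following* the call, so the jump lands at 4 + 5 + 1 == 10,
 * the subprog's first instruction.
 */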
6887
6888 /*
6889  * Relocate sub-program calls.
6890  *
6891  * The algorithm operates as follows. Each entry-point BPF program
6892  * (referred to as main prog) is processed separately. Each subprog (a
6893  * non-entry function that can be called from either entry progs or other
6894  * subprogs) gets its sub_insn_off reset to zero. This serves as an
6895  * indicator that this subprogram hasn't yet been appended and relocated
6896  * within current main prog. Once it's relocated, sub_insn_off points at
6897  * the position within current main prog where given subprog was appended;
6898  * this is later used to relocate all the call instructions jumping into this subprog.
6899  *
6900  * We start with main program and process all call instructions. If the call
6901  * is into a subprog that hasn't been processed (i.e., subprog->sub_insn_off
6902  * is zero), subprog instructions are appended at the end of main program's
6903  * instruction array. Then main program is "put on hold" while we recursively
6904  * process newly appended subprogram. If that subprogram calls into another
6905  * subprogram that hasn't been appended, the new subprogram is appended to
6906  * the *main* prog's instructions (subprog's instructions are always left
6907  * untouched, as they need to be in unmodified state for subsequent main progs
6908  * and subprog instructions are always loaded only as part of a main prog) and
6909  * the process continues recursively. Once all the subprogs called from a main
6910  * prog or any of its subprogs are appended (and relocated), all their
6911  * positions within finalized instructions array are known, so it's easy to
6912  * rewrite call instructions with correct relative offsets, corresponding to
6913  * desired target subprog.
6914  *
6915  * It's important to realize that some subprogs might not be called from some
6916  * main prog and any of its called/used subprogs. Those will keep their
6917  * subprog->sub_insn_off as zero at all times and won't be appended to current
6918  * main prog and won't be relocated within the context of current main prog.
6919  * They might still be used from other main progs later.
6920  *
6921  * Visually this process can be shown as below. Suppose we have two main
6922  * programs mainA and mainB and BPF object contains three subprogs: subA,
6923  * subB, and subC. mainA calls only subA, mainB calls only subC, but subA and
6924  * subC both call subB:
6925  *
6926  *        +--------+ +-------+
6927  *        |        v v       |
6928  *     +--+---+ +--+-+-+ +---+--+
6929  *     | subA | | subB | | subC |
6930  *     +--+---+ +------+ +---+--+
6931  *        ^                  ^
6932  *        |                  |
6933  *    +---+-------+   +------+----+
6934  *    |   mainA   |   |   mainB   |
6935  *    +-----------+   +-----------+
6936  *
6937  * We'll start relocating mainA, find subA, append it, and start
6938  * processing subA recursively:
6939  *
6940  *    +-----------+------+
6941  *    |   mainA   | subA |
6942  *    +-----------+------+
6943  *
6944  * At this point we notice that subB is used from subA, so we append it and
6945  * relocate (there are no further subcalls from subB):
6946  *
6947  *    +-----------+------+------+
6948  *    |   mainA   | subA | subB |
6949  *    +-----------+------+------+
6950  *
6951  * At this point, we relocate subA calls, then go one level up and finish with
6952  * relocating mainA calls. mainA is done.
6953  *
6954  * For mainB, the process is similar but results in a different order. We start with
6955  * mainB and skip subA and subB, as mainB never calls them (at least
6956  * directly), but we see subC is needed, so we append and start processing it:
6957  *
6958  *    +-----------+------+
6959  *    |   mainB   | subC |
6960  *    +-----------+------+
6961  * Now we see subC needs subB, so we go back to it, append and relocate it:
6962  *
6963  *    +-----------+------+------+
6964  *    |   mainB   | subC | subB |
6965  *    +-----------+------+------+
6966  *
6967  * At this point we unwind recursion, relocate calls in subC, then in mainB.
6968  */
6969 static int
6970 bpf_object__relocate_calls(struct bpf_object *obj, struct bpf_program *prog)
6971 {
6972         struct bpf_program *subprog;
6973         int i, err;
6974
6975         /* mark all subprogs as not relocated (yet) within the context of
6976          * current main program
6977          */
6978         for (i = 0; i < obj->nr_programs; i++) {
6979                 subprog = &obj->programs[i];
6980                 if (!prog_is_subprog(obj, subprog))
6981                         continue;
6982
6983                 subprog->sub_insn_off = 0;
6984         }
6985
6986         err = bpf_object__reloc_code(obj, prog, prog);
6987         if (err)
6988                 return err;
6989
6990
6991         return 0;
6992 }
6993
6994 static void
6995 bpf_object__free_relocs(struct bpf_object *obj)
6996 {
6997         struct bpf_program *prog;
6998         int i;
6999
7000         /* free up relocation descriptors */
7001         for (i = 0; i < obj->nr_programs; i++) {
7002                 prog = &obj->programs[i];
7003                 zfree(&prog->reloc_desc);
7004                 prog->nr_reloc = 0;
7005         }
7006 }
7007
7008 static int
7009 bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path)
7010 {
7011         struct bpf_program *prog;
7012         size_t i, j;
7013         int err;
7014
7015         if (obj->btf_ext) {
7016                 err = bpf_object__relocate_core(obj, targ_btf_path);
7017                 if (err) {
7018                         pr_warn("failed to perform CO-RE relocations: %d\n",
7019                                 err);
7020                         return err;
7021                 }
7022         }
7023
7024         /* Before relocating calls, pre-process relocations and mark the
7025          * few ld_imm64 instructions that point to subprogs.
7026          * Otherwise bpf_object__reloc_code() later would have to consider
7027          * all ld_imm64 insns as relocation candidates. That would
7028          * slow down relocation, since the number of find_prog_insn_relo()
7029          * calls would grow and most of them would fail to find a relo.
7030          */
7031         for (i = 0; i < obj->nr_programs; i++) {
7032                 prog = &obj->programs[i];
7033                 for (j = 0; j < prog->nr_reloc; j++) {
7034                         struct reloc_desc *relo = &prog->reloc_desc[j];
7035                         struct bpf_insn *insn = &prog->insns[relo->insn_idx];
7036
7037                         /* mark the insn, so it's recognized by insn_is_pseudo_func() */
7038                         if (relo->type == RELO_SUBPROG_ADDR)
7039                                 insn[0].src_reg = BPF_PSEUDO_FUNC;
7040                 }
7041         }
7042
7043         /* relocate subprogram calls and append used subprograms to main
7044          * programs; each copy of subprogram code needs to be relocated
7045          * differently for each main program, because its code location might
7046          * have changed.
7047          * Append subprog relos to main programs to allow data relos to be
7048          * processed after text is completely relocated.
7049          */
7050         for (i = 0; i < obj->nr_programs; i++) {
7051                 prog = &obj->programs[i];
7052                 /* sub-program's sub-calls are relocated within the context of
7053                  * its main program only
7054                  */
7055                 if (prog_is_subprog(obj, prog))
7056                         continue;
7057
7058                 err = bpf_object__relocate_calls(obj, prog);
7059                 if (err) {
7060                         pr_warn("prog '%s': failed to relocate calls: %d\n",
7061                                 prog->name, err);
7062                         return err;
7063                 }
7064         }
7065         /* Process data relos for main programs */
7066         for (i = 0; i < obj->nr_programs; i++) {
7067                 prog = &obj->programs[i];
7068                 if (prog_is_subprog(obj, prog))
7069                         continue;
7070                 err = bpf_object__relocate_data(obj, prog);
7071                 if (err) {
7072                         pr_warn("prog '%s': failed to relocate data references: %d\n",
7073                                 prog->name, err);
7074                         return err;
7075                 }
7076         }
7077         if (!obj->gen_loader)
7078                 bpf_object__free_relocs(obj);
7079         return 0;
7080 }
7081
7082 static int bpf_object__collect_st_ops_relos(struct bpf_object *obj,
7083                                             GElf_Shdr *shdr, Elf_Data *data);
7084
7085 static int bpf_object__collect_map_relos(struct bpf_object *obj,
7086                                          GElf_Shdr *shdr, Elf_Data *data)
7087 {
7088         const int bpf_ptr_sz = 8, host_ptr_sz = sizeof(void *);
7089         int i, j, nrels, new_sz;
7090         const struct btf_var_secinfo *vi = NULL;
7091         const struct btf_type *sec, *var, *def;
7092         struct bpf_map *map = NULL, *targ_map;
7093         const struct btf_member *member;
7094         const char *name, *mname;
7095         Elf_Data *symbols;
7096         unsigned int moff;
7097         GElf_Sym sym;
7098         GElf_Rel rel;
7099         void *tmp;
7100
7101         if (!obj->efile.btf_maps_sec_btf_id || !obj->btf)
7102                 return -EINVAL;
7103         sec = btf__type_by_id(obj->btf, obj->efile.btf_maps_sec_btf_id);
7104         if (!sec)
7105                 return -EINVAL;
7106
7107         symbols = obj->efile.symbols;
7108         nrels = shdr->sh_size / shdr->sh_entsize;
7109         for (i = 0; i < nrels; i++) {
7110                 if (!gelf_getrel(data, i, &rel)) {
7111                         pr_warn(".maps relo #%d: failed to get ELF relo\n", i);
7112                         return -LIBBPF_ERRNO__FORMAT;
7113                 }
7114                 if (!gelf_getsym(symbols, GELF_R_SYM(rel.r_info), &sym)) {
7115                         pr_warn(".maps relo #%d: symbol %zx not found\n",
7116                                 i, (size_t)GELF_R_SYM(rel.r_info));
7117                         return -LIBBPF_ERRNO__FORMAT;
7118                 }
7119                 name = elf_sym_str(obj, sym.st_name) ?: "<?>";
7120                 if (sym.st_shndx != obj->efile.btf_maps_shndx) {
7121                         pr_warn(".maps relo #%d: '%s' isn't a BTF-defined map\n",
7122                                 i, name);
7123                         return -LIBBPF_ERRNO__RELOC;
7124                 }
7125
7126                 pr_debug(".maps relo #%d: for %zd value %zd rel.r_offset %zu name %d ('%s')\n",
7127                          i, (ssize_t)(rel.r_info >> 32), (size_t)sym.st_value,
7128                          (size_t)rel.r_offset, sym.st_name, name);
7129
7130                 for (j = 0; j < obj->nr_maps; j++) {
7131                         map = &obj->maps[j];
7132                         if (map->sec_idx != obj->efile.btf_maps_shndx)
7133                                 continue;
7134
7135                         vi = btf_var_secinfos(sec) + map->btf_var_idx;
7136                         if (vi->offset <= rel.r_offset &&
7137                             rel.r_offset + bpf_ptr_sz <= vi->offset + vi->size)
7138                                 break;
7139                 }
7140                 if (j == obj->nr_maps) {
7141                         pr_warn(".maps relo #%d: cannot find map '%s' at rel.r_offset %zu\n",
7142                                 i, name, (size_t)rel.r_offset);
7143                         return -EINVAL;
7144                 }
7145
7146                 if (!bpf_map_type__is_map_in_map(map->def.type))
7147                         return -EINVAL;
7148                 if (map->def.type == BPF_MAP_TYPE_HASH_OF_MAPS &&
7149                     map->def.key_size != sizeof(int)) {
7150                         pr_warn(".maps relo #%d: hash-of-maps '%s' should have key size %zu.\n",
7151                                 i, map->name, sizeof(int));
7152                         return -EINVAL;
7153                 }
7154
7155                 targ_map = bpf_object__find_map_by_name(obj, name);
7156                 if (!targ_map)
7157                         return -ESRCH;
7158
7159                 var = btf__type_by_id(obj->btf, vi->type);
7160                 def = skip_mods_and_typedefs(obj->btf, var->type, NULL);
7161                 if (btf_vlen(def) == 0)
7162                         return -EINVAL;
7163                 member = btf_members(def) + btf_vlen(def) - 1;
7164                 mname = btf__name_by_offset(obj->btf, member->name_off);
7165                 if (strcmp(mname, "values"))
7166                         return -EINVAL;
7167
7168                 moff = btf_member_bit_offset(def, btf_vlen(def) - 1) / 8;
7169                 if (rel.r_offset - vi->offset < moff)
7170                         return -EINVAL;
7171
7172                 moff = rel.r_offset - vi->offset - moff;
7173                 /* here we use BPF pointer size, which is always 64 bit, as we
7174                  * are parsing ELF that was built for BPF target
7175                  */
7176                 if (moff % bpf_ptr_sz)
7177                         return -EINVAL;
7178                 moff /= bpf_ptr_sz;
7179                 if (moff >= map->init_slots_sz) {
7180                         new_sz = moff + 1;
7181                         tmp = libbpf_reallocarray(map->init_slots, new_sz, host_ptr_sz);
7182                         if (!tmp)
7183                                 return -ENOMEM;
7184                         map->init_slots = tmp;
7185                         memset(map->init_slots + map->init_slots_sz, 0,
7186                                (new_sz - map->init_slots_sz) * host_ptr_sz);
7187                         map->init_slots_sz = new_sz;
7188                 }
7189                 map->init_slots[moff] = targ_map;
7190
7191                 pr_debug(".maps relo #%d: map '%s' slot [%d] points to map '%s'\n",
7192                          i, map->name, moff, name);
7193         }
7194
7195         return 0;
7196 }
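
/* For context, a sketch of the BPF-side declarations that produce the .maps
 * relocations handled above (illustrative, all names made up):
 *
 *     struct inner_map {
 *             __uint(type, BPF_MAP_TYPE_ARRAY);
 *             __uint(max_entries, 1);
 *             __type(key, int);
 *             __type(value, int);
 *     } inner_a SEC(".maps"), inner_b SEC(".maps");
 *
 *     struct {
 *             __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
 *             __uint(max_entries, 2);
 *             __type(key, int);
 *             __array(values, struct inner_map);
 *     } outer SEC(".maps") = {
 *             .values = { &inner_a, &inner_b },
 *     };
 *
 * Each &inner_X initializer is emitted as an ELF relocation against the
 * .maps section; the loop above resolves it to a bpf_map and records it in
 * map->init_slots[] for map-in-map initialization at load time.
 */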
7197
7198 static int cmp_relocs(const void *_a, const void *_b)
7199 {
7200         const struct reloc_desc *a = _a;
7201         const struct reloc_desc *b = _b;
7202
7203         if (a->insn_idx != b->insn_idx)
7204                 return a->insn_idx < b->insn_idx ? -1 : 1;
7205
7206         /* no two relocations should have the same insn_idx, but ... */
7207         if (a->type != b->type)
7208                 return a->type < b->type ? -1 : 1;
7209
7210         return 0;
7211 }
7212
7213 static int bpf_object__collect_relos(struct bpf_object *obj)
7214 {
7215         int i, err;
7216
7217         for (i = 0; i < obj->efile.nr_reloc_sects; i++) {
7218                 GElf_Shdr *shdr = &obj->efile.reloc_sects[i].shdr;
7219                 Elf_Data *data = obj->efile.reloc_sects[i].data;
7220                 int idx = shdr->sh_info;
7221
7222                 if (shdr->sh_type != SHT_REL) {
7223                         pr_warn("internal error at %d\n", __LINE__);
7224                         return -LIBBPF_ERRNO__INTERNAL;
7225                 }
7226
7227                 if (idx == obj->efile.st_ops_shndx)
7228                         err = bpf_object__collect_st_ops_relos(obj, shdr, data);
7229                 else if (idx == obj->efile.btf_maps_shndx)
7230                         err = bpf_object__collect_map_relos(obj, shdr, data);
7231                 else
7232                         err = bpf_object__collect_prog_relos(obj, shdr, data);
7233                 if (err)
7234                         return err;
7235         }
7236
7237         for (i = 0; i < obj->nr_programs; i++) {
7238                 struct bpf_program *p = &obj->programs[i];
7239
7240                 if (!p->nr_reloc)
7241                         continue;
7242
7243                 qsort(p->reloc_desc, p->nr_reloc, sizeof(*p->reloc_desc), cmp_relocs);
7244         }
7245         return 0;
7246 }
7247
7248 static bool insn_is_helper_call(struct bpf_insn *insn, enum bpf_func_id *func_id)
7249 {
7250         if (BPF_CLASS(insn->code) == BPF_JMP &&
7251             BPF_OP(insn->code) == BPF_CALL &&
7252             BPF_SRC(insn->code) == BPF_K &&
7253             insn->src_reg == 0 &&
7254             insn->dst_reg == 0) {
7255                 *func_id = insn->imm;
7256                 return true;
7257         }
7258         return false;
7259 }
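
/* Illustrative sketch (not part of libbpf): a raw helper-call instruction,
 * as matched by the predicate above, built from UAPI <linux/bpf.h>
 * definitions:
 *
 *     struct bpf_insn insn = {
 *             .code = BPF_JMP | BPF_CALL,   // src_reg and dst_reg stay 0
 *             .imm  = BPF_FUNC_probe_read_kernel,
 *     };
 *     enum bpf_func_id id;
 *     bool is_call = insn_is_helper_call(&insn, &id);
 *
 * Here is_call == true and id == BPF_FUNC_probe_read_kernel, which is
 * exactly the case bpf_object__sanitize_prog() below rewrites on old
 * kernels.
 */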
7260
7261 static int bpf_object__sanitize_prog(struct bpf_object *obj, struct bpf_program *prog)
7262 {
7263         struct bpf_insn *insn = prog->insns;
7264         enum bpf_func_id func_id;
7265         int i;
7266
7267         if (obj->gen_loader)
7268                 return 0;
7269
7270         for (i = 0; i < prog->insns_cnt; i++, insn++) {
7271                 if (!insn_is_helper_call(insn, &func_id))
7272                         continue;
7273
7274                 /* on kernels that don't yet support
7275                  * bpf_probe_read_{kernel,user}[_str] helpers, fall back
7276                  * to bpf_probe_read() which works well for old kernels
7277                  */
7278                 switch (func_id) {
7279                 case BPF_FUNC_probe_read_kernel:
7280                 case BPF_FUNC_probe_read_user:
7281                         if (!kernel_supports(obj, FEAT_PROBE_READ_KERN))
7282                                 insn->imm = BPF_FUNC_probe_read;
7283                         break;
7284                 case BPF_FUNC_probe_read_kernel_str:
7285                 case BPF_FUNC_probe_read_user_str:
7286                         if (!kernel_supports(obj, FEAT_PROBE_READ_KERN))
7287                                 insn->imm = BPF_FUNC_probe_read_str;
7288                         break;
7289                 default:
7290                         break;
7291                 }
7292         }
7293         return 0;
7294 }
7295
7296 static int
7297 load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt,
7298              char *license, __u32 kern_version, int *pfd)
7299 {
7300         struct bpf_prog_load_params load_attr = {};
7301         char *cp, errmsg[STRERR_BUFSIZE];
7302         size_t log_buf_size = 0;
7303         char *log_buf = NULL;
7304         int btf_fd, ret;
7305
7306         if (prog->type == BPF_PROG_TYPE_UNSPEC) {
7307                 /*
7308                  * The program type must be set.  Most likely we couldn't find a proper
7309                  * section definition at load time, and thus we didn't infer the type.
7310                  */
7311                 pr_warn("prog '%s': missing BPF prog type, check ELF section name '%s'\n",
7312                         prog->name, prog->sec_name);
7313                 return -EINVAL;
7314         }
7315
7316         if (!insns || !insns_cnt)
7317                 return -EINVAL;
7318
7319         load_attr.prog_type = prog->type;
7320         /* old kernels might not support specifying expected_attach_type */
7321         if (!kernel_supports(prog->obj, FEAT_EXP_ATTACH_TYPE) && prog->sec_def &&
7322             prog->sec_def->is_exp_attach_type_optional)
7323                 load_attr.expected_attach_type = 0;
7324         else
7325                 load_attr.expected_attach_type = prog->expected_attach_type;
7326         if (kernel_supports(prog->obj, FEAT_PROG_NAME))
7327                 load_attr.name = prog->name;
7328         load_attr.insns = insns;
7329         load_attr.insn_cnt = insns_cnt;
7330         load_attr.license = license;
7331         load_attr.attach_btf_id = prog->attach_btf_id;
7332         if (prog->attach_prog_fd)
7333                 load_attr.attach_prog_fd = prog->attach_prog_fd;
7334         else
7335                 load_attr.attach_btf_obj_fd = prog->attach_btf_obj_fd;
7337         load_attr.kern_version = kern_version;
7338         load_attr.prog_ifindex = prog->prog_ifindex;
7339
7340         /* specify func_info/line_info only if kernel supports them */
7341         btf_fd = bpf_object__btf_fd(prog->obj);
7342         if (btf_fd >= 0 && kernel_supports(prog->obj, FEAT_BTF_FUNC)) {
7343                 load_attr.prog_btf_fd = btf_fd;
7344                 load_attr.func_info = prog->func_info;
7345                 load_attr.func_info_rec_size = prog->func_info_rec_size;
7346                 load_attr.func_info_cnt = prog->func_info_cnt;
7347                 load_attr.line_info = prog->line_info;
7348                 load_attr.line_info_rec_size = prog->line_info_rec_size;
7349                 load_attr.line_info_cnt = prog->line_info_cnt;
7350         }
7351         load_attr.log_level = prog->log_level;
7352         load_attr.prog_flags = prog->prog_flags;
7353
7354         if (prog->obj->gen_loader) {
7355                 bpf_gen__prog_load(prog->obj->gen_loader, &load_attr,
7356                                    prog - prog->obj->programs);
7357                 *pfd = -1;
7358                 return 0;
7359         }
7360 retry_load:
7361         if (log_buf_size) {
7362                 log_buf = malloc(log_buf_size);
7363                 if (!log_buf)
7364                         return -ENOMEM;
7365
7366                 *log_buf = 0;
7367         }
7368
7369         load_attr.log_buf = log_buf;
7370         load_attr.log_buf_sz = log_buf_size;
7371         ret = libbpf__bpf_prog_load(&load_attr);
7372
7373         if (ret >= 0) {
7374                 if (log_buf && load_attr.log_level)
7375                         pr_debug("verifier log:\n%s", log_buf);
7376
7377                 if (prog->obj->rodata_map_idx >= 0 &&
7378                     kernel_supports(prog->obj, FEAT_PROG_BIND_MAP)) {
7379                         struct bpf_map *rodata_map =
7380                                 &prog->obj->maps[prog->obj->rodata_map_idx];
7381
7382                         if (bpf_prog_bind_map(ret, bpf_map__fd(rodata_map), NULL)) {
7383                                 cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
7384                                 pr_warn("prog '%s': failed to bind .rodata map: %s\n",
7385                                         prog->name, cp);
7386                                 /* Don't fail hard if can't bind rodata. */
7387                         }
7388                 }
7389
7390                 *pfd = ret;
7391                 ret = 0;
7392                 goto out;
7393         }
7394
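        /* no log buffer was allocated yet (first attempt), or the verifier
         * log overflowed (ENOSPC): grow the buffer, doubling it but starting
         * at no less than BPF_LOG_BUF_SIZE, and retry the load
         */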
7395         if (!log_buf || errno == ENOSPC) {
7396                 log_buf_size = max((size_t)BPF_LOG_BUF_SIZE,
7397                                    log_buf_size << 1);
7398
7399                 free(log_buf);
7400                 goto retry_load;
7401         }
7402         ret = errno ? -errno : -LIBBPF_ERRNO__LOAD;
7403         cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
7404         pr_warn("failed to load BPF program: %s\n", cp);
7405         pr_perm_msg(ret);
7406
7407         if (log_buf && log_buf[0] != '\0') {
7408                 ret = -LIBBPF_ERRNO__VERIFY;
7409                 pr_warn("-- BEGIN DUMP LOG --\n");
7410                 pr_warn("\n%s\n", log_buf);
7411                 pr_warn("-- END DUMP LOG --\n");
7412         } else if (load_attr.insn_cnt >= BPF_MAXINSNS) {
7413                 pr_warn("Program too large (%zu insns), at most %d insns\n",
7414                         load_attr.insn_cnt, BPF_MAXINSNS);
7415                 ret = -LIBBPF_ERRNO__PROG2BIG;
7416         } else if (load_attr.prog_type != BPF_PROG_TYPE_KPROBE) {
7417                 /* Wrong program type? */
7418                 int fd;
7419
7420                 load_attr.prog_type = BPF_PROG_TYPE_KPROBE;
7421                 load_attr.expected_attach_type = 0;
7422                 load_attr.log_buf = NULL;
7423                 load_attr.log_buf_sz = 0;
7424                 fd = libbpf__bpf_prog_load(&load_attr);
7425                 if (fd >= 0) {
7426                         close(fd);
7427                         ret = -LIBBPF_ERRNO__PROGTYPE;
7428                         goto out;
7429                 }
7430         }
7431
7432 out:
7433         free(log_buf);
7434         return ret;
7435 }
7436
7437 static int bpf_program__record_externs(struct bpf_program *prog)
7438 {
7439         struct bpf_object *obj = prog->obj;
7440         int i;
7441
7442         for (i = 0; i < prog->nr_reloc; i++) {
7443                 struct reloc_desc *relo = &prog->reloc_desc[i];
7444                 struct extern_desc *ext = &obj->externs[relo->sym_off];
7445
7446                 switch (relo->type) {
7447                 case RELO_EXTERN_VAR:
7448                         if (ext->type != EXT_KSYM)
7449                                 continue;
7450                         if (!ext->ksym.type_id) {
7451                                 pr_warn("typeless ksym %s is not supported yet\n",
7452                                         ext->name);
7453                                 return -ENOTSUP;
7454                         }
7455                         bpf_gen__record_extern(obj->gen_loader, ext->name, BTF_KIND_VAR,
7456                                                relo->insn_idx);
7457                         break;
7458                 case RELO_EXTERN_FUNC:
7459                         bpf_gen__record_extern(obj->gen_loader, ext->name, BTF_KIND_FUNC,
7460                                                relo->insn_idx);
7461                         break;
7462                 default:
7463                         continue;
7464                 }
7465         }
7466         return 0;
7467 }
7468
7469 static int libbpf_find_attach_btf_id(struct bpf_program *prog, int *btf_obj_fd, int *btf_type_id);
7470
7471 int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_ver)
7472 {
7473         int err = 0, fd, i;
7474
7475         if (prog->obj->loaded) {
7476                 pr_warn("prog '%s': can't load after object was loaded\n", prog->name);
7477                 return libbpf_err(-EINVAL);
7478         }
7479
7480         if ((prog->type == BPF_PROG_TYPE_TRACING ||
7481              prog->type == BPF_PROG_TYPE_LSM ||
7482              prog->type == BPF_PROG_TYPE_EXT) && !prog->attach_btf_id) {
7483                 int btf_obj_fd = 0, btf_type_id = 0;
7484
7485                 err = libbpf_find_attach_btf_id(prog, &btf_obj_fd, &btf_type_id);
7486                 if (err)
7487                         return libbpf_err(err);
7488
7489                 prog->attach_btf_obj_fd = btf_obj_fd;
7490                 prog->attach_btf_id = btf_type_id;
7491         }
7492
7493         if (prog->instances.nr < 0 || !prog->instances.fds) {
7494                 if (prog->preprocessor) {
7495                         pr_warn("Internal error: can't load program '%s'\n",
7496                                 prog->name);
7497                         return libbpf_err(-LIBBPF_ERRNO__INTERNAL);
7498                 }
7499
7500                 prog->instances.fds = malloc(sizeof(int));
7501                 if (!prog->instances.fds) {
7502                         pr_warn("Not enough memory for BPF fds\n");
7503                         return libbpf_err(-ENOMEM);
7504                 }
7505                 prog->instances.nr = 1;
7506                 prog->instances.fds[0] = -1;
7507         }
7508
7509         if (!prog->preprocessor) {
7510                 if (prog->instances.nr != 1) {
7511                         pr_warn("prog '%s': inconsistent nr(%d) != 1\n",
7512                                 prog->name, prog->instances.nr);
7513                 }
7514                 if (prog->obj->gen_loader)
7515                         bpf_program__record_externs(prog);
7516                 err = load_program(prog, prog->insns, prog->insns_cnt,
7517                                    license, kern_ver, &fd);
7518                 if (!err)
7519                         prog->instances.fds[0] = fd;
7520                 goto out;
7521         }
7522
7523         for (i = 0; i < prog->instances.nr; i++) {
7524                 struct bpf_prog_prep_result result;
7525                 bpf_program_prep_t preprocessor = prog->preprocessor;
7526
7527                 memset(&result, 0, sizeof(result));
7528                 err = preprocessor(prog, i, prog->insns,
7529                                    prog->insns_cnt, &result);
7530                 if (err) {
7531                         pr_warn("Preprocessing the %dth instance of program '%s' failed\n",
7532                                 i, prog->name);
7533                         goto out;
7534                 }
7535
7536                 if (!result.new_insn_ptr || !result.new_insn_cnt) {
7537                         pr_debug("Skip loading the %dth instance of program '%s'\n",
7538                                  i, prog->name);
7539                         prog->instances.fds[i] = -1;
7540                         if (result.pfd)
7541                                 *result.pfd = -1;
7542                         continue;
7543                 }
7544
7545                 err = load_program(prog, result.new_insn_ptr,
7546                                    result.new_insn_cnt, license, kern_ver, &fd);
7547                 if (err) {
7548                         pr_warn("Loading the %dth instance of program '%s' failed\n",
7549                                 i, prog->name);
7550                         goto out;
7551                 }
7552
7553                 if (result.pfd)
7554                         *result.pfd = fd;
7555                 prog->instances.fds[i] = fd;
7556         }
7557 out:
7558         if (err)
7559                 pr_warn("failed to load program '%s'\n", prog->name);
7560         zfree(&prog->insns);
7561         prog->insns_cnt = 0;
7562         return libbpf_err(err);
7563 }
7564
7565 static int
7566 bpf_object__load_progs(struct bpf_object *obj, int log_level)
7567 {
7568         struct bpf_program *prog;
7569         size_t i;
7570         int err;
7571
7572         for (i = 0; i < obj->nr_programs; i++) {
7573                 prog = &obj->programs[i];
7574                 err = bpf_object__sanitize_prog(obj, prog);
7575                 if (err)
7576                         return err;
7577         }
7578
7579         for (i = 0; i < obj->nr_programs; i++) {
7580                 prog = &obj->programs[i];
7581                 if (prog_is_subprog(obj, prog))
7582                         continue;
7583                 if (!prog->load) {
7584                         pr_debug("prog '%s': skipped loading\n", prog->name);
7585                         continue;
7586                 }
7587                 prog->log_level |= log_level;
7588                 err = bpf_program__load(prog, obj->license, obj->kern_version);
7589                 if (err)
7590                         return err;
7591         }
7592         if (obj->gen_loader)
7593                 bpf_object__free_relocs(obj);
7594         return 0;
7595 }
7596
7597 static const struct bpf_sec_def *find_sec_def(const char *sec_name);
7598
7599 static struct bpf_object *
7600 __bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz,
7601                    const struct bpf_object_open_opts *opts)
7602 {
7603         const char *obj_name, *kconfig, *btf_tmp_path;
7604         struct bpf_program *prog;
7605         struct bpf_object *obj;
7606         char tmp_name[64];
7607         int err;
7608
7609         if (elf_version(EV_CURRENT) == EV_NONE) {
7610                 pr_warn("failed to init libelf for %s\n",
7611                         path ? : "(mem buf)");
7612                 return ERR_PTR(-LIBBPF_ERRNO__LIBELF);
7613         }
7614
7615         if (!OPTS_VALID(opts, bpf_object_open_opts))
7616                 return ERR_PTR(-EINVAL);
7617
7618         obj_name = OPTS_GET(opts, object_name, NULL);
7619         if (obj_buf) {
7620                 if (!obj_name) {
7621                         snprintf(tmp_name, sizeof(tmp_name), "%lx-%lx",
7622                                  (unsigned long)obj_buf,
7623                                  (unsigned long)obj_buf_sz);
7624                         obj_name = tmp_name;
7625                 }
7626                 path = obj_name;
7627                 pr_debug("loading object '%s' from buffer\n", obj_name);
7628         }
7629
7630         obj = bpf_object__new(path, obj_buf, obj_buf_sz, obj_name);
7631         if (IS_ERR(obj))
7632                 return obj;
7633
7634         btf_tmp_path = OPTS_GET(opts, btf_custom_path, NULL);
7635         if (btf_tmp_path) {
7636                 if (strlen(btf_tmp_path) >= PATH_MAX) {
7637                         err = -ENAMETOOLONG;
7638                         goto out;
7639                 }
7640                 obj->btf_custom_path = strdup(btf_tmp_path);
7641                 if (!obj->btf_custom_path) {
7642                         err = -ENOMEM;
7643                         goto out;
7644                 }
7645         }
7646
7647         kconfig = OPTS_GET(opts, kconfig, NULL);
7648         if (kconfig) {
7649                 obj->kconfig = strdup(kconfig);
7650                 if (!obj->kconfig) {
7651                         err = -ENOMEM;
7652                         goto out;
7653                 }
7654         }
7655
7656         err = bpf_object__elf_init(obj);
7657         err = err ? : bpf_object__check_endianness(obj);
7658         err = err ? : bpf_object__elf_collect(obj);
7659         err = err ? : bpf_object__collect_externs(obj);
7660         err = err ? : bpf_object__finalize_btf(obj);
7661         err = err ? : bpf_object__init_maps(obj, opts);
7662         err = err ? : bpf_object__collect_relos(obj);
7663         if (err)
7664                 goto out;
7665         bpf_object__elf_finish(obj);
7666
7667         bpf_object__for_each_program(prog, obj) {
7668                 prog->sec_def = find_sec_def(prog->sec_name);
7669                 if (!prog->sec_def) {
7670                         /* couldn't guess, but user might manually specify */
7671                         pr_debug("prog '%s': unrecognized ELF section name '%s'\n",
7672                                 prog->name, prog->sec_name);
7673                         continue;
7674                 }
7675
7676                 if (prog->sec_def->is_sleepable)
7677                         prog->prog_flags |= BPF_F_SLEEPABLE;
7678                 bpf_program__set_type(prog, prog->sec_def->prog_type);
7679                 bpf_program__set_expected_attach_type(prog,
7680                                 prog->sec_def->expected_attach_type);
7681
7682                 if (prog->sec_def->prog_type == BPF_PROG_TYPE_TRACING ||
7683                     prog->sec_def->prog_type == BPF_PROG_TYPE_EXT)
7684                         prog->attach_prog_fd = OPTS_GET(opts, attach_prog_fd, 0);
7685         }
7686
7687         return obj;
7688 out:
7689         bpf_object__close(obj);
7690         return ERR_PTR(err);
7691 }
7692
7693 static struct bpf_object *
7694 __bpf_object__open_xattr(struct bpf_object_open_attr *attr, int flags)
7695 {
7696         DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts,
7697                 .relaxed_maps = flags & MAPS_RELAX_COMPAT,
7698         );
7699
7700         /* param validation */
7701         if (!attr->file)
7702                 return NULL;
7703
7704         pr_debug("loading %s\n", attr->file);
7705         return __bpf_object__open(attr->file, NULL, 0, &opts);
7706 }
7707
7708 struct bpf_object *bpf_object__open_xattr(struct bpf_object_open_attr *attr)
7709 {
7710         return libbpf_ptr(__bpf_object__open_xattr(attr, 0));
7711 }
7712
7713 struct bpf_object *bpf_object__open(const char *path)
7714 {
7715         struct bpf_object_open_attr attr = {
7716                 .file           = path,
7717                 .prog_type      = BPF_PROG_TYPE_UNSPEC,
7718         };
7719
7720         return libbpf_ptr(__bpf_object__open_xattr(&attr, 0));
7721 }
7722
7723 struct bpf_object *
7724 bpf_object__open_file(const char *path, const struct bpf_object_open_opts *opts)
7725 {
7726         if (!path)
7727                 return libbpf_err_ptr(-EINVAL);
7728
7729         pr_debug("loading %s\n", path);
7730
7731         return libbpf_ptr(__bpf_object__open(path, NULL, 0, opts));
7732 }
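
/* Illustrative usage sketch (not part of libbpf), assuming a compiled BPF
 * object file "prog.bpf.o" exists on disk:
 *
 *     DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts,
 *             .object_name = "my_obj",
 *     );
 *     struct bpf_object *obj = bpf_object__open_file("prog.bpf.o", &opts);
 *     long err = libbpf_get_error(obj);
 *     if (err)
 *             return err;  // libbpf_get_error() handles both NULL+errno
 *                          // and ERR_PTR-style error returns
 */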
7733
7734 struct bpf_object *
7735 bpf_object__open_mem(const void *obj_buf, size_t obj_buf_sz,
7736                      const struct bpf_object_open_opts *opts)
7737 {
7738         if (!obj_buf || obj_buf_sz == 0)
7739                 return libbpf_err_ptr(-EINVAL);
7740
7741         return libbpf_ptr(__bpf_object__open(NULL, obj_buf, obj_buf_sz, opts));
7742 }
7743
7744 struct bpf_object *
7745 bpf_object__open_buffer(const void *obj_buf, size_t obj_buf_sz,
7746                         const char *name)
7747 {
7748         DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts,
7749                 .object_name = name,
7750                 /* wrong default, but backwards-compatible */
7751                 .relaxed_maps = true,
7752         );
7753
7754         /* returning NULL is wrong, but backwards-compatible */
7755         if (!obj_buf || obj_buf_sz == 0)
7756                 return errno = EINVAL, NULL;
7757
7758         return libbpf_ptr(__bpf_object__open(NULL, obj_buf, obj_buf_sz, &opts));
7759 }
7760
7761 int bpf_object__unload(struct bpf_object *obj)
7762 {
7763         size_t i;
7764
7765         if (!obj)
7766                 return libbpf_err(-EINVAL);
7767
7768         for (i = 0; i < obj->nr_maps; i++) {
7769                 zclose(obj->maps[i].fd);
7770                 if (obj->maps[i].st_ops)
7771                         zfree(&obj->maps[i].st_ops->kern_vdata);
7772         }
7773
7774         for (i = 0; i < obj->nr_programs; i++)
7775                 bpf_program__unload(&obj->programs[i]);
7776
7777         return 0;
7778 }
7779
7780 static int bpf_object__sanitize_maps(struct bpf_object *obj)
7781 {
7782         struct bpf_map *m;
7783
7784         bpf_object__for_each_map(m, obj) {
7785                 if (!bpf_map__is_internal(m))
7786                         continue;
7787                 if (!kernel_supports(obj, FEAT_GLOBAL_DATA)) {
7788                         pr_warn("kernel doesn't support global data\n");
7789                         return -ENOTSUP;
7790                 }
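                /* internal maps are created with BPF_F_MMAPABLE set, so the
                 * XOR below simply clears that flag when the kernel can't
                 * mmap array maps
                 */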
7791                 if (!kernel_supports(obj, FEAT_ARRAY_MMAP))
7792                         m->def.map_flags ^= BPF_F_MMAPABLE;
7793         }
7794
7795         return 0;
7796 }
7797
7798 static int bpf_object__read_kallsyms_file(struct bpf_object *obj)
7799 {
7800         char sym_type, sym_name[500];
7801         unsigned long long sym_addr;
7802         const struct btf_type *t;
7803         struct extern_desc *ext;
7804         int ret, err = 0;
7805         FILE *f;
7806
7807         f = fopen("/proc/kallsyms", "r");
7808         if (!f) {
7809                 err = -errno;
7810                 pr_warn("failed to open /proc/kallsyms: %d\n", err);
7811                 return err;
7812         }
7813
7814         while (true) {
7815                 ret = fscanf(f, "%llx %c %499s%*[^\n]\n",
7816                              &sym_addr, &sym_type, sym_name);
7817                 if (ret == EOF && feof(f))
7818                         break;
7819                 if (ret != 3) {
7820                         pr_warn("failed to read kallsyms entry: %d\n", ret);
7821                         err = -EINVAL;
7822                         goto out;
7823                 }
7824
7825                 ext = find_extern_by_name(obj, sym_name);
7826                 if (!ext || ext->type != EXT_KSYM)
7827                         continue;
7828
7829                 t = btf__type_by_id(obj->btf, ext->btf_id);
7830                 if (!btf_is_var(t))
7831                         continue;
7832
7833                 if (ext->is_set && ext->ksym.addr != sym_addr) {
7834                         pr_warn("extern (ksym) '%s' resolution is ambiguous: 0x%llx or 0x%llx\n",
7835                                 sym_name, ext->ksym.addr, sym_addr);
7836                         err = -EINVAL;
7837                         goto out;
7838                 }
7839                 if (!ext->is_set) {
7840                         ext->is_set = true;
7841                         ext->ksym.addr = sym_addr;
7842                         pr_debug("extern (ksym) %s=0x%llx\n", sym_name, sym_addr);
7843                 }
7844         }
7845
7846 out:
7847         fclose(f);
7848         return err;
7849 }
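
/* For reference, /proc/kallsyms lines have the form (illustrative values):
 *
 *     ffffffff81000000 T _stext
 *     ffffffffc0123000 t some_static_func      [some_module]
 *
 * i.e. "<hex address> <type letter> <name> [optional module]"; the fscanf()
 * format above captures the first three fields and discards any module
 * suffix.
 */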
7850
7851 static int find_ksym_btf_id(struct bpf_object *obj, const char *ksym_name,
7852                             __u16 kind, struct btf **res_btf,
7853                             int *res_btf_fd)
7854 {
7855         int i, id, btf_fd, err;
7856         struct btf *btf;
7857
7858         btf = obj->btf_vmlinux;
7859         btf_fd = 0;
7860         id = btf__find_by_name_kind(btf, ksym_name, kind);
7861
7862         if (id == -ENOENT) {
7863                 err = load_module_btfs(obj);
7864                 if (err)
7865                         return err;
7866
7867                 for (i = 0; i < obj->btf_module_cnt; i++) {
7868                         btf = obj->btf_modules[i].btf;
7869                         /* we assume module BTF FD is always >0 */
7870                         btf_fd = obj->btf_modules[i].fd;
7871                         id = btf__find_by_name_kind(btf, ksym_name, kind);
7872                         if (id != -ENOENT)
7873                                 break;
7874                 }
7875         }
7876         if (id <= 0) {
7877                 pr_warn("extern (%s ksym) '%s': failed to find BTF ID in kernel BTF(s).\n",
7878                         __btf_kind_str(kind), ksym_name);
7879                 return -ESRCH;
7880         }
7881
7882         *res_btf = btf;
7883         *res_btf_fd = btf_fd;
7884         return id;
7885 }
7886
7887 static int bpf_object__resolve_ksym_var_btf_id(struct bpf_object *obj,
7888                                                struct extern_desc *ext)
7889 {
7890         const struct btf_type *targ_var, *targ_type;
7891         __u32 targ_type_id, local_type_id;
7892         const char *targ_var_name;
7893         int id, btf_fd = 0, err;
7894         struct btf *btf = NULL;
7895
7896         id = find_ksym_btf_id(obj, ext->name, BTF_KIND_VAR, &btf, &btf_fd);
7897         if (id < 0)
7898                 return id;
7899
7900         /* find local type_id */
7901         local_type_id = ext->ksym.type_id;
7902
7903         /* find target type_id */
7904         targ_var = btf__type_by_id(btf, id);
7905         targ_var_name = btf__name_by_offset(btf, targ_var->name_off);
7906         targ_type = skip_mods_and_typedefs(btf, targ_var->type, &targ_type_id);
7907
7908         err = bpf_core_types_are_compat(obj->btf, local_type_id,
7909                                         btf, targ_type_id);
7910         if (err <= 0) {
7911                 const struct btf_type *local_type;
7912                 const char *targ_name, *local_name;
7913
7914                 local_type = btf__type_by_id(obj->btf, local_type_id);
7915                 local_name = btf__name_by_offset(obj->btf, local_type->name_off);
7916                 targ_name = btf__name_by_offset(btf, targ_type->name_off);
7917
7918                 pr_warn("extern (var ksym) '%s': incompatible types, expected [%d] %s %s, but kernel has [%d] %s %s\n",
7919                         ext->name, local_type_id,
7920                         btf_kind_str(local_type), local_name, targ_type_id,
7921                         btf_kind_str(targ_type), targ_name);
7922                 return -EINVAL;
7923         }
7924
7925         ext->is_set = true;
7926         ext->ksym.kernel_btf_obj_fd = btf_fd;
7927         ext->ksym.kernel_btf_id = id;
7928         pr_debug("extern (var ksym) '%s': resolved to [%d] %s %s\n",
7929                  ext->name, id, btf_kind_str(targ_var), targ_var_name);
7930
7931         return 0;
7932 }
7933
7934 static int bpf_object__resolve_ksym_func_btf_id(struct bpf_object *obj,
7935                                                 struct extern_desc *ext)
7936 {
7937         int local_func_proto_id, kfunc_proto_id, kfunc_id;
7938         const struct btf_type *kern_func;
7939         struct btf *kern_btf = NULL;
7940         int ret, kern_btf_fd = 0;
7941
7942         local_func_proto_id = ext->ksym.type_id;
7943
7944         kfunc_id = find_ksym_btf_id(obj, ext->name, BTF_KIND_FUNC,
7945                                     &kern_btf, &kern_btf_fd);
7946         if (kfunc_id < 0) {
7947                 pr_warn("extern (func ksym) '%s': not found in kernel BTF\n",
7948                         ext->name);
7949                 return kfunc_id;
7950         }
7951
7952         if (kern_btf != obj->btf_vmlinux) {
7953                 pr_warn("extern (func ksym) '%s': function in kernel module is not supported\n",
7954                         ext->name);
7955                 return -ENOTSUP;
7956         }
7957
7958         kern_func = btf__type_by_id(kern_btf, kfunc_id);
7959         kfunc_proto_id = kern_func->type;
7960
7961         ret = bpf_core_types_are_compat(obj->btf, local_func_proto_id,
7962                                         kern_btf, kfunc_proto_id);
7963         if (ret <= 0) {
7964                 pr_warn("extern (func ksym) '%s': func_proto [%d] incompatible with kernel [%d]\n",
7965                         ext->name, local_func_proto_id, kfunc_proto_id);
7966                 return -EINVAL;
7967         }
7968
7969         ext->is_set = true;
7970         ext->ksym.kernel_btf_obj_fd = kern_btf_fd;
7971         ext->ksym.kernel_btf_id = kfunc_id;
7972         pr_debug("extern (func ksym) '%s': resolved to kernel [%d]\n",
7973                  ext->name, kfunc_id);
7974
7975         return 0;
7976 }
7977
7978 static int bpf_object__resolve_ksyms_btf_id(struct bpf_object *obj)
7979 {
7980         const struct btf_type *t;
7981         struct extern_desc *ext;
7982         int i, err;
7983
7984         for (i = 0; i < obj->nr_extern; i++) {
7985                 ext = &obj->externs[i];
7986                 if (ext->type != EXT_KSYM || !ext->ksym.type_id)
7987                         continue;
7988
7989                 if (obj->gen_loader) {
7990                         ext->is_set = true;
7991                         ext->ksym.kernel_btf_obj_fd = 0;
7992                         ext->ksym.kernel_btf_id = 0;
7993                         continue;
7994                 }
7995                 t = btf__type_by_id(obj->btf, ext->btf_id);
7996                 if (btf_is_var(t))
7997                         err = bpf_object__resolve_ksym_var_btf_id(obj, ext);
7998                 else
7999                         err = bpf_object__resolve_ksym_func_btf_id(obj, ext);
8000                 if (err)
8001                         return err;
8002         }
8003         return 0;
8004 }
8005
8006 static int bpf_object__resolve_externs(struct bpf_object *obj,
8007                                        const char *extra_kconfig)
8008 {
8009         bool need_config = false, need_kallsyms = false;
8010         bool need_vmlinux_btf = false;
8011         struct extern_desc *ext;
8012         void *kcfg_data = NULL;
8013         int err, i;
8014
8015         if (obj->nr_extern == 0)
8016                 return 0;
8017
8018         if (obj->kconfig_map_idx >= 0)
8019                 kcfg_data = obj->maps[obj->kconfig_map_idx].mmaped;
8020
8021         for (i = 0; i < obj->nr_extern; i++) {
8022                 ext = &obj->externs[i];
8023
8024                 if (ext->type == EXT_KCFG &&
8025                     strcmp(ext->name, "LINUX_KERNEL_VERSION") == 0) {
8026                         void *ext_val = kcfg_data + ext->kcfg.data_off;
8027                         __u32 kver = get_kernel_version();
8028
8029                         if (!kver) {
8030                                 pr_warn("failed to get kernel version\n");
8031                                 return -EINVAL;
8032                         }
8033                         err = set_kcfg_value_num(ext, ext_val, kver);
8034                         if (err)
8035                                 return err;
8036                         pr_debug("extern (kcfg) %s=0x%x\n", ext->name, kver);
8037                 } else if (ext->type == EXT_KCFG &&
8038                            strncmp(ext->name, "CONFIG_", 7) == 0) {
8039                         need_config = true;
8040                 } else if (ext->type == EXT_KSYM) {
8041                         if (ext->ksym.type_id)
8042                                 need_vmlinux_btf = true;
8043                         else
8044                                 need_kallsyms = true;
8045                 } else {
8046                         pr_warn("unrecognized extern '%s'\n", ext->name);
8047                         return -EINVAL;
8048                 }
8049         }
8050         if (need_config && extra_kconfig) {
8051                 err = bpf_object__read_kconfig_mem(obj, extra_kconfig, kcfg_data);
8052                 if (err)
8053                         return -EINVAL;
8054                 need_config = false;
8055                 for (i = 0; i < obj->nr_extern; i++) {
8056                         ext = &obj->externs[i];
8057                         if (ext->type == EXT_KCFG && !ext->is_set) {
8058                                 need_config = true;
8059                                 break;
8060                         }
8061                 }
8062         }
8063         if (need_config) {
8064                 err = bpf_object__read_kconfig_file(obj, kcfg_data);
8065                 if (err)
8066                         return -EINVAL;
8067         }
8068         if (need_kallsyms) {
8069                 err = bpf_object__read_kallsyms_file(obj);
8070                 if (err)
8071                         return -EINVAL;
8072         }
8073         if (need_vmlinux_btf) {
8074                 err = bpf_object__resolve_ksyms_btf_id(obj);
8075                 if (err)
8076                         return -EINVAL;
8077         }
8078         for (i = 0; i < obj->nr_extern; i++) {
8079                 ext = &obj->externs[i];
8080
8081                 if (!ext->is_set && !ext->is_weak) {
8082                         pr_warn("extern %s (strong) not resolved\n", ext->name);
8083                         return -ESRCH;
8084                 } else if (!ext->is_set) {
8085                         pr_debug("extern %s (weak) not resolved, defaulting to zero\n",
8086                                  ext->name);
8087                 }
8088         }
8089
8090         return 0;
8091 }
8092
8093 int bpf_object__load_xattr(struct bpf_object_load_attr *attr)
8094 {
8095         struct bpf_object *obj;
8096         int err, i;
8097
8098         if (!attr)
8099                 return libbpf_err(-EINVAL);
8100         obj = attr->obj;
8101         if (!obj)
8102                 return libbpf_err(-EINVAL);
8103
8104         if (obj->loaded) {
8105                 pr_warn("object '%s': load can't be attempted twice\n", obj->name);
8106                 return libbpf_err(-EINVAL);
8107         }
8108
8109         if (obj->gen_loader)
8110                 bpf_gen__init(obj->gen_loader, attr->log_level);
8111
8112         err = bpf_object__probe_loading(obj);
8113         err = err ? : bpf_object__load_vmlinux_btf(obj, false);
8114         err = err ? : bpf_object__resolve_externs(obj, obj->kconfig);
8115         err = err ? : bpf_object__sanitize_and_load_btf(obj);
8116         err = err ? : bpf_object__sanitize_maps(obj);
8117         err = err ? : bpf_object__init_kern_struct_ops_maps(obj);
8118         err = err ? : bpf_object__create_maps(obj);
8119         err = err ? : bpf_object__relocate(obj, obj->btf_custom_path ? : attr->target_btf_path);
8120         err = err ? : bpf_object__load_progs(obj, attr->log_level);
8121
8122         if (obj->gen_loader) {
8123                 /* reset FDs */
8124                 btf__set_fd(obj->btf, -1);
8125                 for (i = 0; i < obj->nr_maps; i++)
8126                         obj->maps[i].fd = -1;
8127                 if (!err)
8128                         err = bpf_gen__finish(obj->gen_loader);
8129         }
8130
8131         /* clean up module BTFs */
8132         for (i = 0; i < obj->btf_module_cnt; i++) {
8133                 close(obj->btf_modules[i].fd);
8134                 btf__free(obj->btf_modules[i].btf);
8135                 free(obj->btf_modules[i].name);
8136         }
8137         free(obj->btf_modules);
8138
8139         /* clean up vmlinux BTF */
8140         btf__free(obj->btf_vmlinux);
8141         obj->btf_vmlinux = NULL;
8142
8143         obj->loaded = true; /* doesn't matter whether it succeeded or not */
8144
8145         if (err)
8146                 goto out;
8147
8148         return 0;
8149 out:
8150         /* unpin any maps that were auto-pinned during load */
8151         for (i = 0; i < obj->nr_maps; i++)
8152                 if (obj->maps[i].pinned && !obj->maps[i].reused)
8153                         bpf_map__unpin(&obj->maps[i], NULL);
8154
8155         bpf_object__unload(obj);
8156         pr_warn("failed to load object '%s'\n", obj->path);
8157         return libbpf_err(err);
8158 }
8159
8160 int bpf_object__load(struct bpf_object *obj)
8161 {
8162         struct bpf_object_load_attr attr = {
8163                 .obj = obj,
8164         };
8165
8166         return bpf_object__load_xattr(&attr);
8167 }
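
/* Editorial usage sketch, not part of libbpf itself: the typical
 * open -> load -> close flow around bpf_object__load(). The object file
 * name "prog.bpf.o" is an assumption; for verbose loading, fill a
 * bpf_object_load_attr with a non-zero log_level and call
 * bpf_object__load_xattr() directly instead.
 */
static inline int example_open_and_load(void)
{
        struct bpf_object *obj;
        int err;

        obj = bpf_object__open_file("prog.bpf.o", NULL); /* hypothetical file */
        err = libbpf_get_error(obj);
        if (err)
                return err;

        err = bpf_object__load(obj); /* load may be attempted only once */
        bpf_object__close(obj);
        return err;
}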
8168
8169 static int make_parent_dir(const char *path)
8170 {
8171         char *cp, errmsg[STRERR_BUFSIZE];
8172         char *dname, *dir;
8173         int err = 0;
8174
8175         dname = strdup(path);
8176         if (dname == NULL)
8177                 return -ENOMEM;
8178
8179         dir = dirname(dname);
8180         if (mkdir(dir, 0700) && errno != EEXIST)
8181                 err = -errno;
8182
8183         free(dname);
8184         if (err) {
8185                 cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg));
8186                 pr_warn("failed to mkdir %s: %s\n", path, cp);
8187         }
8188         return err;
8189 }
8190
8191 static int check_path(const char *path)
8192 {
8193         char *cp, errmsg[STRERR_BUFSIZE];
8194         struct statfs st_fs;
8195         char *dname, *dir;
8196         int err = 0;
8197
8198         if (path == NULL)
8199                 return -EINVAL;
8200
8201         dname = strdup(path);
8202         if (dname == NULL)
8203                 return -ENOMEM;
8204
8205         dir = dirname(dname);
8206         if (statfs(dir, &st_fs)) {
8207                 err = -errno; /* grab errno before pr_warn() can clobber it */
8208                 cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
8209                 pr_warn("failed to statfs %s: %s\n", dir, cp);
8210         }
8211         free(dname);
8212
8213         if (!err && st_fs.f_type != BPF_FS_MAGIC) {
8214                 pr_warn("specified path %s is not on BPF FS\n", path);
8215                 err = -EINVAL;
8216         }
8217
8218         return err;
8219 }
8220
8221 int bpf_program__pin_instance(struct bpf_program *prog, const char *path,
8222                               int instance)
8223 {
8224         char *cp, errmsg[STRERR_BUFSIZE];
8225         int err;
8226
8227         err = make_parent_dir(path);
8228         if (err)
8229                 return libbpf_err(err);
8230
8231         err = check_path(path);
8232         if (err)
8233                 return libbpf_err(err);
8234
8235         if (prog == NULL) {
8236                 pr_warn("invalid program pointer\n");
8237                 return libbpf_err(-EINVAL);
8238         }
8239
8240         if (instance < 0 || instance >= prog->instances.nr) {
8241                 pr_warn("invalid prog instance %d of prog %s (max %d)\n",
8242                         instance, prog->name, prog->instances.nr);
8243                 return libbpf_err(-EINVAL);
8244         }
8245
8246         if (bpf_obj_pin(prog->instances.fds[instance], path)) {
8247                 err = -errno;
8248                 cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
8249                 pr_warn("failed to pin program: %s\n", cp);
8250                 return libbpf_err(err);
8251         }
8252         pr_debug("pinned program '%s'\n", path);
8253
8254         return 0;
8255 }
8256
8257 int bpf_program__unpin_instance(struct bpf_program *prog, const char *path,
8258                                 int instance)
8259 {
8260         int err;
8261
8262         err = check_path(path);
8263         if (err)
8264                 return libbpf_err(err);
8265
8266         if (prog == NULL) {
8267                 pr_warn("invalid program pointer\n");
8268                 return libbpf_err(-EINVAL);
8269         }
8270
8271         if (instance < 0 || instance >= prog->instances.nr) {
8272                 pr_warn("invalid prog instance %d of prog %s (max %d)\n",
8273                         instance, prog->name, prog->instances.nr);
8274                 return libbpf_err(-EINVAL);
8275         }
8276
8277         err = unlink(path);
8278         if (err != 0)
8279                 return libbpf_err(-errno);
8280
8281         pr_debug("unpinned program '%s'\n", path);
8282
8283         return 0;
8284 }
8285
8286 int bpf_program__pin(struct bpf_program *prog, const char *path)
8287 {
8288         int i, err;
8289
8290         err = make_parent_dir(path);
8291         if (err)
8292                 return libbpf_err(err);
8293
8294         err = check_path(path);
8295         if (err)
8296                 return libbpf_err(err);
8297
8298         if (prog == NULL) {
8299                 pr_warn("invalid program pointer\n");
8300                 return libbpf_err(-EINVAL);
8301         }
8302
8303         if (prog->instances.nr <= 0) {
8304                 pr_warn("no instances of prog %s to pin\n", prog->name);
8305                 return libbpf_err(-EINVAL);
8306         }
8307
8308         if (prog->instances.nr == 1) {
8309                 /* don't create subdirs when pinning single instance */
8310                 return bpf_program__pin_instance(prog, path, 0);
8311         }
8312
8313         for (i = 0; i < prog->instances.nr; i++) {
8314                 char buf[PATH_MAX];
8315                 int len;
8316
8317                 len = snprintf(buf, PATH_MAX, "%s/%d", path, i);
8318                 if (len < 0) {
8319                         err = -EINVAL;
8320                         goto err_unpin;
8321                 } else if (len >= PATH_MAX) {
8322                         err = -ENAMETOOLONG;
8323                         goto err_unpin;
8324                 }
8325
8326                 err = bpf_program__pin_instance(prog, buf, i);
8327                 if (err)
8328                         goto err_unpin;
8329         }
8330
8331         return 0;
8332
8333 err_unpin:
8334         for (i = i - 1; i >= 0; i--) {
8335                 char buf[PATH_MAX];
8336                 int len;
8337
8338                 len = snprintf(buf, PATH_MAX, "%s/%d", path, i);
8339                 if (len < 0)
8340                         continue;
8341                 else if (len >= PATH_MAX)
8342                         continue;
8343
8344                 bpf_program__unpin_instance(prog, buf, i);
8345         }
8346
8347         rmdir(path);
8348
8349         return libbpf_err(err);
8350 }
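
/* Editorial usage sketch, not part of libbpf itself: pin and unpin a
 * program on bpffs. With a single instance the pin is created at the path
 * itself; with multiple instances, per-instance files "<path>/<i>" are
 * created. The bpffs path below is an assumption.
 */
static inline int example_pin_prog(struct bpf_program *prog)
{
        int err;

        err = bpf_program__pin(prog, "/sys/fs/bpf/my_prog");
        if (err)
                return err;

        return bpf_program__unpin(prog, "/sys/fs/bpf/my_prog");
}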
8351
8352 int bpf_program__unpin(struct bpf_program *prog, const char *path)
8353 {
8354         int i, err;
8355
8356         err = check_path(path);
8357         if (err)
8358                 return libbpf_err(err);
8359
8360         if (prog == NULL) {
8361                 pr_warn("invalid program pointer\n");
8362                 return libbpf_err(-EINVAL);
8363         }
8364
8365         if (prog->instances.nr <= 0) {
8366                 pr_warn("no instances of prog %s to unpin\n", prog->name);
8367                 return libbpf_err(-EINVAL);
8368         }
8369
8370         if (prog->instances.nr == 1) {
8371                 /* no subdirs were created when pinning a single instance */
8372                 return bpf_program__unpin_instance(prog, path, 0);
8373         }
8374
8375         for (i = 0; i < prog->instances.nr; i++) {
8376                 char buf[PATH_MAX];
8377                 int len;
8378
8379                 len = snprintf(buf, PATH_MAX, "%s/%d", path, i);
8380                 if (len < 0)
8381                         return libbpf_err(-EINVAL);
8382                 else if (len >= PATH_MAX)
8383                         return libbpf_err(-ENAMETOOLONG);
8384
8385                 err = bpf_program__unpin_instance(prog, buf, i);
8386                 if (err)
8387                         return err;
8388         }
8389
8390         err = rmdir(path);
8391         if (err)
8392                 return libbpf_err(-errno);
8393
8394         return 0;
8395 }
8396
8397 int bpf_map__pin(struct bpf_map *map, const char *path)
8398 {
8399         char *cp, errmsg[STRERR_BUFSIZE];
8400         int err;
8401
8402         if (map == NULL) {
8403                 pr_warn("invalid map pointer\n");
8404                 return libbpf_err(-EINVAL);
8405         }
8406
8407         if (map->pin_path) {
8408                 if (path && strcmp(path, map->pin_path)) {
8409                         pr_warn("map '%s' already has pin path '%s' different from '%s'\n",
8410                                 bpf_map__name(map), map->pin_path, path);
8411                         return libbpf_err(-EINVAL);
8412                 } else if (map->pinned) {
8413                         pr_debug("map '%s' already pinned at '%s'; not re-pinning\n",
8414                                  bpf_map__name(map), map->pin_path);
8415                         return 0;
8416                 }
8417         } else {
8418                 if (!path) {
8419                         pr_warn("missing a path to pin map '%s' at\n",
8420                                 bpf_map__name(map));
8421                         return libbpf_err(-EINVAL);
8422                 } else if (map->pinned) {
8423                         pr_warn("map '%s' already pinned\n", bpf_map__name(map));
8424                         return libbpf_err(-EEXIST);
8425                 }
8426
8427                 map->pin_path = strdup(path);
8428                 if (!map->pin_path) {
8429                         err = -errno;
8430                         goto out_err;
8431                 }
8432         }
8433
8434         err = make_parent_dir(map->pin_path);
8435         if (err)
8436                 return libbpf_err(err);
8437
8438         err = check_path(map->pin_path);
8439         if (err)
8440                 return libbpf_err(err);
8441
8442         if (bpf_obj_pin(map->fd, map->pin_path)) {
8443                 err = -errno;
8444                 goto out_err;
8445         }
8446
8447         map->pinned = true;
8448         pr_debug("pinned map '%s'\n", map->pin_path);
8449
8450         return 0;
8451
8452 out_err:
8453         cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg));
8454         pr_warn("failed to pin map: %s\n", cp);
8455         return libbpf_err(err);
8456 }
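
/* Editorial usage sketch, not part of libbpf itself: pin a map at an
 * explicit bpffs path (an assumption here), which is also remembered as
 * the map's pin_path, then unpin it via that stored path.
 */
static inline int example_pin_map(struct bpf_map *map)
{
        int err;

        err = bpf_map__pin(map, "/sys/fs/bpf/my_map");
        if (err)
                return err;

        /* NULL is fine here: the pin_path recorded above is used */
        return bpf_map__unpin(map, NULL);
}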
8457
8458 int bpf_map__unpin(struct bpf_map *map, const char *path)
8459 {
8460         int err;
8461
8462         if (map == NULL) {
8463                 pr_warn("invalid map pointer\n");
8464                 return libbpf_err(-EINVAL);
8465         }
8466
8467         if (map->pin_path) {
8468                 if (path && strcmp(path, map->pin_path)) {
8469                         pr_warn("map '%s' already has pin path '%s' different from '%s'\n",
8470                                 bpf_map__name(map), map->pin_path, path);
8471                         return libbpf_err(-EINVAL);
8472                 }
8473                 path = map->pin_path;
8474         } else if (!path) {
8475                 pr_warn("no path to unpin map '%s' from\n",
8476                         bpf_map__name(map));
8477                 return libbpf_err(-EINVAL);
8478         }
8479
8480         err = check_path(path);
8481         if (err)
8482                 return libbpf_err(err);
8483
8484         err = unlink(path);
8485         if (err != 0)
8486                 return libbpf_err(-errno);
8487
8488         map->pinned = false;
8489         pr_debug("unpinned map '%s' from '%s'\n", bpf_map__name(map), path);
8490
8491         return 0;
8492 }
8493
8494 int bpf_map__set_pin_path(struct bpf_map *map, const char *path)
8495 {
8496         char *new = NULL;
8497
8498         if (path) {
8499                 new = strdup(path);
8500                 if (!new)
8501                         return libbpf_err(-errno);
8502         }
8503
8504         free(map->pin_path);
8505         map->pin_path = new;
8506         return 0;
8507 }
8508
8509 const char *bpf_map__get_pin_path(const struct bpf_map *map)
8510 {
8511         return map->pin_path;
8512 }
8513
8514 const char *bpf_map__pin_path(const struct bpf_map *map)
8515 {
8516         return map->pin_path;
8517 }
8518
8519 bool bpf_map__is_pinned(const struct bpf_map *map)
8520 {
8521         return map->pinned;
8522 }
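
/* Editorial usage sketch, not part of libbpf itself: set a pin path before
 * load so the map is auto-pinned (or reused) when it is created, then read
 * it back via bpf_map__pin_path(), the new-style getter added alongside
 * bpf_map__get_pin_path(). The map name and path are assumptions.
 */
static inline int example_auto_pin(struct bpf_object *obj)
{
        struct bpf_map *map;
        int err;

        map = bpf_object__find_map_by_name(obj, "my_map"); /* hypothetical */
        if (!map)
                return -ENOENT;

        err = bpf_map__set_pin_path(map, "/sys/fs/bpf/my_map");
        if (err)
                return err;

        pr_debug("map will be pinned at %s\n", bpf_map__pin_path(map));
        return 0;
}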
8523
8524 static void sanitize_pin_path(char *s)
8525 {
8526         /* bpffs disallows periods in path names */
8527         while (*s) {
8528                 if (*s == '.')
8529                         *s = '_';
8530                 s++;
8531         }
8532 }
8533
8534 int bpf_object__pin_maps(struct bpf_object *obj, const char *path)
8535 {
8536         struct bpf_map *map;
8537         int err;
8538
8539         if (!obj)
8540                 return libbpf_err(-ENOENT);
8541
8542         if (!obj->loaded) {
8543                 pr_warn("object not yet loaded; load it first\n");
8544                 return libbpf_err(-ENOENT);
8545         }
8546
8547         bpf_object__for_each_map(map, obj) {
8548                 char *pin_path = NULL;
8549                 char buf[PATH_MAX];
8550
8551                 if (path) {
8552                         int len;
8553
8554                         len = snprintf(buf, PATH_MAX, "%s/%s", path,
8555                                        bpf_map__name(map));
8556                         if (len < 0) {
8557                                 err = -EINVAL;
8558                                 goto err_unpin_maps;
8559                         } else if (len >= PATH_MAX) {
8560                                 err = -ENAMETOOLONG;
8561                                 goto err_unpin_maps;
8562                         }
8563                         sanitize_pin_path(buf);
8564                         pin_path = buf;
8565                 } else if (!map->pin_path) {
8566                         continue;
8567                 }
8568
8569                 err = bpf_map__pin(map, pin_path);
8570                 if (err)
8571                         goto err_unpin_maps;
8572         }
8573
8574         return 0;
8575
8576 err_unpin_maps:
8577         while ((map = bpf_map__prev(map, obj))) {
8578                 if (!map->pin_path)
8579                         continue;
8580
8581                 bpf_map__unpin(map, NULL);
8582         }
8583
8584         return libbpf_err(err);
8585 }
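
/* Editorial usage sketch, not part of libbpf itself: pin every map of a
 * loaded object under one bpffs directory (the path is an assumption).
 * Each map is pinned at "<path>/<map_name>" with '.' sanitized to '_';
 * passing NULL instead pins only maps that already have a pin_path set.
 */
static inline int example_pin_all_maps(struct bpf_object *obj)
{
        return bpf_object__pin_maps(obj, "/sys/fs/bpf/myobj");
}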
8586
8587 int bpf_object__unpin_maps(struct bpf_object *obj, const char *path)
8588 {
8589         struct bpf_map *map;
8590         int err;
8591
8592         if (!obj)
8593                 return libbpf_err(-ENOENT);
8594
8595         bpf_object__for_each_map(map, obj) {
8596                 char *pin_path = NULL;
8597                 char buf[PATH_MAX];
8598
8599                 if (path) {
8600                         int len;
8601
8602                         len = snprintf(buf, PATH_MAX, "%s/%s", path,
8603                                        bpf_map__name(map));
8604                         if (len < 0)
8605                                 return libbpf_err(-EINVAL);
8606                         else if (len >= PATH_MAX)
8607                                 return libbpf_err(-ENAMETOOLONG);
8608                         sanitize_pin_path(buf);
8609                         pin_path = buf;
8610                 } else if (!map->pin_path) {
8611                         continue;
8612                 }
8613
8614                 err = bpf_map__unpin(map, pin_path);
8615                 if (err)
8616                         return libbpf_err(err);
8617         }
8618
8619         return 0;
8620 }
8621
8622 int bpf_object__pin_programs(struct bpf_object *obj, const char *path)
8623 {
8624         struct bpf_program *prog;
8625         int err;
8626
8627         if (!obj)
8628                 return libbpf_err(-ENOENT);
8629
8630         if (!obj->loaded) {
8631                 pr_warn("object not yet loaded; load it first\n");
8632                 return libbpf_err(-ENOENT);
8633         }
8634
8635         bpf_object__for_each_program(prog, obj) {
8636                 char buf[PATH_MAX];
8637                 int len;
8638
8639                 len = snprintf(buf, PATH_MAX, "%s/%s", path,
8640                                prog->pin_name);
8641                 if (len < 0) {
8642                         err = -EINVAL;
8643                         goto err_unpin_programs;
8644                 } else if (len >= PATH_MAX) {
8645                         err = -ENAMETOOLONG;
8646                         goto err_unpin_programs;
8647                 }
8648
8649                 err = bpf_program__pin(prog, buf);
8650                 if (err)
8651                         goto err_unpin_programs;
8652         }
8653
8654         return 0;
8655
8656 err_unpin_programs:
8657         while ((prog = bpf_program__prev(prog, obj))) {
8658                 char buf[PATH_MAX];
8659                 int len;
8660
8661                 len = snprintf(buf, PATH_MAX, "%s/%s", path,
8662                                prog->pin_name);
8663                 if (len < 0)
8664                         continue;
8665                 else if (len >= PATH_MAX)
8666                         continue;
8667
8668                 bpf_program__unpin(prog, buf);
8669         }
8670
8671         return libbpf_err(err);
8672 }
8673
8674 int bpf_object__unpin_programs(struct bpf_object *obj, const char *path)
8675 {
8676         struct bpf_program *prog;
8677         int err;
8678
8679         if (!obj)
8680                 return libbpf_err(-ENOENT);
8681
8682         bpf_object__for_each_program(prog, obj) {
8683                 char buf[PATH_MAX];
8684                 int len;
8685
8686                 len = snprintf(buf, PATH_MAX, "%s/%s", path,
8687                                prog->pin_name);
8688                 if (len < 0)
8689                         return libbpf_err(-EINVAL);
8690                 else if (len >= PATH_MAX)
8691                         return libbpf_err(-ENAMETOOLONG);
8692
8693                 err = bpf_program__unpin(prog, buf);
8694                 if (err)
8695                         return libbpf_err(err);
8696         }
8697
8698         return 0;
8699 }
8700
8701 int bpf_object__pin(struct bpf_object *obj, const char *path)
8702 {
8703         int err;
8704
8705         err = bpf_object__pin_maps(obj, path);
8706         if (err)
8707                 return libbpf_err(err);
8708
8709         err = bpf_object__pin_programs(obj, path);
8710         if (err) {
8711                 bpf_object__unpin_maps(obj, path);
8712                 return libbpf_err(err);
8713         }
8714
8715         return 0;
8716 }
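
/* Editorial usage sketch, not part of libbpf itself: pin maps and programs
 * of a loaded object in one call, then undo both. The directory is an
 * assumption; on failure bpf_object__pin() already rolls back its maps.
 */
static inline int example_pin_object(struct bpf_object *obj)
{
        int err;

        err = bpf_object__pin(obj, "/sys/fs/bpf/myobj");
        if (err)
                return err;

        bpf_object__unpin_programs(obj, "/sys/fs/bpf/myobj");
        bpf_object__unpin_maps(obj, "/sys/fs/bpf/myobj");
        return 0;
}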
8717
8718 static void bpf_map__destroy(struct bpf_map *map)
8719 {
8720         if (map->clear_priv)
8721                 map->clear_priv(map, map->priv);
8722         map->priv = NULL;
8723         map->clear_priv = NULL;
8724
8725         if (map->inner_map) {
8726                 bpf_map__destroy(map->inner_map);
8727                 zfree(&map->inner_map);
8728         }
8729
8730         zfree(&map->init_slots);
8731         map->init_slots_sz = 0;
8732
8733         if (map->mmaped) {
8734                 munmap(map->mmaped, bpf_map_mmap_sz(map));
8735                 map->mmaped = NULL;
8736         }
8737
8738         if (map->st_ops) {
8739                 zfree(&map->st_ops->data);
8740                 zfree(&map->st_ops->progs);
8741                 zfree(&map->st_ops->kern_func_off);
8742                 zfree(&map->st_ops);
8743         }
8744
8745         zfree(&map->name);
8746         zfree(&map->pin_path);
8747
8748         if (map->fd >= 0)
8749                 zclose(map->fd);
8750 }
8751
8752 void bpf_object__close(struct bpf_object *obj)
8753 {
8754         size_t i;
8755
8756         if (IS_ERR_OR_NULL(obj))
8757                 return;
8758
8759         if (obj->clear_priv)
8760                 obj->clear_priv(obj, obj->priv);
8761
8762         bpf_gen__free(obj->gen_loader);
8763         bpf_object__elf_finish(obj);
8764         bpf_object__unload(obj);
8765         btf__free(obj->btf);
8766         btf_ext__free(obj->btf_ext);
8767
8768         for (i = 0; i < obj->nr_maps; i++)
8769                 bpf_map__destroy(&obj->maps[i]);
8770
8771         zfree(&obj->btf_custom_path);
8772         zfree(&obj->kconfig);
8773         zfree(&obj->externs);
8774         obj->nr_extern = 0;
8775
8776         zfree(&obj->maps);
8777         obj->nr_maps = 0;
8778
8779         if (obj->programs && obj->nr_programs) {
8780                 for (i = 0; i < obj->nr_programs; i++)
8781                         bpf_program__exit(&obj->programs[i]);
8782         }
8783         zfree(&obj->programs);
8784
8785         list_del(&obj->list);
8786         free(obj);
8787 }
8788
8789 struct bpf_object *
8790 bpf_object__next(struct bpf_object *prev)
8791 {
8792         struct bpf_object *next;
8793
8794         if (!prev)
8795                 next = list_first_entry(&bpf_objects_list,
8796                                         struct bpf_object,
8797                                         list);
8798         else
8799                 next = list_next_entry(prev, list);
8800
8801         /* An empty list is detected here, so no check is needed on entry. */
8802         if (&next->list == &bpf_objects_list)
8803                 return NULL;
8804
8805         return next;
8806 }
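
/* Editorial usage sketch, not part of libbpf itself: walk all open objects
 * on the global list; a NULL argument starts from the head.
 */
static inline void example_iter_objects(void)
{
        struct bpf_object *obj = NULL;

        while ((obj = bpf_object__next(obj)))
                pr_debug("open object: %s\n", bpf_object__name(obj));
}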
8807
8808 const char *bpf_object__name(const struct bpf_object *obj)
8809 {
8810         return obj ? obj->name : libbpf_err_ptr(-EINVAL);
8811 }
8812
8813 unsigned int bpf_object__kversion(const struct bpf_object *obj)
8814 {
8815         return obj ? obj->kern_version : 0;
8816 }
8817
8818 struct btf *bpf_object__btf(const struct bpf_object *obj)
8819 {
8820         return obj ? obj->btf : NULL;
8821 }
8822
8823 int bpf_object__btf_fd(const struct bpf_object *obj)
8824 {
8825         return obj->btf ? btf__fd(obj->btf) : -1;
8826 }
8827
8828 int bpf_object__set_kversion(struct bpf_object *obj, __u32 kern_version)
8829 {
8830         if (obj->loaded)
8831                 return libbpf_err(-EINVAL);
8832
8833         obj->kern_version = kern_version;
8834
8835         return 0;
8836 }
8837
8838 int bpf_object__set_priv(struct bpf_object *obj, void *priv,
8839                          bpf_object_clear_priv_t clear_priv)
8840 {
8841         if (obj->priv && obj->clear_priv)
8842                 obj->clear_priv(obj, obj->priv);
8843
8844         obj->priv = priv;
8845         obj->clear_priv = clear_priv;
8846         return 0;
8847 }
8848
8849 void *bpf_object__priv(const struct bpf_object *obj)
8850 {
8851         return obj ? obj->priv : libbpf_err_ptr(-EINVAL);
8852 }
8853
8854 int bpf_object__gen_loader(struct bpf_object *obj, struct gen_loader_opts *opts)
8855 {
8856         struct bpf_gen *gen;
8857
8858         if (!opts)
8859                 return -EFAULT;
8860         if (!OPTS_VALID(opts, gen_loader_opts))
8861                 return -EINVAL;
8862         gen = calloc(1, sizeof(*gen));
8863         if (!gen)
8864                 return -ENOMEM;
8865         gen->opts = opts;
8866         obj->gen_loader = gen;
8867         return 0;
8868 }
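
/* Editorial usage sketch, not part of libbpf itself: enable the loader
 * program generator before load, so bpf_object__load() records loader
 * instructions instead of performing syscalls. Note that opts must stay
 * alive through the load, since the generator reports its output there.
 */
static inline int example_gen_loader_load(struct bpf_object *obj)
{
        DECLARE_LIBBPF_OPTS(gen_loader_opts, opts);
        int err;

        err = bpf_object__gen_loader(obj, &opts);
        if (err)
                return err;

        return bpf_object__load(obj);
}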
8869
8870 static struct bpf_program *
8871 __bpf_program__iter(const struct bpf_program *p, const struct bpf_object *obj,
8872                     bool forward)
8873 {
8874         size_t nr_programs = obj->nr_programs;
8875         ssize_t idx;
8876
8877         if (!nr_programs)
8878                 return NULL;
8879
8880         if (!p)
8881                 /* Iter from the beginning */
8882                 return forward ? &obj->programs[0] :
8883                         &obj->programs[nr_programs - 1];
8884
8885         if (p->obj != obj) {
8886                 pr_warn("error: program handle doesn't match object\n");
8887                 return errno = EINVAL, NULL;
8888         }
8889
8890         idx = (p - obj->programs) + (forward ? 1 : -1);
8891         if (idx >= obj->nr_programs || idx < 0)
8892                 return NULL;
8893         return &obj->programs[idx];
8894 }
8895
8896 struct bpf_program *
8897 bpf_program__next(struct bpf_program *prev, const struct bpf_object *obj)
8898 {
8899         struct bpf_program *prog = prev;
8900
8901         do {
8902                 prog = __bpf_program__iter(prog, obj, true);
8903         } while (prog && prog_is_subprog(obj, prog));
8904
8905         return prog;
8906 }
8907
8908 struct bpf_program *
8909 bpf_program__prev(struct bpf_program *next, const struct bpf_object *obj)
8910 {
8911         struct bpf_program *prog = next;
8912
8913         do {
8914                 prog = __bpf_program__iter(prog, obj, false);
8915         } while (prog && prog_is_subprog(obj, prog));
8916
8917         return prog;
8918 }
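
/* Editorial usage sketch, not part of libbpf itself: iterate entry-point
 * programs of an object; subprograms are skipped by the wrappers above.
 * The bpf_object__for_each_program() macro from libbpf.h wraps this loop.
 */
static inline void example_iter_progs(const struct bpf_object *obj)
{
        struct bpf_program *prog = NULL;

        while ((prog = bpf_program__next(prog, obj)))
                pr_debug("prog: %s\n", bpf_program__name(prog));
}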
8919
8920 int bpf_program__set_priv(struct bpf_program *prog, void *priv,
8921                           bpf_program_clear_priv_t clear_priv)
8922 {
8923         if (prog->priv && prog->clear_priv)
8924                 prog->clear_priv(prog, prog->priv);
8925
8926         prog->priv = priv;
8927         prog->clear_priv = clear_priv;
8928         return 0;
8929 }
8930
8931 void *bpf_program__priv(const struct bpf_program *prog)
8932 {
8933         return prog ? prog->priv : libbpf_err_ptr(-EINVAL);
8934 }
8935
8936 void bpf_program__set_ifindex(struct bpf_program *prog, __u32 ifindex)
8937 {
8938         prog->prog_ifindex = ifindex;
8939 }
8940
8941 const char *bpf_program__name(const struct bpf_program *prog)
8942 {
8943         return prog->name;
8944 }
8945
8946 const char *bpf_program__section_name(const struct bpf_program *prog)
8947 {
8948         return prog->sec_name;
8949 }
8950
8951 const char *bpf_program__title(const struct bpf_program *prog, bool needs_copy)
8952 {
8953         const char *title;
8954
8955         title = prog->sec_name;
8956         if (needs_copy) {
8957                 title = strdup(title);
8958                 if (!title) {
8959                         pr_warn("failed to strdup program title\n");
8960                         return libbpf_err_ptr(-ENOMEM);
8961                 }
8962         }
8963
8964         return title;
8965 }
8966
8967 bool bpf_program__autoload(const struct bpf_program *prog)
8968 {
8969         return prog->load;
8970 }
8971
8972 int bpf_program__set_autoload(struct bpf_program *prog, bool autoload)
8973 {
8974         if (prog->obj->loaded)
8975                 return libbpf_err(-EINVAL);
8976
8977         prog->load = autoload;
8978         return 0;
8979 }
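
/* Editorial usage sketch, not part of libbpf itself: opt a program out of
 * loading; must be done before the object is loaded (see the check above).
 */
static inline int example_skip_prog(struct bpf_program *prog)
{
        return bpf_program__set_autoload(prog, false);
}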
8980
8981 int bpf_program__fd(const struct bpf_program *prog)
8982 {
8983         return bpf_program__nth_fd(prog, 0);
8984 }
8985
8986 size_t bpf_program__size(const struct bpf_program *prog)
8987 {
8988         return prog->insns_cnt * BPF_INSN_SZ;
8989 }
8990
8991 int bpf_program__set_prep(struct bpf_program *prog, int nr_instances,
8992                           bpf_program_prep_t prep)
8993 {
8994         int *instances_fds;
8995
8996         if (nr_instances <= 0 || !prep)
8997                 return libbpf_err(-EINVAL);
8998
8999         if (prog->instances.nr > 0 || prog->instances.fds) {
9000                 pr_warn("Can't set pre-processor after loading\n");
9001                 return libbpf_err(-EINVAL);
9002         }
9003
9004         instances_fds = malloc(sizeof(int) * nr_instances);
9005         if (!instances_fds) {
9006                 pr_warn("failed to allocate memory for instance fds\n");
9007                 return libbpf_err(-ENOMEM);
9008         }
9009
9010         /* fill all fds with -1 */
9011         memset(instances_fds, -1, sizeof(int) * nr_instances);
9012
9013         prog->instances.nr = nr_instances;
9014         prog->instances.fds = instances_fds;
9015         prog->preprocessor = prep;
9016         return 0;
9017 }
9018
9019 int bpf_program__nth_fd(const struct bpf_program *prog, int n)
9020 {
9021         int fd;
9022
9023         if (!prog)
9024                 return libbpf_err(-EINVAL);
9025
9026         if (n >= prog->instances.nr || n < 0) {
9027                 pr_warn("Can't get the %dth fd from program %s: only %d instances\n",
9028                         n, prog->name, prog->instances.nr);
9029                 return libbpf_err(-EINVAL);
9030         }
9031
9032         fd = prog->instances.fds[n];
9033         if (fd < 0) {
9034                 pr_warn("%dth instance of program '%s' is invalid\n",
9035                         n, prog->name);
9036                 return libbpf_err(-ENOENT);
9037         }
9038
9039         return fd;
9040 }
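
/* Editorial usage sketch, not part of libbpf itself: a no-op preprocessor
 * for the legacy multi-instance API, assuming the bpf_program_prep_t and
 * bpf_prog_prep_result definitions from libbpf.h. Registered via
 * bpf_program__set_prep(prog, N, example_identity_prep) before load; each
 * instance's fd is then retrieved with bpf_program__nth_fd().
 */
static inline int example_identity_prep(struct bpf_program *prog, int n,
                                        struct bpf_insn *insns, int insns_cnt,
                                        struct bpf_prog_prep_result *res)
{
        /* load instance @n with the original instructions, unmodified */
        res->new_insn_ptr = insns;
        res->new_insn_cnt = insns_cnt;
        res->pfd = NULL;
        return 0;
}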
9041
9042 enum bpf_prog_type bpf_program__get_type(const struct bpf_program *prog)
9043 {
9044         return prog->type;
9045 }
9046
9047 void bpf_program__set_type(struct bpf_program *prog, enum bpf_prog_type type)
9048 {
9049         prog->type = type;
9050 }
9051
9052 static bool bpf_program__is_type(const struct bpf_program *prog,
9053                                  enum bpf_prog_type type)
9054 {
9055         return prog ? (prog->type == type) : false;
9056 }
9057
9058 #define BPF_PROG_TYPE_FNS(NAME, TYPE)                           \
9059 int bpf_program__set_##NAME(struct bpf_program *prog)           \
9060 {                                                               \
9061         if (!prog)                                              \
9062                 return libbpf_err(-EINVAL);                     \
9063         bpf_program__set_type(prog, TYPE);                      \
9064         return 0;                                               \
9065 }                                                               \
9066                                                                 \
9067 bool bpf_program__is_##NAME(const struct bpf_program *prog)     \
9068 {                                                               \
9069         return bpf_program__is_type(prog, TYPE);                \
9070 }                                                               \
9071
9072 BPF_PROG_TYPE_FNS(socket_filter, BPF_PROG_TYPE_SOCKET_FILTER);
9073 BPF_PROG_TYPE_FNS(lsm, BPF_PROG_TYPE_LSM);
9074 BPF_PROG_TYPE_FNS(kprobe, BPF_PROG_TYPE_KPROBE);
9075 BPF_PROG_TYPE_FNS(sched_cls, BPF_PROG_TYPE_SCHED_CLS);
9076 BPF_PROG_TYPE_FNS(sched_act, BPF_PROG_TYPE_SCHED_ACT);
9077 BPF_PROG_TYPE_FNS(tracepoint, BPF_PROG_TYPE_TRACEPOINT);
9078 BPF_PROG_TYPE_FNS(raw_tracepoint, BPF_PROG_TYPE_RAW_TRACEPOINT);
9079 BPF_PROG_TYPE_FNS(xdp, BPF_PROG_TYPE_XDP);
9080 BPF_PROG_TYPE_FNS(perf_event, BPF_PROG_TYPE_PERF_EVENT);
9081 BPF_PROG_TYPE_FNS(tracing, BPF_PROG_TYPE_TRACING);
9082 BPF_PROG_TYPE_FNS(struct_ops, BPF_PROG_TYPE_STRUCT_OPS);
9083 BPF_PROG_TYPE_FNS(extension, BPF_PROG_TYPE_EXT);
9084 BPF_PROG_TYPE_FNS(sk_lookup, BPF_PROG_TYPE_SK_LOOKUP);
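
/* Editorial usage sketch, not part of libbpf itself: the macro expansions
 * above generate typed setters and predicates, e.g. for XDP:
 */
static inline void example_force_xdp_type(struct bpf_program *prog)
{
        if (!bpf_program__is_xdp(prog))
                bpf_program__set_xdp(prog);
}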
9085
9086 enum bpf_attach_type
9087 bpf_program__get_expected_attach_type(const struct bpf_program *prog)
9088 {
9089         return prog->expected_attach_type;
9090 }
9091
9092 void bpf_program__set_expected_attach_type(struct bpf_program *prog,
9093                                            enum bpf_attach_type type)
9094 {
9095         prog->expected_attach_type = type;
9096 }
9097
9098 #define BPF_PROG_SEC_IMPL(string, ptype, eatype, eatype_optional,           \
9099                           attachable, attach_btf)                           \
9100         {                                                                   \
9101                 .sec = string,                                              \
9102                 .len = sizeof(string) - 1,                                  \
9103                 .prog_type = ptype,                                         \
9104                 .expected_attach_type = eatype,                             \
9105                 .is_exp_attach_type_optional = eatype_optional,             \
9106                 .is_attachable = attachable,                                \
9107                 .is_attach_btf = attach_btf,                                \
9108         }
9109
9110 /* Programs that can NOT be attached. */
9111 #define BPF_PROG_SEC(string, ptype) BPF_PROG_SEC_IMPL(string, ptype, 0, 0, 0, 0)
9112
9113 /* Programs that can be attached. */
9114 #define BPF_APROG_SEC(string, ptype, atype) \
9115         BPF_PROG_SEC_IMPL(string, ptype, atype, true, 1, 0)
9116
9117 /* Programs that must specify expected attach type at load time. */
9118 #define BPF_EAPROG_SEC(string, ptype, eatype) \
9119         BPF_PROG_SEC_IMPL(string, ptype, eatype, false, 1, 0)
9120
9121 /* Programs that use BTF to identify attach point */
9122 #define BPF_PROG_BTF(string, ptype, eatype) \
9123         BPF_PROG_SEC_IMPL(string, ptype, eatype, false, 0, 1)
9124
9125 /* Programs that can be attached but attach type can't be identified by section
9126  * name. Kept for backward compatibility.
9127  */
9128 #define BPF_APROG_COMPAT(string, ptype) BPF_PROG_SEC(string, ptype)
9129
9130 #define SEC_DEF(sec_pfx, ptype, ...) {                                      \
9131         .sec = sec_pfx,                                                     \
9132         .len = sizeof(sec_pfx) - 1,                                         \
9133         .prog_type = BPF_PROG_TYPE_##ptype,                                 \
9134         __VA_ARGS__                                                         \
9135 }
9136
9137 static struct bpf_link *attach_kprobe(const struct bpf_sec_def *sec,
9138                                       struct bpf_program *prog);
9139 static struct bpf_link *attach_tp(const struct bpf_sec_def *sec,
9140                                   struct bpf_program *prog);
9141 static struct bpf_link *attach_raw_tp(const struct bpf_sec_def *sec,
9142                                       struct bpf_program *prog);
9143 static struct bpf_link *attach_trace(const struct bpf_sec_def *sec,
9144                                      struct bpf_program *prog);
9145 static struct bpf_link *attach_lsm(const struct bpf_sec_def *sec,
9146                                    struct bpf_program *prog);
9147 static struct bpf_link *attach_iter(const struct bpf_sec_def *sec,
9148                                     struct bpf_program *prog);
9149
9150 static const struct bpf_sec_def section_defs[] = {
9151         BPF_PROG_SEC("socket",                  BPF_PROG_TYPE_SOCKET_FILTER),
9152         BPF_EAPROG_SEC("sk_reuseport/migrate",  BPF_PROG_TYPE_SK_REUSEPORT,
9153                                                 BPF_SK_REUSEPORT_SELECT_OR_MIGRATE),
9154         BPF_EAPROG_SEC("sk_reuseport",          BPF_PROG_TYPE_SK_REUSEPORT,
9155                                                 BPF_SK_REUSEPORT_SELECT),
9156         SEC_DEF("kprobe/", KPROBE,
9157                 .attach_fn = attach_kprobe),
9158         BPF_PROG_SEC("uprobe/",                 BPF_PROG_TYPE_KPROBE),
9159         SEC_DEF("kretprobe/", KPROBE,
9160                 .attach_fn = attach_kprobe),
9161         BPF_PROG_SEC("uretprobe/",              BPF_PROG_TYPE_KPROBE),
9162         BPF_PROG_SEC("classifier",              BPF_PROG_TYPE_SCHED_CLS),
9163         BPF_PROG_SEC("action",                  BPF_PROG_TYPE_SCHED_ACT),
9164         SEC_DEF("tracepoint/", TRACEPOINT,
9165                 .attach_fn = attach_tp),
9166         SEC_DEF("tp/", TRACEPOINT,
9167                 .attach_fn = attach_tp),
9168         SEC_DEF("raw_tracepoint/", RAW_TRACEPOINT,
9169                 .attach_fn = attach_raw_tp),
9170         SEC_DEF("raw_tp/", RAW_TRACEPOINT,
9171                 .attach_fn = attach_raw_tp),
9172         SEC_DEF("tp_btf/", TRACING,
9173                 .expected_attach_type = BPF_TRACE_RAW_TP,
9174                 .is_attach_btf = true,
9175                 .attach_fn = attach_trace),
9176         SEC_DEF("fentry/", TRACING,
9177                 .expected_attach_type = BPF_TRACE_FENTRY,
9178                 .is_attach_btf = true,
9179                 .attach_fn = attach_trace),
9180         SEC_DEF("fmod_ret/", TRACING,
9181                 .expected_attach_type = BPF_MODIFY_RETURN,
9182                 .is_attach_btf = true,
9183                 .attach_fn = attach_trace),
9184         SEC_DEF("fexit/", TRACING,
9185                 .expected_attach_type = BPF_TRACE_FEXIT,
9186                 .is_attach_btf = true,
9187                 .attach_fn = attach_trace),
9188         SEC_DEF("fentry.s/", TRACING,
9189                 .expected_attach_type = BPF_TRACE_FENTRY,
9190                 .is_attach_btf = true,
9191                 .is_sleepable = true,
9192                 .attach_fn = attach_trace),
9193         SEC_DEF("fmod_ret.s/", TRACING,
9194                 .expected_attach_type = BPF_MODIFY_RETURN,
9195                 .is_attach_btf = true,
9196                 .is_sleepable = true,
9197                 .attach_fn = attach_trace),
9198         SEC_DEF("fexit.s/", TRACING,
9199                 .expected_attach_type = BPF_TRACE_FEXIT,
9200                 .is_attach_btf = true,
9201                 .is_sleepable = true,
9202                 .attach_fn = attach_trace),
9203         SEC_DEF("freplace/", EXT,
9204                 .is_attach_btf = true,
9205                 .attach_fn = attach_trace),
9206         SEC_DEF("lsm/", LSM,
9207                 .is_attach_btf = true,
9208                 .expected_attach_type = BPF_LSM_MAC,
9209                 .attach_fn = attach_lsm),
9210         SEC_DEF("lsm.s/", LSM,
9211                 .is_attach_btf = true,
9212                 .is_sleepable = true,
9213                 .expected_attach_type = BPF_LSM_MAC,
9214                 .attach_fn = attach_lsm),
9215         SEC_DEF("iter/", TRACING,
9216                 .expected_attach_type = BPF_TRACE_ITER,
9217                 .is_attach_btf = true,
9218                 .attach_fn = attach_iter),
9219         SEC_DEF("syscall", SYSCALL,
9220                 .is_sleepable = true),
9221         BPF_EAPROG_SEC("xdp_devmap/",           BPF_PROG_TYPE_XDP,
9222                                                 BPF_XDP_DEVMAP),
9223         BPF_EAPROG_SEC("xdp_cpumap/",           BPF_PROG_TYPE_XDP,
9224                                                 BPF_XDP_CPUMAP),
9225         BPF_APROG_SEC("xdp",                    BPF_PROG_TYPE_XDP,
9226                                                 BPF_XDP),
9227         BPF_PROG_SEC("perf_event",              BPF_PROG_TYPE_PERF_EVENT),
9228         BPF_PROG_SEC("lwt_in",                  BPF_PROG_TYPE_LWT_IN),
9229         BPF_PROG_SEC("lwt_out",                 BPF_PROG_TYPE_LWT_OUT),
9230         BPF_PROG_SEC("lwt_xmit",                BPF_PROG_TYPE_LWT_XMIT),
9231         BPF_PROG_SEC("lwt_seg6local",           BPF_PROG_TYPE_LWT_SEG6LOCAL),
9232         BPF_APROG_SEC("cgroup_skb/ingress",     BPF_PROG_TYPE_CGROUP_SKB,
9233                                                 BPF_CGROUP_INET_INGRESS),
9234         BPF_APROG_SEC("cgroup_skb/egress",      BPF_PROG_TYPE_CGROUP_SKB,
9235                                                 BPF_CGROUP_INET_EGRESS),
9236         BPF_APROG_COMPAT("cgroup/skb",          BPF_PROG_TYPE_CGROUP_SKB),
9237         BPF_EAPROG_SEC("cgroup/sock_create",    BPF_PROG_TYPE_CGROUP_SOCK,
9238                                                 BPF_CGROUP_INET_SOCK_CREATE),
9239         BPF_EAPROG_SEC("cgroup/sock_release",   BPF_PROG_TYPE_CGROUP_SOCK,
9240                                                 BPF_CGROUP_INET_SOCK_RELEASE),
9241         BPF_APROG_SEC("cgroup/sock",            BPF_PROG_TYPE_CGROUP_SOCK,
9242                                                 BPF_CGROUP_INET_SOCK_CREATE),
9243         BPF_EAPROG_SEC("cgroup/post_bind4",     BPF_PROG_TYPE_CGROUP_SOCK,
9244                                                 BPF_CGROUP_INET4_POST_BIND),
9245         BPF_EAPROG_SEC("cgroup/post_bind6",     BPF_PROG_TYPE_CGROUP_SOCK,
9246                                                 BPF_CGROUP_INET6_POST_BIND),
9247         BPF_APROG_SEC("cgroup/dev",             BPF_PROG_TYPE_CGROUP_DEVICE,
9248                                                 BPF_CGROUP_DEVICE),
9249         BPF_APROG_SEC("sockops",                BPF_PROG_TYPE_SOCK_OPS,
9250                                                 BPF_CGROUP_SOCK_OPS),
9251         BPF_APROG_SEC("sk_skb/stream_parser",   BPF_PROG_TYPE_SK_SKB,
9252                                                 BPF_SK_SKB_STREAM_PARSER),
9253         BPF_APROG_SEC("sk_skb/stream_verdict",  BPF_PROG_TYPE_SK_SKB,
9254                                                 BPF_SK_SKB_STREAM_VERDICT),
9255         BPF_APROG_COMPAT("sk_skb",              BPF_PROG_TYPE_SK_SKB),
9256         BPF_APROG_SEC("sk_msg",                 BPF_PROG_TYPE_SK_MSG,
9257                                                 BPF_SK_MSG_VERDICT),
9258         BPF_APROG_SEC("lirc_mode2",             BPF_PROG_TYPE_LIRC_MODE2,
9259                                                 BPF_LIRC_MODE2),
9260         BPF_APROG_SEC("flow_dissector",         BPF_PROG_TYPE_FLOW_DISSECTOR,
9261                                                 BPF_FLOW_DISSECTOR),
9262         BPF_EAPROG_SEC("cgroup/bind4",          BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
9263                                                 BPF_CGROUP_INET4_BIND),
9264         BPF_EAPROG_SEC("cgroup/bind6",          BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
9265                                                 BPF_CGROUP_INET6_BIND),
9266         BPF_EAPROG_SEC("cgroup/connect4",       BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
9267                                                 BPF_CGROUP_INET4_CONNECT),
9268         BPF_EAPROG_SEC("cgroup/connect6",       BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
9269                                                 BPF_CGROUP_INET6_CONNECT),
9270         BPF_EAPROG_SEC("cgroup/sendmsg4",       BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
9271                                                 BPF_CGROUP_UDP4_SENDMSG),
9272         BPF_EAPROG_SEC("cgroup/sendmsg6",       BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
9273                                                 BPF_CGROUP_UDP6_SENDMSG),
9274         BPF_EAPROG_SEC("cgroup/recvmsg4",       BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
9275                                                 BPF_CGROUP_UDP4_RECVMSG),
9276         BPF_EAPROG_SEC("cgroup/recvmsg6",       BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
9277                                                 BPF_CGROUP_UDP6_RECVMSG),
9278         BPF_EAPROG_SEC("cgroup/getpeername4",   BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
9279                                                 BPF_CGROUP_INET4_GETPEERNAME),
9280         BPF_EAPROG_SEC("cgroup/getpeername6",   BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
9281                                                 BPF_CGROUP_INET6_GETPEERNAME),
9282         BPF_EAPROG_SEC("cgroup/getsockname4",   BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
9283                                                 BPF_CGROUP_INET4_GETSOCKNAME),
9284         BPF_EAPROG_SEC("cgroup/getsockname6",   BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
9285                                                 BPF_CGROUP_INET6_GETSOCKNAME),
9286         BPF_EAPROG_SEC("cgroup/sysctl",         BPF_PROG_TYPE_CGROUP_SYSCTL,
9287                                                 BPF_CGROUP_SYSCTL),
9288         BPF_EAPROG_SEC("cgroup/getsockopt",     BPF_PROG_TYPE_CGROUP_SOCKOPT,
9289                                                 BPF_CGROUP_GETSOCKOPT),
9290         BPF_EAPROG_SEC("cgroup/setsockopt",     BPF_PROG_TYPE_CGROUP_SOCKOPT,
9291                                                 BPF_CGROUP_SETSOCKOPT),
9292         BPF_PROG_SEC("struct_ops",              BPF_PROG_TYPE_STRUCT_OPS),
9293         BPF_EAPROG_SEC("sk_lookup/",            BPF_PROG_TYPE_SK_LOOKUP,
9294                                                 BPF_SK_LOOKUP),
9295 };
9296
9297 #undef BPF_PROG_SEC_IMPL
9298 #undef BPF_PROG_SEC
9299 #undef BPF_APROG_SEC
9300 #undef BPF_EAPROG_SEC
9301 #undef BPF_APROG_COMPAT
9302 #undef SEC_DEF
9303
9304 #define MAX_TYPE_NAME_SIZE 32
9305
9306 static const struct bpf_sec_def *find_sec_def(const char *sec_name)
9307 {
9308         int i, n = ARRAY_SIZE(section_defs);
9309
9310         for (i = 0; i < n; i++) {
9311                 if (strncmp(sec_name,
9312                             section_defs[i].sec, section_defs[i].len))
9313                         continue;
9314                 return &section_defs[i];
9315         }
9316         return NULL;
9317 }
9318
9319 static char *libbpf_get_type_names(bool attach_type)
9320 {
9321         int i, len = ARRAY_SIZE(section_defs) * MAX_TYPE_NAME_SIZE;
9322         char *buf;
9323
9324         buf = malloc(len);
9325         if (!buf)
9326                 return NULL;
9327
9328         buf[0] = '\0';
9329         /* Fill buf with all available section names */
9330         for (i = 0; i < ARRAY_SIZE(section_defs); i++) {
9331                 if (attach_type && !section_defs[i].is_attachable)
9332                         continue;
9333
9334                 if (strlen(buf) + strlen(section_defs[i].sec) + 2 > len) {
9335                         free(buf);
9336                         return NULL;
9337                 }
9338                 strcat(buf, " ");
9339                 strcat(buf, section_defs[i].sec);
9340         }
9341
9342         return buf;
9343 }
9344
9345 int libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type,
9346                              enum bpf_attach_type *expected_attach_type)
9347 {
9348         const struct bpf_sec_def *sec_def;
9349         char *type_names;
9350
9351         if (!name)
9352                 return libbpf_err(-EINVAL);
9353
9354         sec_def = find_sec_def(name);
9355         if (sec_def) {
9356                 *prog_type = sec_def->prog_type;
9357                 *expected_attach_type = sec_def->expected_attach_type;
9358                 return 0;
9359         }
9360
9361         pr_debug("failed to guess program type from ELF section '%s'\n", name);
9362         type_names = libbpf_get_type_names(false);
9363         if (type_names != NULL) {
9364                 pr_debug("supported section(type) names are:%s\n", type_names);
9365                 free(type_names);
9366         }
9367
9368         return libbpf_err(-ESRCH);
9369 }
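
/* Editorial usage sketch, not part of libbpf itself: derive program and
 * expected attach type from an ELF section name listed in section_defs[].
 */
static inline int example_guess_prog_type(void)
{
        enum bpf_prog_type prog_type;
        enum bpf_attach_type attach_type;

        /* "xdp" matches the BPF_APROG_SEC("xdp", ...) entry above */
        return libbpf_prog_type_by_name("xdp", &prog_type, &attach_type);
}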
9370
9371 static struct bpf_map *find_struct_ops_map_by_offset(struct bpf_object *obj,
9372                                                      size_t offset)
9373 {
9374         struct bpf_map *map;
9375         size_t i;
9376
9377         for (i = 0; i < obj->nr_maps; i++) {
9378                 map = &obj->maps[i];
9379                 if (!bpf_map__is_struct_ops(map))
9380                         continue;
9381                 if (map->sec_offset <= offset &&
9382                     offset - map->sec_offset < map->def.value_size)
9383                         return map;
9384         }
9385
9386         return NULL;
9387 }
9388
9389 /* Collect the reloc from ELF and populate the st_ops->progs[] */
9390 static int bpf_object__collect_st_ops_relos(struct bpf_object *obj,
9391                                             GElf_Shdr *shdr, Elf_Data *data)
9392 {
9393         const struct btf_member *member;
9394         struct bpf_struct_ops *st_ops;
9395         struct bpf_program *prog;
9396         unsigned int shdr_idx;
9397         const struct btf *btf;
9398         struct bpf_map *map;
9399         Elf_Data *symbols;
9400         unsigned int moff, insn_idx;
9401         const char *name;
9402         __u32 member_idx;
9403         GElf_Sym sym;
9404         GElf_Rel rel;
9405         int i, nrels;
9406
9407         symbols = obj->efile.symbols;
9408         btf = obj->btf;
9409         nrels = shdr->sh_size / shdr->sh_entsize;
9410         for (i = 0; i < nrels; i++) {
9411                 if (!gelf_getrel(data, i, &rel)) {
9412                         pr_warn("struct_ops reloc: failed to get %d reloc\n", i);
9413                         return -LIBBPF_ERRNO__FORMAT;
9414                 }
9415
9416                 if (!gelf_getsym(symbols, GELF_R_SYM(rel.r_info), &sym)) {
9417                         pr_warn("struct_ops reloc: symbol %zx not found\n",
9418                                 (size_t)GELF_R_SYM(rel.r_info));
9419                         return -LIBBPF_ERRNO__FORMAT;
9420                 }
9421
9422                 name = elf_sym_str(obj, sym.st_name) ?: "<?>";
9423                 map = find_struct_ops_map_by_offset(obj, rel.r_offset);
9424                 if (!map) {
9425                         pr_warn("struct_ops reloc: cannot find map at rel.r_offset %zu\n",
9426                                 (size_t)rel.r_offset);
9427                         return -EINVAL;
9428                 }
9429
9430                 moff = rel.r_offset - map->sec_offset;
9431                 shdr_idx = sym.st_shndx;
9432                 st_ops = map->st_ops;
9433                 pr_debug("struct_ops reloc %s: for %lld value %lld shdr_idx %u rel.r_offset %zu map->sec_offset %zu name %d (\'%s\')\n",
9434                          map->name,
9435                          (long long)(rel.r_info >> 32),
9436                          (long long)sym.st_value,
9437                          shdr_idx, (size_t)rel.r_offset,
9438                          map->sec_offset, sym.st_name, name);
9439
9440                 if (shdr_idx >= SHN_LORESERVE) {
9441                         pr_warn("struct_ops reloc %s: rel.r_offset %zu shdr_idx %u unsupported non-static function\n",
9442                                 map->name, (size_t)rel.r_offset, shdr_idx);
9443                         return -LIBBPF_ERRNO__RELOC;
9444                 }
9445                 if (sym.st_value % BPF_INSN_SZ) {
9446                         pr_warn("struct_ops reloc %s: invalid target program offset %llu\n",
9447                                 map->name, (unsigned long long)sym.st_value);
9448                         return -LIBBPF_ERRNO__FORMAT;
9449                 }
9450                 insn_idx = sym.st_value / BPF_INSN_SZ;
9451
9452                 member = find_member_by_offset(st_ops->type, moff * 8);
9453                 if (!member) {
9454                         pr_warn("struct_ops reloc %s: cannot find member at moff %u\n",
9455                                 map->name, moff);
9456                         return -EINVAL;
9457                 }
9458                 member_idx = member - btf_members(st_ops->type);
9459                 name = btf__name_by_offset(btf, member->name_off);
9460
9461                 if (!resolve_func_ptr(btf, member->type, NULL)) {
9462                         pr_warn("struct_ops reloc %s: cannot relocate non func ptr %s\n",
9463                                 map->name, name);
9464                         return -EINVAL;
9465                 }
9466
9467                 prog = find_prog_by_sec_insn(obj, shdr_idx, insn_idx);
9468                 if (!prog) {
9469                         pr_warn("struct_ops reloc %s: cannot find prog at shdr_idx %u to relocate func ptr %s\n",
9470                                 map->name, shdr_idx, name);
9471                         return -EINVAL;
9472                 }
9473
9474                 if (prog->type == BPF_PROG_TYPE_UNSPEC) {
9475                         const struct bpf_sec_def *sec_def;
9476
9477                         sec_def = find_sec_def(prog->sec_name);
9478                         if (sec_def &&
9479                             sec_def->prog_type != BPF_PROG_TYPE_STRUCT_OPS) {
9480                                 /* set the type just for the pr_warn below */
9481                                 prog->type = sec_def->prog_type;
9482                                 goto invalid_prog;
9483                         }
9484
9485                         prog->type = BPF_PROG_TYPE_STRUCT_OPS;
9486                         prog->attach_btf_id = st_ops->type_id;
9487                         prog->expected_attach_type = member_idx;
9488                 } else if (prog->type != BPF_PROG_TYPE_STRUCT_OPS ||
9489                            prog->attach_btf_id != st_ops->type_id ||
9490                            prog->expected_attach_type != member_idx) {
9491                         goto invalid_prog;
9492                 }
9493                 st_ops->progs[member_idx] = prog;
9494         }
9495
9496         return 0;
9497
9498 invalid_prog:
9499         pr_warn("struct_ops reloc %s: cannot use prog %s in sec %s with type %u attach_btf_id %u expected_attach_type %u for func ptr %s\n",
9500                 map->name, prog->name, prog->sec_name, prog->type,
9501                 prog->attach_btf_id, prog->expected_attach_type, name);
9502         return -EINVAL;
9503 }
9504
9505 #define BTF_TRACE_PREFIX "btf_trace_"
9506 #define BTF_LSM_PREFIX "bpf_lsm_"
9507 #define BTF_ITER_PREFIX "bpf_iter_"
9508 #define BTF_MAX_NAME_SIZE 128
9509
9510 void btf_get_kernel_prefix_kind(enum bpf_attach_type attach_type,
9511                                 const char **prefix, int *kind)
9512 {
9513         switch (attach_type) {
9514         case BPF_TRACE_RAW_TP:
9515                 *prefix = BTF_TRACE_PREFIX;
9516                 *kind = BTF_KIND_TYPEDEF;
9517                 break;
9518         case BPF_LSM_MAC:
9519                 *prefix = BTF_LSM_PREFIX;
9520                 *kind = BTF_KIND_FUNC;
9521                 break;
9522         case BPF_TRACE_ITER:
9523                 *prefix = BTF_ITER_PREFIX;
9524                 *kind = BTF_KIND_FUNC;
9525                 break;
9526         default:
9527                 *prefix = "";
9528                 *kind = BTF_KIND_FUNC;
9529         }
9530 }
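
/* For example, with the mapping above a BPF_TRACE_RAW_TP program targeting
 * "sched_switch" is resolved against the vmlinux BTF TYPEDEF
 * "btf_trace_sched_switch", while a BPF_LSM_MAC program targeting
 * "file_open" is resolved against the FUNC "bpf_lsm_file_open".
 */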
9531
9532 static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix,
9533                                    const char *name, __u32 kind)
9534 {
9535         char btf_type_name[BTF_MAX_NAME_SIZE];
9536         int ret;
9537
9538         ret = snprintf(btf_type_name, sizeof(btf_type_name),
9539                        "%s%s", prefix, name);
9540         /* snprintf returns the number of characters written excluding
9541          * the terminating null. So, if >= BTF_MAX_NAME_SIZE are written, it
9542          * indicates truncation.
9543          */
9544         if (ret < 0 || ret >= sizeof(btf_type_name))
9545                 return -ENAMETOOLONG;
9546         return btf__find_by_name_kind(btf, btf_type_name, kind);
9547 }
9548
9549 static inline int find_attach_btf_id(struct btf *btf, const char *name,
9550                                      enum bpf_attach_type attach_type)
9551 {
9552         const char *prefix;
9553         int kind;
9554
9555         btf_get_kernel_prefix_kind(attach_type, &prefix, &kind);
9556         return find_btf_by_prefix_kind(btf, prefix, name, kind);
9557 }
9558
9559 int libbpf_find_vmlinux_btf_id(const char *name,
9560                                enum bpf_attach_type attach_type)
9561 {
9562         struct btf *btf;
9563         int err;
9564
9565         btf = libbpf_find_kernel_btf();
9566         err = libbpf_get_error(btf);
9567         if (err) {
9568                 pr_warn("vmlinux BTF is not found\n");
9569                 return libbpf_err(err);
9570         }
9571
9572         err = find_attach_btf_id(btf, name, attach_type);
9573         if (err <= 0)
9574                 pr_warn("%s is not found in vmlinux BTF\n", name);
9575
9576         btf__free(btf);
9577         return libbpf_err(err);
9578 }
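
/* Usage sketch (illustrative): look up the vmlinux BTF type ID that a
 * fentry program attaching to do_unlinkat() would target:
 *
 *	int btf_id = libbpf_find_vmlinux_btf_id("do_unlinkat",
 *						BPF_TRACE_FENTRY);
 *
 * On success btf_id is a positive type ID; on failure a negative error is
 * returned and errno is set.
 */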
9579
9580 static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd)
9581 {
9582         struct bpf_prog_info_linear *info_linear;
9583         struct bpf_prog_info *info;
9584         struct btf *btf = NULL;
9585         int err = -EINVAL;
9586
9587         info_linear = bpf_program__get_prog_info_linear(attach_prog_fd, 0);
9588         err = libbpf_get_error(info_linear);
9589         if (err) {
9590                 pr_warn("failed get_prog_info_linear for FD %d\n",
9591                         attach_prog_fd);
9592                 return err;
9593         }
9594         info = &info_linear->info;
9595         if (!info->btf_id) {
9596                 pr_warn("The target program doesn't have BTF\n");
                     err = -EINVAL;
9597                 goto out;
9598         }
9599         if (btf__get_from_id(info->btf_id, &btf)) {
9600                 pr_warn("Failed to get BTF of the program\n");
                     err = -EINVAL;
9601                 goto out;
9602         }
9603         err = btf__find_by_name_kind(btf, name, BTF_KIND_FUNC);
9604         btf__free(btf);
9605         if (err <= 0) {
9606                 pr_warn("%s is not found in prog's BTF\n", name);
9607                 goto out;
9608         }
9609 out:
9610         free(info_linear);
9611         return err;
9612 }
9613
9614 static int find_kernel_btf_id(struct bpf_object *obj, const char *attach_name,
9615                               enum bpf_attach_type attach_type,
9616                               int *btf_obj_fd, int *btf_type_id)
9617 {
9618         int ret, i;
9619
9620         ret = find_attach_btf_id(obj->btf_vmlinux, attach_name, attach_type);
9621         if (ret > 0) {
9622                 *btf_obj_fd = 0; /* vmlinux BTF */
9623                 *btf_type_id = ret;
9624                 return 0;
9625         }
9626         if (ret != -ENOENT)
9627                 return ret;
9628
9629         ret = load_module_btfs(obj);
9630         if (ret)
9631                 return ret;
9632
9633         for (i = 0; i < obj->btf_module_cnt; i++) {
9634                 const struct module_btf *mod = &obj->btf_modules[i];
9635
9636                 ret = find_attach_btf_id(mod->btf, attach_name, attach_type);
9637                 if (ret > 0) {
9638                         *btf_obj_fd = mod->fd;
9639                         *btf_type_id = ret;
9640                         return 0;
9641                 }
9642                 if (ret == -ENOENT)
9643                         continue;
9644
9645                 return ret;
9646         }
9647
9648         return -ESRCH;
9649 }
9650
9651 static int libbpf_find_attach_btf_id(struct bpf_program *prog, int *btf_obj_fd, int *btf_type_id)
9652 {
9653         enum bpf_attach_type attach_type = prog->expected_attach_type;
9654         __u32 attach_prog_fd = prog->attach_prog_fd;
9655         const char *name = prog->sec_name, *attach_name;
9656         const struct bpf_sec_def *sec = NULL;
9657         int i, err = 0;
9658
9659         if (!name)
9660                 return -EINVAL;
9661
9662         for (i = 0; i < ARRAY_SIZE(section_defs); i++) {
9663                 if (!section_defs[i].is_attach_btf)
9664                         continue;
9665                 if (strncmp(name, section_defs[i].sec, section_defs[i].len))
9666                         continue;
9667
9668                 sec = &section_defs[i];
9669                 break;
9670         }
9671
9672         if (!sec) {
9673                 pr_warn("failed to identify BTF ID based on ELF section name '%s'\n", name);
9674                 return -ESRCH;
9675         }
9676         attach_name = name + sec->len;
9677
9678         /* BPF program's BTF ID */
9679         if (attach_prog_fd) {
9680                 err = libbpf_find_prog_btf_id(attach_name, attach_prog_fd);
9681                 if (err < 0) {
9682                         pr_warn("failed to find BPF program (FD %d) BTF ID for '%s': %d\n",
9683                                  attach_prog_fd, attach_name, err);
9684                         return err;
9685                 }
9686                 *btf_obj_fd = 0;
9687                 *btf_type_id = err;
9688                 return 0;
9689         }
9690
9691         /* kernel/module BTF ID */
9692         if (prog->obj->gen_loader) {
9693                 bpf_gen__record_attach_target(prog->obj->gen_loader, attach_name, attach_type);
9694                 *btf_obj_fd = 0;
9695                 *btf_type_id = 1;
9696         } else {
9697                 err = find_kernel_btf_id(prog->obj, attach_name, attach_type, btf_obj_fd, btf_type_id);
9698         }
9699         if (err) {
9700                 pr_warn("failed to find kernel BTF type ID of '%s': %d\n", attach_name, err);
9701                 return err;
9702         }
9703         return 0;
9704 }
9705
9706 int libbpf_attach_type_by_name(const char *name,
9707                                enum bpf_attach_type *attach_type)
9708 {
9709         char *type_names;
9710         int i;
9711
9712         if (!name)
9713                 return libbpf_err(-EINVAL);
9714
9715         for (i = 0; i < ARRAY_SIZE(section_defs); i++) {
9716                 if (strncmp(name, section_defs[i].sec, section_defs[i].len))
9717                         continue;
9718                 if (!section_defs[i].is_attachable)
9719                         return libbpf_err(-EINVAL);
9720                 *attach_type = section_defs[i].expected_attach_type;
9721                 return 0;
9722         }
9723         pr_debug("failed to guess attach type based on ELF section name '%s'\n", name);
9724         type_names = libbpf_get_type_names(true);
9725         if (type_names != NULL) {
9726                 pr_debug("attachable section(type) names are:%s\n", type_names);
9727                 free(type_names);
9728         }
9729
9730         return libbpf_err(-EINVAL);
9731 }
9732
9733 int bpf_map__fd(const struct bpf_map *map)
9734 {
9735         return map ? map->fd : libbpf_err(-EINVAL);
9736 }
9737
9738 const struct bpf_map_def *bpf_map__def(const struct bpf_map *map)
9739 {
9740         return map ? &map->def : libbpf_err_ptr(-EINVAL);
9741 }
9742
9743 const char *bpf_map__name(const struct bpf_map *map)
9744 {
9745         return map ? map->name : NULL;
9746 }
9747
9748 enum bpf_map_type bpf_map__type(const struct bpf_map *map)
9749 {
9750         return map->def.type;
9751 }
9752
9753 int bpf_map__set_type(struct bpf_map *map, enum bpf_map_type type)
9754 {
9755         if (map->fd >= 0)
9756                 return libbpf_err(-EBUSY);
9757         map->def.type = type;
9758         return 0;
9759 }
9760
9761 __u32 bpf_map__map_flags(const struct bpf_map *map)
9762 {
9763         return map->def.map_flags;
9764 }
9765
9766 int bpf_map__set_map_flags(struct bpf_map *map, __u32 flags)
9767 {
9768         if (map->fd >= 0)
9769                 return libbpf_err(-EBUSY);
9770         map->def.map_flags = flags;
9771         return 0;
9772 }
9773
9774 __u32 bpf_map__numa_node(const struct bpf_map *map)
9775 {
9776         return map->numa_node;
9777 }
9778
9779 int bpf_map__set_numa_node(struct bpf_map *map, __u32 numa_node)
9780 {
9781         if (map->fd >= 0)
9782                 return libbpf_err(-EBUSY);
9783         map->numa_node = numa_node;
9784         return 0;
9785 }
9786
9787 __u32 bpf_map__key_size(const struct bpf_map *map)
9788 {
9789         return map->def.key_size;
9790 }
9791
9792 int bpf_map__set_key_size(struct bpf_map *map, __u32 size)
9793 {
9794         if (map->fd >= 0)
9795                 return libbpf_err(-EBUSY);
9796         map->def.key_size = size;
9797         return 0;
9798 }
9799
9800 __u32 bpf_map__value_size(const struct bpf_map *map)
9801 {
9802         return map->def.value_size;
9803 }
9804
9805 int bpf_map__set_value_size(struct bpf_map *map, __u32 size)
9806 {
9807         if (map->fd >= 0)
9808                 return libbpf_err(-EBUSY);
9809         map->def.value_size = size;
9810         return 0;
9811 }
9812
9813 __u32 bpf_map__btf_key_type_id(const struct bpf_map *map)
9814 {
9815         return map ? map->btf_key_type_id : 0;
9816 }
9817
9818 __u32 bpf_map__btf_value_type_id(const struct bpf_map *map)
9819 {
9820         return map ? map->btf_value_type_id : 0;
9821 }
9822
9823 int bpf_map__set_priv(struct bpf_map *map, void *priv,
9824                      bpf_map_clear_priv_t clear_priv)
9825 {
9826         if (!map)
9827                 return libbpf_err(-EINVAL);
9828
9829         if (map->priv) {
9830                 if (map->clear_priv)
9831                         map->clear_priv(map, map->priv);
9832         }
9833
9834         map->priv = priv;
9835         map->clear_priv = clear_priv;
9836         return 0;
9837 }
9838
9839 void *bpf_map__priv(const struct bpf_map *map)
9840 {
9841         return map ? map->priv : libbpf_err_ptr(-EINVAL);
9842 }
9843
9844 int bpf_map__set_initial_value(struct bpf_map *map,
9845                                const void *data, size_t size)
9846 {
9847         if (!map->mmaped || map->libbpf_type == LIBBPF_MAP_KCONFIG ||
9848             size != map->def.value_size || map->fd >= 0)
9849                 return libbpf_err(-EINVAL);
9850
9851         memcpy(map->mmaped, data, size);
9852         return 0;
9853 }
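
/* Usage sketch (illustrative; names are placeholders): pre-seed the global
 * data of an object before loading it. The buffer must match the map's
 * value size exactly and the object must not be loaded yet (fd < 0):
 *
 *	struct my_globals vals = { .debug_level = 2 };
 *
 *	bpf_map__set_initial_value(data_map, &vals, sizeof(vals));
 */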
9854
9855 const void *bpf_map__initial_value(struct bpf_map *map, size_t *psize)
9856 {
9857         if (!map->mmaped)
9858                 return NULL;
9859         *psize = map->def.value_size;
9860         return map->mmaped;
9861 }
9862
9863 bool bpf_map__is_offload_neutral(const struct bpf_map *map)
9864 {
9865         return map->def.type == BPF_MAP_TYPE_PERF_EVENT_ARRAY;
9866 }
9867
9868 bool bpf_map__is_internal(const struct bpf_map *map)
9869 {
9870         return map->libbpf_type != LIBBPF_MAP_UNSPEC;
9871 }
9872
9873 __u32 bpf_map__ifindex(const struct bpf_map *map)
9874 {
9875         return map->map_ifindex;
9876 }
9877
9878 int bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex)
9879 {
9880         if (map->fd >= 0)
9881                 return libbpf_err(-EBUSY);
9882         map->map_ifindex = ifindex;
9883         return 0;
9884 }
9885
9886 int bpf_map__set_inner_map_fd(struct bpf_map *map, int fd)
9887 {
9888         if (!bpf_map_type__is_map_in_map(map->def.type)) {
9889                 pr_warn("error: unsupported map type\n");
9890                 return libbpf_err(-EINVAL);
9891         }
9892         if (map->inner_map_fd != -1) {
9893                 pr_warn("error: inner_map_fd already specified\n");
9894                 return libbpf_err(-EINVAL);
9895         }
9896         zfree(&map->inner_map);
9897         map->inner_map_fd = fd;
9898         return 0;
9899 }
9900
9901 static struct bpf_map *
9902 __bpf_map__iter(const struct bpf_map *m, const struct bpf_object *obj, int i)
9903 {
9904         ssize_t idx;
9905         struct bpf_map *s, *e;
9906
9907         if (!obj || !obj->maps)
9908                 return errno = EINVAL, NULL;
9909
9910         s = obj->maps;
9911         e = obj->maps + obj->nr_maps;
9912
9913         if ((m < s) || (m >= e)) {
9914                 pr_warn("error in %s: map handle doesn't belong to object\n",
9915                          __func__);
9916                 return errno = EINVAL, NULL;
9917         }
9918
9919         idx = (m - obj->maps) + i;
9920         if (idx >= obj->nr_maps || idx < 0)
9921                 return NULL;
9922         return &obj->maps[idx];
9923 }
9924
9925 struct bpf_map *
9926 bpf_map__next(const struct bpf_map *prev, const struct bpf_object *obj)
9927 {
9928         if (prev == NULL)
9929                 return obj->maps;
9930
9931         return __bpf_map__iter(prev, obj, 1);
9932 }
9933
9934 struct bpf_map *
9935 bpf_map__prev(const struct bpf_map *next, const struct bpf_object *obj)
9936 {
9937         if (next == NULL) {
9938                 if (!obj->nr_maps)
9939                         return NULL;
9940                 return obj->maps + obj->nr_maps - 1;
9941         }
9942
9943         return __bpf_map__iter(next, obj, -1);
9944 }
9945
9946 struct bpf_map *
9947 bpf_object__find_map_by_name(const struct bpf_object *obj, const char *name)
9948 {
9949         struct bpf_map *pos;
9950
9951         bpf_object__for_each_map(pos, obj) {
9952                 if (pos->name && !strcmp(pos->name, name))
9953                         return pos;
9954         }
9955         return errno = ENOENT, NULL;
9956 }
9957
9958 int
9959 bpf_object__find_map_fd_by_name(const struct bpf_object *obj, const char *name)
9960 {
9961         return bpf_map__fd(bpf_object__find_map_by_name(obj, name));
9962 }
9963
9964 struct bpf_map *
9965 bpf_object__find_map_by_offset(struct bpf_object *obj, size_t offset)
9966 {
9967         return libbpf_err_ptr(-ENOTSUP);
9968 }
9969
9970 long libbpf_get_error(const void *ptr)
9971 {
9972         if (!IS_ERR_OR_NULL(ptr))
9973                 return 0;
9974
9975         if (IS_ERR(ptr))
9976                 errno = -PTR_ERR(ptr);
9977
9978         /* If ptr == NULL, then errno should already be set by the failing
9979          * API, because libbpf never returns NULL on success and it now always
9980          * sets errno on error. So no extra errno handling for ptr == NULL
9981          * case.
9982          */
9983         return -errno;
9984 }
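
/* Illustrative use of the convention above: pointer-returning libbpf APIs
 * either return a valid pointer or set errno and return NULL/ERR_PTR, so a
 * single check covers both styles:
 *
 *	struct bpf_link *link = bpf_program__attach(prog);
 *	long err = libbpf_get_error(link);
 *
 *	if (err)
 *		fprintf(stderr, "attach failed: %ld\n", err);
 */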
9985
9986 int bpf_prog_load(const char *file, enum bpf_prog_type type,
9987                   struct bpf_object **pobj, int *prog_fd)
9988 {
9989         struct bpf_prog_load_attr attr;
9990
9991         memset(&attr, 0, sizeof(struct bpf_prog_load_attr));
9992         attr.file = file;
9993         attr.prog_type = type;
9994         attr.expected_attach_type = 0;
9995
9996         return bpf_prog_load_xattr(&attr, pobj, prog_fd);
9997 }
9998
9999 int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
10000                         struct bpf_object **pobj, int *prog_fd)
10001 {
10002         struct bpf_object_open_attr open_attr = {};
10003         struct bpf_program *prog, *first_prog = NULL;
10004         struct bpf_object *obj;
10005         struct bpf_map *map;
10006         int err;
10007
10008         if (!attr)
10009                 return libbpf_err(-EINVAL);
10010         if (!attr->file)
10011                 return libbpf_err(-EINVAL);
10012
10013         open_attr.file = attr->file;
10014         open_attr.prog_type = attr->prog_type;
10015
10016         obj = bpf_object__open_xattr(&open_attr);
10017         err = libbpf_get_error(obj);
10018         if (err)
10019                 return libbpf_err(-ENOENT);
10020
10021         bpf_object__for_each_program(prog, obj) {
10022                 enum bpf_attach_type attach_type = attr->expected_attach_type;
10023                 /*
10024                  * to preserve backwards compatibility, bpf_prog_load treats
10025                  * attr->prog_type, if specified, as an override to whatever
10026                  * bpf_object__open guessed
10027                  */
10028                 if (attr->prog_type != BPF_PROG_TYPE_UNSPEC) {
10029                         bpf_program__set_type(prog, attr->prog_type);
10030                         bpf_program__set_expected_attach_type(prog,
10031                                                               attach_type);
10032                 }
10033                 if (bpf_program__get_type(prog) == BPF_PROG_TYPE_UNSPEC) {
10034                         /*
10035                          * we haven't guessed from section name and user
10036                          * didn't provide a fallback type, too bad...
10037                          */
10038                         bpf_object__close(obj);
10039                         return libbpf_err(-EINVAL);
10040                 }
10041
10042                 prog->prog_ifindex = attr->ifindex;
10043                 prog->log_level = attr->log_level;
10044                 prog->prog_flags |= attr->prog_flags;
10045                 if (!first_prog)
10046                         first_prog = prog;
10047         }
10048
10049         bpf_object__for_each_map(map, obj) {
10050                 if (!bpf_map__is_offload_neutral(map))
10051                         map->map_ifindex = attr->ifindex;
10052         }
10053
10054         if (!first_prog) {
10055                 pr_warn("object file doesn't contain a BPF program\n");
10056                 bpf_object__close(obj);
10057                 return libbpf_err(-ENOENT);
10058         }
10059
10060         err = bpf_object__load(obj);
10061         if (err) {
10062                 bpf_object__close(obj);
10063                 return libbpf_err(err);
10064         }
10065
10066         *pobj = obj;
10067         *prog_fd = bpf_program__fd(first_prog);
10068         return 0;
10069 }
10070
10071 struct bpf_link {
10072         int (*detach)(struct bpf_link *link);
10073         int (*destroy)(struct bpf_link *link);
10074         char *pin_path;         /* NULL, if not pinned */
10075         int fd;                 /* hook FD, -1 if not applicable */
10076         bool disconnected;
10077 };
10078
10079 /* Replace link's underlying BPF program with the new one */
10080 int bpf_link__update_program(struct bpf_link *link, struct bpf_program *prog)
10081 {
10082         int ret;
10083
10084         ret = bpf_link_update(bpf_link__fd(link), bpf_program__fd(prog), NULL);
10085         return libbpf_err_errno(ret);
10086 }
10087
10088 /* Release "ownership" of the underlying BPF resource (typically, a BPF
10089  * program attached to some BPF hook, e.g., tracepoint, kprobe, etc). A
10090  * disconnected link, when destroyed through a bpf_link__destroy() call,
10091  * won't attempt to detach/unregister that BPF resource. This is useful in
10092  * situations where, say, an attached BPF program has to outlive the
10093  * userspace program that attached it. Depending on the type of BPF
10094  * program, though, additional steps (like pinning the BPF program in BPF
10095  * FS) might be necessary to ensure that the exit of the userspace program
10096  * doesn't trigger automatic detachment and cleanup inside the kernel.
10097  */
10098 void bpf_link__disconnect(struct bpf_link *link)
10099 {
10100         link->disconnected = true;
10101 }
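
/* Usage sketch (illustrative; the pin path is an example): keep a BPF
 * program attached after the loading process exits by pinning the link
 * first, then disconnecting it so bpf_link__destroy() only frees memory:
 *
 *	bpf_link__pin(link, "/sys/fs/bpf/my_link");
 *	bpf_link__disconnect(link);
 *	bpf_link__destroy(link);	frees the struct, hook stays attached
 */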
10102
10103 int bpf_link__destroy(struct bpf_link *link)
10104 {
10105         int err = 0;
10106
10107         if (IS_ERR_OR_NULL(link))
10108                 return 0;
10109
10110         if (!link->disconnected && link->detach)
10111                 err = link->detach(link);
10112         if (link->destroy)
10113                 link->destroy(link);
10114         if (link->pin_path)
10115                 free(link->pin_path);
10116         free(link);
10117
10118         return libbpf_err(err);
10119 }
10120
10121 int bpf_link__fd(const struct bpf_link *link)
10122 {
10123         return link->fd;
10124 }
10125
10126 const char *bpf_link__pin_path(const struct bpf_link *link)
10127 {
10128         return link->pin_path;
10129 }
10130
10131 static int bpf_link__detach_fd(struct bpf_link *link)
10132 {
10133         return libbpf_err_errno(close(link->fd));
10134 }
10135
10136 struct bpf_link *bpf_link__open(const char *path)
10137 {
10138         struct bpf_link *link;
10139         int fd;
10140
10141         fd = bpf_obj_get(path);
10142         if (fd < 0) {
10143                 fd = -errno;
10144                 pr_warn("failed to open link at %s: %d\n", path, fd);
10145                 return libbpf_err_ptr(fd);
10146         }
10147
10148         link = calloc(1, sizeof(*link));
10149         if (!link) {
10150                 close(fd);
10151                 return libbpf_err_ptr(-ENOMEM);
10152         }
10153         link->detach = &bpf_link__detach_fd;
10154         link->fd = fd;
10155
10156         link->pin_path = strdup(path);
10157         if (!link->pin_path) {
10158                 bpf_link__destroy(link);
10159                 return libbpf_err_ptr(-ENOMEM);
10160         }
10161
10162         return link;
10163 }
10164
10165 int bpf_link__detach(struct bpf_link *link)
10166 {
10167         return bpf_link_detach(link->fd) ? -errno : 0;
10168 }
10169
10170 int bpf_link__pin(struct bpf_link *link, const char *path)
10171 {
10172         int err;
10173
10174         if (link->pin_path)
10175                 return libbpf_err(-EBUSY);
10176         err = make_parent_dir(path);
10177         if (err)
10178                 return libbpf_err(err);
10179         err = check_path(path);
10180         if (err)
10181                 return libbpf_err(err);
10182
10183         link->pin_path = strdup(path);
10184         if (!link->pin_path)
10185                 return libbpf_err(-ENOMEM);
10186
10187         if (bpf_obj_pin(link->fd, link->pin_path)) {
10188                 err = -errno;
10189                 zfree(&link->pin_path);
10190                 return libbpf_err(err);
10191         }
10192
10193         pr_debug("link fd=%d: pinned at %s\n", link->fd, link->pin_path);
10194         return 0;
10195 }
10196
10197 int bpf_link__unpin(struct bpf_link *link)
10198 {
10199         int err;
10200
10201         if (!link->pin_path)
10202                 return libbpf_err(-EINVAL);
10203
10204         err = unlink(link->pin_path);
10205         if (err != 0)
10206                 return libbpf_err_errno(err);
10207
10208         pr_debug("link fd=%d: unpinned from %s\n", link->fd, link->pin_path);
10209         zfree(&link->pin_path);
10210         return 0;
10211 }
10212
10213 static int bpf_link__detach_perf_event(struct bpf_link *link)
10214 {
10215         int err;
10216
10217         err = ioctl(link->fd, PERF_EVENT_IOC_DISABLE, 0);
10218         if (err)
10219                 err = -errno;
10220
10221         close(link->fd);
10222         return libbpf_err(err);
10223 }
10224
10225 struct bpf_link *bpf_program__attach_perf_event(struct bpf_program *prog, int pfd)
10226 {
10227         char errmsg[STRERR_BUFSIZE];
10228         struct bpf_link *link;
10229         int prog_fd, err;
10230
10231         if (pfd < 0) {
10232                 pr_warn("prog '%s': invalid perf event FD %d\n",
10233                         prog->name, pfd);
10234                 return libbpf_err_ptr(-EINVAL);
10235         }
10236         prog_fd = bpf_program__fd(prog);
10237         if (prog_fd < 0) {
10238                 pr_warn("prog '%s': can't attach BPF program w/o FD (did you load it?)\n",
10239                         prog->name);
10240                 return libbpf_err_ptr(-EINVAL);
10241         }
10242
10243         link = calloc(1, sizeof(*link));
10244         if (!link)
10245                 return libbpf_err_ptr(-ENOMEM);
10246         link->detach = &bpf_link__detach_perf_event;
10247         link->fd = pfd;
10248
10249         if (ioctl(pfd, PERF_EVENT_IOC_SET_BPF, prog_fd) < 0) {
10250                 err = -errno;
10251                 free(link);
10252                 pr_warn("prog '%s': failed to attach to pfd %d: %s\n",
10253                         prog->name, pfd, libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
10254                 if (err == -EPROTO)
10255                         pr_warn("prog '%s': try add PERF_SAMPLE_CALLCHAIN to or remove exclude_callchain_[kernel|user] from pfd %d\n",
10256                                 prog->name, pfd);
10257                 return libbpf_err_ptr(err);
10258         }
10259         if (ioctl(pfd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
10260                 err = -errno;
10261                 free(link);
10262                 pr_warn("prog '%s': failed to enable pfd %d: %s\n",
10263                         prog->name, pfd, libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
10264                 return libbpf_err_ptr(err);
10265         }
10266         return link;
10267 }
10268
10269 /*
10270  * this function is expected to parse integer in the range of [0, 2^31-1] from
10271  * given file using scanf format string fmt. If actual parsed value is
10272  * negative, the result might be indistinguishable from error
10273  */
10274 static int parse_uint_from_file(const char *file, const char *fmt)
10275 {
10276         char buf[STRERR_BUFSIZE];
10277         int err, ret;
10278         FILE *f;
10279
10280         f = fopen(file, "r");
10281         if (!f) {
10282                 err = -errno;
10283                 pr_debug("failed to open '%s': %s\n", file,
10284                          libbpf_strerror_r(err, buf, sizeof(buf)));
10285                 return err;
10286         }
10287         err = fscanf(f, fmt, &ret);
10288         if (err != 1) {
10289                 err = err == EOF ? -EIO : -errno;
10290                 pr_debug("failed to parse '%s': %s\n", file,
10291                         libbpf_strerror_r(err, buf, sizeof(buf)));
10292                 fclose(f);
10293                 return err;
10294         }
10295         fclose(f);
10296         return ret;
10297 }
10298
10299 static int determine_kprobe_perf_type(void)
10300 {
10301         const char *file = "/sys/bus/event_source/devices/kprobe/type";
10302
10303         return parse_uint_from_file(file, "%d\n");
10304 }
10305
10306 static int determine_uprobe_perf_type(void)
10307 {
10308         const char *file = "/sys/bus/event_source/devices/uprobe/type";
10309
10310         return parse_uint_from_file(file, "%d\n");
10311 }
10312
10313 static int determine_kprobe_retprobe_bit(void)
10314 {
10315         const char *file = "/sys/bus/event_source/devices/kprobe/format/retprobe";
10316
10317         return parse_uint_from_file(file, "config:%d\n");
10318 }
10319
10320 static int determine_uprobe_retprobe_bit(void)
10321 {
10322         const char *file = "/sys/bus/event_source/devices/uprobe/format/retprobe";
10323
10324         return parse_uint_from_file(file, "config:%d\n");
10325 }
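
/* The helpers above parse PMU descriptions exposed in sysfs. On a typical
 * system (exact values vary by kernel) the files look like:
 *
 *	/sys/bus/event_source/devices/kprobe/type            ->  "6"
 *	/sys/bus/event_source/devices/kprobe/format/retprobe ->  "config:0"
 *
 * i.e. the dynamic kprobe PMU has type 6 and bit 0 of
 * perf_event_attr.config selects retprobe semantics.
 */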
10326
10327 static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name,
10328                                  uint64_t offset, int pid)
10329 {
10330         struct perf_event_attr attr = {};
10331         char errmsg[STRERR_BUFSIZE];
10332         int type, pfd, err;
10333
10334         type = uprobe ? determine_uprobe_perf_type()
10335                       : determine_kprobe_perf_type();
10336         if (type < 0) {
10337                 pr_warn("failed to determine %s perf type: %s\n",
10338                         uprobe ? "uprobe" : "kprobe",
10339                         libbpf_strerror_r(type, errmsg, sizeof(errmsg)));
10340                 return type;
10341         }
10342         if (retprobe) {
10343                 int bit = uprobe ? determine_uprobe_retprobe_bit()
10344                                  : determine_kprobe_retprobe_bit();
10345
10346                 if (bit < 0) {
10347                         pr_warn("failed to determine %s retprobe bit: %s\n",
10348                                 uprobe ? "uprobe" : "kprobe",
10349                                 libbpf_strerror_r(bit, errmsg, sizeof(errmsg)));
10350                         return bit;
10351                 }
10352                 attr.config |= 1 << bit;
10353         }
10354         attr.size = sizeof(attr);
10355         attr.type = type;
10356         attr.config1 = ptr_to_u64(name); /* kprobe_func or uprobe_path */
10357         attr.config2 = offset;           /* kprobe_addr or probe_offset */
10358
10359         /* pid filter is meaningful only for uprobes */
10360         pfd = syscall(__NR_perf_event_open, &attr,
10361                       pid < 0 ? -1 : pid /* pid */,
10362                       pid == -1 ? 0 : -1 /* cpu */,
10363                       -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC);
10364         if (pfd < 0) {
10365                 err = -errno;
10366                 pr_warn("%s perf_event_open() failed: %s\n",
10367                         uprobe ? "uprobe" : "kprobe",
10368                         libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
10369                 return err;
10370         }
10371         return pfd;
10372 }
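
/* Illustrative attr contents (assuming the example sysfs values above): a
 * kretprobe on schedule() would be opened with roughly
 *
 *	attr.type    = 6;                       kprobe PMU type
 *	attr.config  = 1ULL << 0;               retprobe bit
 *	attr.config1 = ptr_to_u64("schedule");  kprobe_func
 *	attr.config2 = 0;                       function entry offset
 */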
10373
10374 struct bpf_link *
10375 bpf_program__attach_kprobe_opts(struct bpf_program *prog,
10376                                 const char *func_name,
10377                                 struct bpf_kprobe_opts *opts)
10378 {
10379         char errmsg[STRERR_BUFSIZE];
10380         struct bpf_link *link;
10381         unsigned long offset;
10382         bool retprobe;
10383         int pfd, err;
10384
10385         if (!OPTS_VALID(opts, bpf_kprobe_opts))
10386                 return libbpf_err_ptr(-EINVAL);
10387
10388         retprobe = OPTS_GET(opts, retprobe, false);
10389         offset = OPTS_GET(opts, offset, 0);
10390
10391         pfd = perf_event_open_probe(false /* uprobe */, retprobe, func_name,
10392                                     offset, -1 /* pid */);
10393         if (pfd < 0) {
10394                 pr_warn("prog '%s': failed to create %s '%s' perf event: %s\n",
10395                         prog->name, retprobe ? "kretprobe" : "kprobe", func_name,
10396                         libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
10397                 return libbpf_err_ptr(pfd);
10398         }
10399         link = bpf_program__attach_perf_event(prog, pfd);
10400         err = libbpf_get_error(link);
10401         if (err) {
10402                 close(pfd);
10403                 pr_warn("prog '%s': failed to attach to %s '%s': %s\n",
10404                         prog->name, retprobe ? "kretprobe" : "kprobe", func_name,
10405                         libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
10406                 return libbpf_err_ptr(err);
10407         }
10408         return link;
10409 }
10410
10411 struct bpf_link *bpf_program__attach_kprobe(struct bpf_program *prog,
10412                                             bool retprobe,
10413                                             const char *func_name)
10414 {
10415         DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, opts,
10416                 .retprobe = retprobe,
10417         );
10418
10419         return bpf_program__attach_kprobe_opts(prog, func_name, &opts);
10420 }
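
/* Usage sketch (illustrative): attach explicitly instead of relying on the
 * SEC() name, e.g. as a plain (non-ret) kprobe on do_sys_openat2():
 *
 *	struct bpf_link *link;
 *
 *	link = bpf_program__attach_kprobe(prog, false, "do_sys_openat2");
 *	if (libbpf_get_error(link))
 *		(handle the error)
 */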
10421
10422 static struct bpf_link *attach_kprobe(const struct bpf_sec_def *sec,
10423                                       struct bpf_program *prog)
10424 {
10425         DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, opts);
10426         unsigned long offset = 0;
10427         struct bpf_link *link;
10428         const char *func_name;
10429         char *func;
10430         int n, err;
10431
10432         func_name = prog->sec_name + sec->len;
10433         opts.retprobe = strcmp(sec->sec, "kretprobe/") == 0;
10434
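              /* parse optional "+<offset>" suffix; e.g. SEC("kprobe/schedule+8")
               * yields func = "schedule" (allocated by %m) and offset = 8
               */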
10435         n = sscanf(func_name, "%m[a-zA-Z0-9_.]+%li", &func, &offset);
10436         if (n < 1) {
10437                 err = -EINVAL;
10438                 pr_warn("kprobe name is invalid: %s\n", func_name);
10439                 return libbpf_err_ptr(err);
10440         }
10441         if (opts.retprobe && offset != 0) {
10442                 free(func);
10443                 err = -EINVAL;
10444                 pr_warn("kretprobes do not support offset specification\n");
10445                 return libbpf_err_ptr(err);
10446         }
10447
10448         opts.offset = offset;
10449         link = bpf_program__attach_kprobe_opts(prog, func, &opts);
10450         free(func);
10451         return link;
10452 }
10453
10454 struct bpf_link *bpf_program__attach_uprobe(struct bpf_program *prog,
10455                                             bool retprobe, pid_t pid,
10456                                             const char *binary_path,
10457                                             size_t func_offset)
10458 {
10459         char errmsg[STRERR_BUFSIZE];
10460         struct bpf_link *link;
10461         int pfd, err;
10462
10463         pfd = perf_event_open_probe(true /* uprobe */, retprobe,
10464                                     binary_path, func_offset, pid);
10465         if (pfd < 0) {
10466                 pr_warn("prog '%s': failed to create %s '%s:0x%zx' perf event: %s\n",
10467                         prog->name, retprobe ? "uretprobe" : "uprobe",
10468                         binary_path, func_offset,
10469                         libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
10470                 return libbpf_err_ptr(pfd);
10471         }
10472         link = bpf_program__attach_perf_event(prog, pfd);
10473         err = libbpf_get_error(link);
10474         if (err) {
10475                 close(pfd);
10476                 pr_warn("prog '%s': failed to attach to %s '%s:0x%zx': %s\n",
10477                         prog->name, retprobe ? "uretprobe" : "uprobe",
10478                         binary_path, func_offset,
10479                         libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
10480                 return libbpf_err_ptr(err);
10481         }
10482         return link;
10483 }
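
/* Usage sketch (illustrative; path and offset are example values): attach a
 * uprobe to the instruction at offset 0x1234 inside a binary, for all
 * processes (pid == -1):
 *
 *	link = bpf_program__attach_uprobe(prog, false, -1,
 *					  "/usr/bin/myapp", 0x1234);
 */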
10484
10485 static int determine_tracepoint_id(const char *tp_category,
10486                                    const char *tp_name)
10487 {
10488         char file[PATH_MAX];
10489         int ret;
10490
10491         ret = snprintf(file, sizeof(file),
10492                        "/sys/kernel/debug/tracing/events/%s/%s/id",
10493                        tp_category, tp_name);
10494         if (ret < 0)
10495                 return -errno;
10496         if (ret >= sizeof(file)) {
10497                 pr_debug("tracepoint %s/%s path is too long\n",
10498                          tp_category, tp_name);
10499                 return -E2BIG;
10500         }
10501         return parse_uint_from_file(file, "%d\n");
10502 }
10503
10504 static int perf_event_open_tracepoint(const char *tp_category,
10505                                       const char *tp_name)
10506 {
10507         struct perf_event_attr attr = {};
10508         char errmsg[STRERR_BUFSIZE];
10509         int tp_id, pfd, err;
10510
10511         tp_id = determine_tracepoint_id(tp_category, tp_name);
10512         if (tp_id < 0) {
10513                 pr_warn("failed to determine tracepoint '%s/%s' perf event ID: %s\n",
10514                         tp_category, tp_name,
10515                         libbpf_strerror_r(tp_id, errmsg, sizeof(errmsg)));
10516                 return tp_id;
10517         }
10518
10519         attr.type = PERF_TYPE_TRACEPOINT;
10520         attr.size = sizeof(attr);
10521         attr.config = tp_id;
10522
10523         pfd = syscall(__NR_perf_event_open, &attr, -1 /* pid */, 0 /* cpu */,
10524                       -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC);
10525         if (pfd < 0) {
10526                 err = -errno;
10527                 pr_warn("tracepoint '%s/%s' perf_event_open() failed: %s\n",
10528                         tp_category, tp_name,
10529                         libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
10530                 return err;
10531         }
10532         return pfd;
10533 }
10534
10535 struct bpf_link *bpf_program__attach_tracepoint(struct bpf_program *prog,
10536                                                 const char *tp_category,
10537                                                 const char *tp_name)
10538 {
10539         char errmsg[STRERR_BUFSIZE];
10540         struct bpf_link *link;
10541         int pfd, err;
10542
10543         pfd = perf_event_open_tracepoint(tp_category, tp_name);
10544         if (pfd < 0) {
10545                 pr_warn("prog '%s': failed to create tracepoint '%s/%s' perf event: %s\n",
10546                         prog->name, tp_category, tp_name,
10547                         libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
10548                 return libbpf_err_ptr(pfd);
10549         }
10550         link = bpf_program__attach_perf_event(prog, pfd);
10551         err = libbpf_get_error(link);
10552         if (err) {
10553                 close(pfd);
10554                 pr_warn("prog '%s': failed to attach to tracepoint '%s/%s': %s\n",
10555                         prog->name, tp_category, tp_name,
10556                         libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
10557                 return libbpf_err_ptr(err);
10558         }
10559         return link;
10560 }
10561
10562 static struct bpf_link *attach_tp(const struct bpf_sec_def *sec,
10563                                   struct bpf_program *prog)
10564 {
10565         char *sec_name, *tp_cat, *tp_name;
10566         struct bpf_link *link;
10567
10568         sec_name = strdup(prog->sec_name);
10569         if (!sec_name)
10570                 return libbpf_err_ptr(-ENOMEM);
10571
10572         /* extract "tp/<category>/<name>" */
10573         tp_cat = sec_name + sec->len;
10574         tp_name = strchr(tp_cat, '/');
10575         if (!tp_name) {
10576                 free(sec_name);
10577                 return libbpf_err_ptr(-EINVAL);
10578         }
10579         *tp_name = '\0';
10580         tp_name++;
10581
10582         link = bpf_program__attach_tracepoint(prog, tp_cat, tp_name);
10583         free(sec_name);
10584         return link;
10585 }
10586
10587 struct bpf_link *bpf_program__attach_raw_tracepoint(struct bpf_program *prog,
10588                                                     const char *tp_name)
10589 {
10590         char errmsg[STRERR_BUFSIZE];
10591         struct bpf_link *link;
10592         int prog_fd, pfd;
10593
10594         prog_fd = bpf_program__fd(prog);
10595         if (prog_fd < 0) {
10596                 pr_warn("prog '%s': can't attach before loaded\n", prog->name);
10597                 return libbpf_err_ptr(-EINVAL);
10598         }
10599
10600         link = calloc(1, sizeof(*link));
10601         if (!link)
10602                 return libbpf_err_ptr(-ENOMEM);
10603         link->detach = &bpf_link__detach_fd;
10604
10605         pfd = bpf_raw_tracepoint_open(tp_name, prog_fd);
10606         if (pfd < 0) {
10607                 pfd = -errno;
10608                 free(link);
10609                 pr_warn("prog '%s': failed to attach to raw tracepoint '%s': %s\n",
10610                         prog->name, tp_name, libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
10611                 return libbpf_err_ptr(pfd);
10612         }
10613         link->fd = pfd;
10614         return link;
10615 }
10616
10617 static struct bpf_link *attach_raw_tp(const struct bpf_sec_def *sec,
10618                                       struct bpf_program *prog)
10619 {
10620         const char *tp_name = prog->sec_name + sec->len;
10621
10622         return bpf_program__attach_raw_tracepoint(prog, tp_name);
10623 }
10624
10625 /* Common logic for all BPF program types that attach to a btf_id */
10626 static struct bpf_link *bpf_program__attach_btf_id(struct bpf_program *prog)
10627 {
10628         char errmsg[STRERR_BUFSIZE];
10629         struct bpf_link *link;
10630         int prog_fd, pfd;
10631
10632         prog_fd = bpf_program__fd(prog);
10633         if (prog_fd < 0) {
10634                 pr_warn("prog '%s': can't attach before loaded\n", prog->name);
10635                 return libbpf_err_ptr(-EINVAL);
10636         }
10637
10638         link = calloc(1, sizeof(*link));
10639         if (!link)
10640                 return libbpf_err_ptr(-ENOMEM);
10641         link->detach = &bpf_link__detach_fd;
10642
10643         pfd = bpf_raw_tracepoint_open(NULL, prog_fd);
10644         if (pfd < 0) {
10645                 pfd = -errno;
10646                 free(link);
10647                 pr_warn("prog '%s': failed to attach: %s\n",
10648                         prog->name, libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
10649                 return libbpf_err_ptr(pfd);
10650         }
10651         link->fd = pfd;
10652         return link;
10653 }
10654
10655 struct bpf_link *bpf_program__attach_trace(struct bpf_program *prog)
10656 {
10657         return bpf_program__attach_btf_id(prog);
10658 }
10659
10660 struct bpf_link *bpf_program__attach_lsm(struct bpf_program *prog)
10661 {
10662         return bpf_program__attach_btf_id(prog);
10663 }
10664
10665 static struct bpf_link *attach_trace(const struct bpf_sec_def *sec,
10666                                      struct bpf_program *prog)
10667 {
10668         return bpf_program__attach_trace(prog);
10669 }
10670
10671 static struct bpf_link *attach_lsm(const struct bpf_sec_def *sec,
10672                                    struct bpf_program *prog)
10673 {
10674         return bpf_program__attach_lsm(prog);
10675 }
10676
10677 static struct bpf_link *
10678 bpf_program__attach_fd(struct bpf_program *prog, int target_fd, int btf_id,
10679                        const char *target_name)
10680 {
10681         DECLARE_LIBBPF_OPTS(bpf_link_create_opts, opts,
10682                             .target_btf_id = btf_id);
10683         enum bpf_attach_type attach_type;
10684         char errmsg[STRERR_BUFSIZE];
10685         struct bpf_link *link;
10686         int prog_fd, link_fd;
10687
10688         prog_fd = bpf_program__fd(prog);
10689         if (prog_fd < 0) {
10690                 pr_warn("prog '%s': can't attach before loaded\n", prog->name);
10691                 return libbpf_err_ptr(-EINVAL);
10692         }
10693
10694         link = calloc(1, sizeof(*link));
10695         if (!link)
10696                 return libbpf_err_ptr(-ENOMEM);
10697         link->detach = &bpf_link__detach_fd;
10698
10699         attach_type = bpf_program__get_expected_attach_type(prog);
10700         link_fd = bpf_link_create(prog_fd, target_fd, attach_type, &opts);
10701         if (link_fd < 0) {
10702                 link_fd = -errno;
10703                 free(link);
10704                 pr_warn("prog '%s': failed to attach to %s: %s\n",
10705                         prog->name, target_name,
10706                         libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg)));
10707                 return libbpf_err_ptr(link_fd);
10708         }
10709         link->fd = link_fd;
10710         return link;
10711 }
10712
10713 struct bpf_link *
10714 bpf_program__attach_cgroup(struct bpf_program *prog, int cgroup_fd)
10715 {
10716         return bpf_program__attach_fd(prog, cgroup_fd, 0, "cgroup");
10717 }
10718
10719 struct bpf_link *
10720 bpf_program__attach_netns(struct bpf_program *prog, int netns_fd)
10721 {
10722         return bpf_program__attach_fd(prog, netns_fd, 0, "netns");
10723 }
10724
10725 struct bpf_link *bpf_program__attach_xdp(struct bpf_program *prog, int ifindex)
10726 {
10727         /* target_fd/target_ifindex use the same field in LINK_CREATE */
10728         return bpf_program__attach_fd(prog, ifindex, 0, "xdp");
10729 }
10730
10731 struct bpf_link *bpf_program__attach_freplace(struct bpf_program *prog,
10732                                               int target_fd,
10733                                               const char *attach_func_name)
10734 {
10735         int btf_id;
10736
10737         if (!!target_fd != !!attach_func_name) {
10738                 pr_warn("prog '%s': supply none or both of target_fd and attach_func_name\n",
10739                         prog->name);
10740                 return libbpf_err_ptr(-EINVAL);
10741         }
10742
10743         if (prog->type != BPF_PROG_TYPE_EXT) {
10744                 pr_warn("prog '%s': only BPF_PROG_TYPE_EXT can attach as freplace\n",
10745                         prog->name);
10746                 return libbpf_err_ptr(-EINVAL);
10747         }
10748
10749         if (target_fd) {
10750                 btf_id = libbpf_find_prog_btf_id(attach_func_name, target_fd);
10751                 if (btf_id < 0)
10752                         return libbpf_err_ptr(btf_id);
10753
10754                 return bpf_program__attach_fd(prog, target_fd, btf_id, "freplace");
10755         } else {
10756                 /* no target, so use raw_tracepoint_open for compatibility
10757                  * with old kernels
10758                  */
10759                 return bpf_program__attach_trace(prog);
10760         }
10761 }
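
/* Usage sketch (illustrative; names are examples): replace the subprogram
 * "xdp_subprog" of an already-loaded program with a BPF_PROG_TYPE_EXT
 * program:
 *
 *	link = bpf_program__attach_freplace(ext_prog, target_prog_fd,
 *					    "xdp_subprog");
 *
 * Passing target_fd == 0 and attach_func_name == NULL instead reuses the
 * attach target that was fixed at load time.
 */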
10762
10763 struct bpf_link *
10764 bpf_program__attach_iter(struct bpf_program *prog,
10765                          const struct bpf_iter_attach_opts *opts)
10766 {
10767         DECLARE_LIBBPF_OPTS(bpf_link_create_opts, link_create_opts);
10768         char errmsg[STRERR_BUFSIZE];
10769         struct bpf_link *link;
10770         int prog_fd, link_fd;
10771         __u32 target_fd = 0;
10772
10773         if (!OPTS_VALID(opts, bpf_iter_attach_opts))
10774                 return libbpf_err_ptr(-EINVAL);
10775
10776         link_create_opts.iter_info = OPTS_GET(opts, link_info, NULL);
10777         link_create_opts.iter_info_len = OPTS_GET(opts, link_info_len, 0);
10778
10779         prog_fd = bpf_program__fd(prog);
10780         if (prog_fd < 0) {
10781                 pr_warn("prog '%s': can't attach before loaded\n", prog->name);
10782                 return libbpf_err_ptr(-EINVAL);
10783         }
10784
10785         link = calloc(1, sizeof(*link));
10786         if (!link)
10787                 return libbpf_err_ptr(-ENOMEM);
10788         link->detach = &bpf_link__detach_fd;
10789
10790         link_fd = bpf_link_create(prog_fd, target_fd, BPF_TRACE_ITER,
10791                                   &link_create_opts);
10792         if (link_fd < 0) {
10793                 link_fd = -errno;
10794                 free(link);
10795                 pr_warn("prog '%s': failed to attach to iterator: %s\n",
10796                         prog->name, libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg)));
10797                 return libbpf_err_ptr(link_fd);
10798         }
10799         link->fd = link_fd;
10800         return link;
10801 }
10802
10803 static struct bpf_link *attach_iter(const struct bpf_sec_def *sec,
10804                                     struct bpf_program *prog)
10805 {
10806         return bpf_program__attach_iter(prog, NULL);
10807 }
10808
10809 struct bpf_link *bpf_program__attach(struct bpf_program *prog)
10810 {
10811         const struct bpf_sec_def *sec_def;
10812
10813         sec_def = find_sec_def(prog->sec_name);
10814         if (!sec_def || !sec_def->attach_fn)
10815                 return libbpf_err_ptr(-ESRCH);
10816
10817         return sec_def->attach_fn(sec_def, prog);
10818 }
10819
10820 static int bpf_link__detach_struct_ops(struct bpf_link *link)
10821 {
10822         __u32 zero = 0;
10823
10824         if (bpf_map_delete_elem(link->fd, &zero))
10825                 return -errno;
10826
10827         return 0;
10828 }
10829
10830 struct bpf_link *bpf_map__attach_struct_ops(struct bpf_map *map)
10831 {
10832         struct bpf_struct_ops *st_ops;
10833         struct bpf_link *link;
10834         __u32 i, zero = 0;
10835         int err;
10836
10837         if (!bpf_map__is_struct_ops(map) || map->fd == -1)
10838                 return libbpf_err_ptr(-EINVAL);
10839
10840         link = calloc(1, sizeof(*link));
10841         if (!link)
10842                 return libbpf_err_ptr(-ENOMEM);
10843
10844         st_ops = map->st_ops;
10845         for (i = 0; i < btf_vlen(st_ops->type); i++) {
10846                 struct bpf_program *prog = st_ops->progs[i];
10847                 void *kern_data;
10848                 int prog_fd;
10849
10850                 if (!prog)
10851                         continue;
10852
10853                 prog_fd = bpf_program__fd(prog);
10854                 kern_data = st_ops->kern_vdata + st_ops->kern_func_off[i];
10855                 *(unsigned long *)kern_data = prog_fd;
10856         }
10857
10858         err = bpf_map_update_elem(map->fd, &zero, st_ops->kern_vdata, 0);
10859         if (err) {
10860                 err = -errno;
10861                 free(link);
10862                 return libbpf_err_ptr(err);
10863         }
10864
10865         link->detach = bpf_link__detach_struct_ops;
10866         link->fd = map->fd;
10867
10868         return link;
10869 }
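
/* Usage sketch (illustrative; "dctcp" is an example map name): register a
 * loaded struct_ops map, e.g. a TCP congestion control implementation:
 *
 *	struct bpf_map *map = bpf_object__find_map_by_name(obj, "dctcp");
 *	struct bpf_link *link = bpf_map__attach_struct_ops(map);
 *
 * Destroying the link deletes the map element and thereby unregisters the
 * struct_ops from the kernel.
 */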
10870
10871 enum bpf_perf_event_ret
10872 bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size,
10873                            void **copy_mem, size_t *copy_size,
10874                            bpf_perf_event_print_t fn, void *private_data)
10875 {
10876         struct perf_event_mmap_page *header = mmap_mem;
10877         __u64 data_head = ring_buffer_read_head(header);
10878         __u64 data_tail = header->data_tail;
10879         void *base = ((__u8 *)header) + page_size;
10880         int ret = LIBBPF_PERF_EVENT_CONT;
10881         struct perf_event_header *ehdr;
10882         size_t ehdr_size;
10883
10884         while (data_head != data_tail) {
10885                 ehdr = base + (data_tail & (mmap_size - 1));
10886                 ehdr_size = ehdr->size;
10887
10888                 if (((void *)ehdr) + ehdr_size > base + mmap_size) {
10889                         void *copy_start = ehdr;
10890                         size_t len_first = base + mmap_size - copy_start;
10891                         size_t len_secnd = ehdr_size - len_first;
10892
10893                         if (*copy_size < ehdr_size) {
10894                                 free(*copy_mem);
10895                                 *copy_mem = malloc(ehdr_size);
10896                                 if (!*copy_mem) {
10897                                         *copy_size = 0;
10898                                         ret = LIBBPF_PERF_EVENT_ERROR;
10899                                         break;
10900                                 }
10901                                 *copy_size = ehdr_size;
10902                         }
10903
10904                         memcpy(*copy_mem, copy_start, len_first);
10905                         memcpy(*copy_mem + len_first, base, len_secnd);
10906                         ehdr = *copy_mem;
10907                 }
10908
10909                 ret = fn(ehdr, private_data);
10910                 data_tail += ehdr_size;
10911                 if (ret != LIBBPF_PERF_EVENT_CONT)
10912                         break;
10913         }
10914
10915         ring_buffer_write_tail(header, data_tail);
10916         return libbpf_err(ret);
10917 }
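
/* Illustrative callback for the reader above; any return value other than
 * LIBBPF_PERF_EVENT_CONT stops the loop:
 *
 *	static enum bpf_perf_event_ret
 *	count_samples(struct perf_event_header *hdr, void *private_data)
 *	{
 *		int *cnt = private_data;
 *
 *		if (hdr->type == PERF_RECORD_SAMPLE)
 *			(*cnt)++;
 *		return LIBBPF_PERF_EVENT_CONT;
 *	}
 */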
10918
10919 struct perf_buffer;
10920
10921 struct perf_buffer_params {
10922         struct perf_event_attr *attr;
10923         /* if event_cb is specified, it takes precedence */
10924         perf_buffer_event_fn event_cb;
10925         /* sample_cb and lost_cb are higher-level common-case callbacks */
10926         perf_buffer_sample_fn sample_cb;
10927         perf_buffer_lost_fn lost_cb;
10928         void *ctx;
10929         int cpu_cnt;
10930         int *cpus;
10931         int *map_keys;
10932 };
10933
10934 struct perf_cpu_buf {
10935         struct perf_buffer *pb;
10936         void *base; /* mmap()'ed memory */
10937         void *buf; /* for reconstructing segmented data */
10938         size_t buf_size;
10939         int fd;
10940         int cpu;
10941         int map_key;
10942 };
10943
10944 struct perf_buffer {
10945         perf_buffer_event_fn event_cb;
10946         perf_buffer_sample_fn sample_cb;
10947         perf_buffer_lost_fn lost_cb;
10948         void *ctx; /* passed into callbacks */
10949
10950         size_t page_size;
10951         size_t mmap_size;
10952         struct perf_cpu_buf **cpu_bufs;
10953         struct epoll_event *events;
10954         int cpu_cnt; /* number of allocated CPU buffers */
10955         int epoll_fd; /* epoll FD for polling all per-CPU perf buffers */
10956         int map_fd; /* BPF_MAP_TYPE_PERF_EVENT_ARRAY BPF map FD */
10957 };
10958
10959 static void perf_buffer__free_cpu_buf(struct perf_buffer *pb,
10960                                       struct perf_cpu_buf *cpu_buf)
10961 {
10962         if (!cpu_buf)
10963                 return;
10964         if (cpu_buf->base &&
10965             munmap(cpu_buf->base, pb->mmap_size + pb->page_size))
10966                 pr_warn("failed to munmap cpu_buf #%d\n", cpu_buf->cpu);
10967         if (cpu_buf->fd >= 0) {
10968                 ioctl(cpu_buf->fd, PERF_EVENT_IOC_DISABLE, 0);
10969                 close(cpu_buf->fd);
10970         }
10971         free(cpu_buf->buf);
10972         free(cpu_buf);
10973 }
10974
10975 void perf_buffer__free(struct perf_buffer *pb)
10976 {
10977         int i;
10978
10979         if (IS_ERR_OR_NULL(pb))
10980                 return;
10981         if (pb->cpu_bufs) {
10982                 for (i = 0; i < pb->cpu_cnt; i++) {
10983                         struct perf_cpu_buf *cpu_buf = pb->cpu_bufs[i];
10984
10985                         if (!cpu_buf)
10986                                 continue;
10987
10988                         bpf_map_delete_elem(pb->map_fd, &cpu_buf->map_key);
10989                         perf_buffer__free_cpu_buf(pb, cpu_buf);
10990                 }
10991                 free(pb->cpu_bufs);
10992         }
10993         if (pb->epoll_fd >= 0)
10994                 close(pb->epoll_fd);
10995         free(pb->events);
10996         free(pb);
10997 }
10998
10999 static struct perf_cpu_buf *
11000 perf_buffer__open_cpu_buf(struct perf_buffer *pb, struct perf_event_attr *attr,
11001                           int cpu, int map_key)
11002 {
11003         struct perf_cpu_buf *cpu_buf;
11004         char msg[STRERR_BUFSIZE];
11005         int err;
11006
11007         cpu_buf = calloc(1, sizeof(*cpu_buf));
11008         if (!cpu_buf)
11009                 return ERR_PTR(-ENOMEM);
11010
11011         cpu_buf->pb = pb;
11012         cpu_buf->cpu = cpu;
11013         cpu_buf->map_key = map_key;
11014
11015         cpu_buf->fd = syscall(__NR_perf_event_open, attr, -1 /* pid */, cpu,
11016                               -1, PERF_FLAG_FD_CLOEXEC);
11017         if (cpu_buf->fd < 0) {
11018                 err = -errno;
11019                 pr_warn("failed to open perf buffer event on cpu #%d: %s\n",
11020                         cpu, libbpf_strerror_r(err, msg, sizeof(msg)));
11021                 goto error;
11022         }
11023
11024         cpu_buf->base = mmap(NULL, pb->mmap_size + pb->page_size,
11025                              PROT_READ | PROT_WRITE, MAP_SHARED,
11026                              cpu_buf->fd, 0);
11027         if (cpu_buf->base == MAP_FAILED) {
11028                 cpu_buf->base = NULL;
11029                 err = -errno;
11030                 pr_warn("failed to mmap perf buffer on cpu #%d: %s\n",
11031                         cpu, libbpf_strerror_r(err, msg, sizeof(msg)));
11032                 goto error;
11033         }
11034
11035         if (ioctl(cpu_buf->fd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
11036                 err = -errno;
11037                 pr_warn("failed to enable perf buffer event on cpu #%d: %s\n",
11038                         cpu, libbpf_strerror_r(err, msg, sizeof(msg)));
11039                 goto error;
11040         }
11041
11042         return cpu_buf;
11043
11044 error:
11045         perf_buffer__free_cpu_buf(pb, cpu_buf);
11046         return (struct perf_cpu_buf *)ERR_PTR(err);
11047 }
11048
11049 static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt,
11050                                               struct perf_buffer_params *p);
11051
11052 struct perf_buffer *perf_buffer__new(int map_fd, size_t page_cnt,
11053                                      const struct perf_buffer_opts *opts)
11054 {
11055         struct perf_buffer_params p = {};
11056         struct perf_event_attr attr = { 0, };
11057
11058         attr.config = PERF_COUNT_SW_BPF_OUTPUT;
11059         attr.type = PERF_TYPE_SOFTWARE;
11060         attr.sample_type = PERF_SAMPLE_RAW;
11061         attr.sample_period = 1;
11062         attr.wakeup_events = 1;
11063
11064         p.attr = &attr;
11065         p.sample_cb = opts ? opts->sample_cb : NULL;
11066         p.lost_cb = opts ? opts->lost_cb : NULL;
11067         p.ctx = opts ? opts->ctx : NULL;
11068
11069         return libbpf_ptr(__perf_buffer__new(map_fd, page_cnt, &p));
11070 }
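/* Usage sketch (illustrative, not part of libbpf itself): setting up a
 * perf buffer over a BPF_MAP_TYPE_PERF_EVENT_ARRAY map and draining it
 * with perf_buffer__poll(). handle_sample() and handle_lost() are
 * hypothetical user callbacks, and "map" is assumed to be a valid
 * struct bpf_map * from an already-loaded object.
 *
 *	static void handle_sample(void *ctx, int cpu, void *data, __u32 size)
 *	{
 *		// one raw sample submitted via bpf_perf_event_output()
 *	}
 *
 *	static void handle_lost(void *ctx, int cpu, __u64 cnt)
 *	{
 *		// cnt samples were dropped on this CPU (ring was full)
 *	}
 *
 *	struct perf_buffer_opts pb_opts = {
 *		.sample_cb = handle_sample,
 *		.lost_cb = handle_lost,
 *	};
 *	struct perf_buffer *pb;
 *
 *	pb = perf_buffer__new(bpf_map__fd(map), 8, &pb_opts);
 *	if (libbpf_get_error(pb))
 *		// ... handle error ...
 *	while (perf_buffer__poll(pb, 100) >= 0)
 *		;
 *	perf_buffer__free(pb);
 */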
11071
11072 struct perf_buffer *
11073 perf_buffer__new_raw(int map_fd, size_t page_cnt,
11074                      const struct perf_buffer_raw_opts *opts)
11075 {
11076         struct perf_buffer_params p = {};
11077
11078         p.attr = opts->attr;
11079         p.event_cb = opts->event_cb;
11080         p.ctx = opts->ctx;
11081         p.cpu_cnt = opts->cpu_cnt;
11082         p.cpus = opts->cpus;
11083         p.map_keys = opts->map_keys;
11084
11085         return libbpf_ptr(__perf_buffer__new(map_fd, page_cnt, &p));
11086 }
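/* Usage sketch (illustrative): the raw variant for callers that need full
 * control over perf_event_attr and raw record parsing. handle_event() is
 * a hypothetical callback with the perf_buffer_event_fn signature; with
 * cpu_cnt == 0 all online CPUs are used, as __perf_buffer__new() below
 * shows.
 *
 *	static enum bpf_perf_event_ret
 *	handle_event(void *ctx, int cpu, struct perf_event_header *event)
 *	{
 *		// parse PERF_RECORD_* records directly
 *		return LIBBPF_PERF_EVENT_CONT;
 *	}
 *
 *	struct perf_event_attr attr = {
 *		.type = PERF_TYPE_SOFTWARE,
 *		.config = PERF_COUNT_SW_BPF_OUTPUT,
 *		.sample_type = PERF_SAMPLE_RAW,
 *		.sample_period = 1,
 *		.wakeup_events = 1,
 *	};
 *	struct perf_buffer_raw_opts opts = {
 *		.attr = &attr,
 *		.event_cb = handle_event,
 *	};
 *
 *	pb = perf_buffer__new_raw(map_fd, 8, &opts);
 */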
11087
11088 static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt,
11089                                               struct perf_buffer_params *p)
11090 {
11091         const char *online_cpus_file = "/sys/devices/system/cpu/online";
11092         struct bpf_map_info map;
11093         char msg[STRERR_BUFSIZE];
11094         struct perf_buffer *pb;
11095         bool *online = NULL;
11096         __u32 map_info_len;
11097         int err, i, j, n;
11098
11099         if (page_cnt & (page_cnt - 1)) {
11100                 pr_warn("page count should be power of two, but is %zu\n",
11101                         page_cnt);
11102                 return ERR_PTR(-EINVAL);
11103         }
11104
11105         /* best-effort sanity checks */
11106         memset(&map, 0, sizeof(map));
11107         map_info_len = sizeof(map);
11108         err = bpf_obj_get_info_by_fd(map_fd, &map, &map_info_len);
11109         if (err) {
11110                 err = -errno;
11111                 /* if BPF_OBJ_GET_INFO_BY_FD is supported, will return
11112                  * -EBADFD, -EFAULT, or -E2BIG on real error
11113                  */
11114                 if (err != -EINVAL) {
11115                         pr_warn("failed to get map info for map FD %d: %s\n",
11116                                 map_fd, libbpf_strerror_r(err, msg, sizeof(msg)));
11117                         return ERR_PTR(err);
11118                 }
11119                 pr_debug("failed to get map info for FD %d; API not supported? Ignoring...\n",
11120                          map_fd);
11121         } else {
11122                 if (map.type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) {
11123                         pr_warn("map '%s' should be BPF_MAP_TYPE_PERF_EVENT_ARRAY\n",
11124                                 map.name);
11125                         return ERR_PTR(-EINVAL);
11126                 }
11127         }
11128
11129         pb = calloc(1, sizeof(*pb));
11130         if (!pb)
11131                 return ERR_PTR(-ENOMEM);
11132
11133         pb->event_cb = p->event_cb;
11134         pb->sample_cb = p->sample_cb;
11135         pb->lost_cb = p->lost_cb;
11136         pb->ctx = p->ctx;
11137
11138         pb->page_size = getpagesize();
11139         pb->mmap_size = pb->page_size * page_cnt;
11140         pb->map_fd = map_fd;
11141
11142         pb->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
11143         if (pb->epoll_fd < 0) {
11144                 err = -errno;
11145                 pr_warn("failed to create epoll instance: %s\n",
11146                         libbpf_strerror_r(err, msg, sizeof(msg)));
11147                 goto error;
11148         }
11149
11150         if (p->cpu_cnt > 0) {
11151                 pb->cpu_cnt = p->cpu_cnt;
11152         } else {
11153                 pb->cpu_cnt = libbpf_num_possible_cpus();
11154                 if (pb->cpu_cnt < 0) {
11155                         err = pb->cpu_cnt;
11156                         goto error;
11157                 }
11158                 if (map.max_entries && map.max_entries < pb->cpu_cnt)
11159                         pb->cpu_cnt = map.max_entries;
11160         }
11161
11162         pb->events = calloc(pb->cpu_cnt, sizeof(*pb->events));
11163         if (!pb->events) {
11164                 err = -ENOMEM;
11165                 pr_warn("failed to allocate events: out of memory\n");
11166                 goto error;
11167         }
11168         pb->cpu_bufs = calloc(pb->cpu_cnt, sizeof(*pb->cpu_bufs));
11169         if (!pb->cpu_bufs) {
11170                 err = -ENOMEM;
11171                 pr_warn("failed to allocate buffers: out of memory\n");
11172                 goto error;
11173         }
11174
11175         err = parse_cpu_mask_file(online_cpus_file, &online, &n);
11176         if (err) {
11177                 pr_warn("failed to get online CPU mask: %d\n", err);
11178                 goto error;
11179         }
11180
11181         for (i = 0, j = 0; i < pb->cpu_cnt; i++) {
11182                 struct perf_cpu_buf *cpu_buf;
11183                 int cpu, map_key;
11184
11185                 cpu = p->cpu_cnt > 0 ? p->cpus[i] : i;
11186                 map_key = p->cpu_cnt > 0 ? p->map_keys[i] : i;
11187
11188                 /* if the user didn't explicitly request particular CPUs to
11189                  * attach to, skip offline/not-present CPUs
11190                  */
11191                 if (p->cpu_cnt <= 0 && (cpu >= n || !online[cpu]))
11192                         continue;
11193
11194                 cpu_buf = perf_buffer__open_cpu_buf(pb, p->attr, cpu, map_key);
11195                 if (IS_ERR(cpu_buf)) {
11196                         err = PTR_ERR(cpu_buf);
11197                         goto error;
11198                 }
11199
11200                 pb->cpu_bufs[j] = cpu_buf;
11201
11202                 err = bpf_map_update_elem(pb->map_fd, &map_key,
11203                                           &cpu_buf->fd, 0);
11204                 if (err) {
11205                         err = -errno;
11206                         pr_warn("failed to set cpu #%d, key %d -> perf FD %d: %s\n",
11207                                 cpu, map_key, cpu_buf->fd,
11208                                 libbpf_strerror_r(err, msg, sizeof(msg)));
11209                         goto error;
11210                 }
11211
11212                 pb->events[j].events = EPOLLIN;
11213                 pb->events[j].data.ptr = cpu_buf;
11214                 if (epoll_ctl(pb->epoll_fd, EPOLL_CTL_ADD, cpu_buf->fd,
11215                               &pb->events[j]) < 0) {
11216                         err = -errno;
11217                         pr_warn("failed to epoll_ctl cpu #%d perf FD %d: %s\n",
11218                                 cpu, cpu_buf->fd,
11219                                 libbpf_strerror_r(err, msg, sizeof(msg)));
11220                         goto error;
11221                 }
11222                 j++;
11223         }
11224         pb->cpu_cnt = j;
11225         free(online);
11226
11227         return pb;
11228
11229 error:
11230         free(online);
11231         if (pb)
11232                 perf_buffer__free(pb);
11233         return ERR_PTR(err);
11234 }
11235
11236 struct perf_sample_raw {
11237         struct perf_event_header header;
11238         uint32_t size;
11239         char data[];
11240 };
11241
11242 struct perf_sample_lost {
11243         struct perf_event_header header;
11244         uint64_t id;
11245         uint64_t lost;
11246         uint64_t sample_id;
11247 };
11248
11249 static enum bpf_perf_event_ret
11250 perf_buffer__process_record(struct perf_event_header *e, void *ctx)
11251 {
11252         struct perf_cpu_buf *cpu_buf = ctx;
11253         struct perf_buffer *pb = cpu_buf->pb;
11254         void *data = e;
11255
11256         /* user wants full control over parsing perf event */
11257         if (pb->event_cb)
11258                 return pb->event_cb(pb->ctx, cpu_buf->cpu, e);
11259
11260         switch (e->type) {
11261         case PERF_RECORD_SAMPLE: {
11262                 struct perf_sample_raw *s = data;
11263
11264                 if (pb->sample_cb)
11265                         pb->sample_cb(pb->ctx, cpu_buf->cpu, s->data, s->size);
11266                 break;
11267         }
11268         case PERF_RECORD_LOST: {
11269                 struct perf_sample_lost *s = data;
11270
11271                 if (pb->lost_cb)
11272                         pb->lost_cb(pb->ctx, cpu_buf->cpu, s->lost);
11273                 break;
11274         }
11275         default:
11276                 pr_warn("unknown perf sample type %d\n", e->type);
11277                 return LIBBPF_PERF_EVENT_ERROR;
11278         }
11279         return LIBBPF_PERF_EVENT_CONT;
11280 }
11281
11282 static int perf_buffer__process_records(struct perf_buffer *pb,
11283                                         struct perf_cpu_buf *cpu_buf)
11284 {
11285         enum bpf_perf_event_ret ret;
11286
11287         ret = bpf_perf_event_read_simple(cpu_buf->base, pb->mmap_size,
11288                                          pb->page_size, &cpu_buf->buf,
11289                                          &cpu_buf->buf_size,
11290                                          perf_buffer__process_record, cpu_buf);
11291         if (ret != LIBBPF_PERF_EVENT_CONT)
11292                 return ret;
11293         return 0;
11294 }
11295
11296 int perf_buffer__epoll_fd(const struct perf_buffer *pb)
11297 {
11298         return pb->epoll_fd;
11299 }
11300
11301 int perf_buffer__poll(struct perf_buffer *pb, int timeout_ms)
11302 {
11303         int i, cnt, err;
11304
11305         cnt = epoll_wait(pb->epoll_fd, pb->events, pb->cpu_cnt, timeout_ms);
11306         if (cnt < 0)
11307                 return libbpf_err(-errno);
11308
11309         for (i = 0; i < cnt; i++) {
11310                 struct perf_cpu_buf *cpu_buf = pb->events[i].data.ptr;
11311
11312                 err = perf_buffer__process_records(pb, cpu_buf);
11313                 if (err) {
11314                         pr_warn("error while processing records: %d\n", err);
11315                         return libbpf_err(err);
11316                 }
11317         }
11318         return cnt;
11319 }
11320
11321 /* Return number of PERF_EVENT_ARRAY map slots set up by this perf_buffer
11322  * manager.
11323  */
11324 size_t perf_buffer__buffer_cnt(const struct perf_buffer *pb)
11325 {
11326         return pb->cpu_cnt;
11327 }
11328
11329 /*
11330  * Return perf_event FD of a ring buffer in *buf_idx* slot of
11331  * PERF_EVENT_ARRAY BPF map. This FD can be polled for new data using
11332  * select()/poll()/epoll() Linux syscalls.
11333  */
11334 int perf_buffer__buffer_fd(const struct perf_buffer *pb, size_t buf_idx)
11335 {
11336         struct perf_cpu_buf *cpu_buf;
11337
11338         if (buf_idx >= pb->cpu_cnt)
11339                 return libbpf_err(-EINVAL);
11340
11341         cpu_buf = pb->cpu_bufs[buf_idx];
11342         if (!cpu_buf)
11343                 return libbpf_err(-ENOENT);
11344
11345         return cpu_buf->fd;
11346 }
11347
11348 /*
11349  * Consume data from perf ring buffer corresponding to slot *buf_idx* in
11350  * PERF_EVENT_ARRAY BPF map without waiting/polling. If there is no data to
11351  * consume, do nothing and return success.
11352  * Returns:
11353  *   - 0 on success;
11354  *   - <0 on failure.
11355  */
11356 int perf_buffer__consume_buffer(struct perf_buffer *pb, size_t buf_idx)
11357 {
11358         struct perf_cpu_buf *cpu_buf;
11359
11360         if (buf_idx >= pb->cpu_cnt)
11361                 return libbpf_err(-EINVAL);
11362
11363         cpu_buf = pb->cpu_bufs[buf_idx];
11364         if (!cpu_buf)
11365                 return libbpf_err(-ENOENT);
11366
11367         return perf_buffer__process_records(pb, cpu_buf);
11368 }
11369
11370 int perf_buffer__consume(struct perf_buffer *pb)
11371 {
11372         int i, err;
11373
11374         for (i = 0; i < pb->cpu_cnt; i++) {
11375                 struct perf_cpu_buf *cpu_buf = pb->cpu_bufs[i];
11376
11377                 if (!cpu_buf)
11378                         continue;
11379
11380                 err = perf_buffer__process_records(pb, cpu_buf);
11381                 if (err) {
11382                         pr_warn("perf_buffer: failed to process records in buffer #%d: %d\n", i, err);
11383                         return libbpf_err(err);
11384                 }
11385         }
11386         return 0;
11387 }
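/* Note: perf_buffer__poll() sleeps in epoll_wait() and then processes only
 * the buffers that signaled readiness, while perf_buffer__consume()
 * unconditionally drains every allocated buffer without blocking. A common
 * pattern (sketch) is a final non-blocking flush before teardown:
 *
 *	perf_buffer__consume(pb);	// flush whatever is still queued
 *	perf_buffer__free(pb);
 */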
11388
11389 struct bpf_prog_info_array_desc {
11390         int     array_offset;   /* e.g. offset of jited_prog_insns */
11391         int     count_offset;   /* e.g. offset of jited_prog_len */
11392         int     size_offset;    /* > 0: offset of rec size,
11393                                  * < 0: fixed size of -size_offset
11394                                  */
11395 };
11396
11397 static struct bpf_prog_info_array_desc bpf_prog_info_array_desc[] = {
11398         [BPF_PROG_INFO_JITED_INSNS] = {
11399                 offsetof(struct bpf_prog_info, jited_prog_insns),
11400                 offsetof(struct bpf_prog_info, jited_prog_len),
11401                 -1,
11402         },
11403         [BPF_PROG_INFO_XLATED_INSNS] = {
11404                 offsetof(struct bpf_prog_info, xlated_prog_insns),
11405                 offsetof(struct bpf_prog_info, xlated_prog_len),
11406                 -1,
11407         },
11408         [BPF_PROG_INFO_MAP_IDS] = {
11409                 offsetof(struct bpf_prog_info, map_ids),
11410                 offsetof(struct bpf_prog_info, nr_map_ids),
11411                 -(int)sizeof(__u32),
11412         },
11413         [BPF_PROG_INFO_JITED_KSYMS] = {
11414                 offsetof(struct bpf_prog_info, jited_ksyms),
11415                 offsetof(struct bpf_prog_info, nr_jited_ksyms),
11416                 -(int)sizeof(__u64),
11417         },
11418         [BPF_PROG_INFO_JITED_FUNC_LENS] = {
11419                 offsetof(struct bpf_prog_info, jited_func_lens),
11420                 offsetof(struct bpf_prog_info, nr_jited_func_lens),
11421                 -(int)sizeof(__u32),
11422         },
11423         [BPF_PROG_INFO_FUNC_INFO] = {
11424                 offsetof(struct bpf_prog_info, func_info),
11425                 offsetof(struct bpf_prog_info, nr_func_info),
11426                 offsetof(struct bpf_prog_info, func_info_rec_size),
11427         },
11428         [BPF_PROG_INFO_LINE_INFO] = {
11429                 offsetof(struct bpf_prog_info, line_info),
11430                 offsetof(struct bpf_prog_info, nr_line_info),
11431                 offsetof(struct bpf_prog_info, line_info_rec_size),
11432         },
11433         [BPF_PROG_INFO_JITED_LINE_INFO] = {
11434                 offsetof(struct bpf_prog_info, jited_line_info),
11435                 offsetof(struct bpf_prog_info, nr_jited_line_info),
11436                 offsetof(struct bpf_prog_info, jited_line_info_rec_size),
11437         },
11438         [BPF_PROG_INFO_PROG_TAGS] = {
11439                 offsetof(struct bpf_prog_info, prog_tags),
11440                 offsetof(struct bpf_prog_info, nr_prog_tags),
11441                 -(int)sizeof(__u8) * BPF_TAG_SIZE,
11442         },
11443
11444 };
11445
11446 static __u32 bpf_prog_info_read_offset_u32(struct bpf_prog_info *info,
11447                                            int offset)
11448 {
11449         __u32 *array = (__u32 *)info;
11450
11451         if (offset >= 0)
11452                 return array[offset / sizeof(__u32)];
11453         return -(int)offset;
11454 }
11455
11456 static __u64 bpf_prog_info_read_offset_u64(struct bpf_prog_info *info,
11457                                            int offset)
11458 {
11459         __u64 *array = (__u64 *)info;
11460
11461         if (offset >= 0)
11462                 return array[offset / sizeof(__u64)];
11463         return -(int)offset;
11464 }
11465
11466 static void bpf_prog_info_set_offset_u32(struct bpf_prog_info *info, int offset,
11467                                          __u32 val)
11468 {
11469         __u32 *array = (__u32 *)info;
11470
11471         if (offset >= 0)
11472                 array[offset / sizeof(__u32)] = val;
11473 }
11474
11475 static void bpf_prog_info_set_offset_u64(struct bpf_prog_info *info, int offset,
11476                                          __u64 val)
11477 {
11478         __u64 *array = (__u64 *)info;
11479
11480         if (offset >= 0)
11481                 array[offset / sizeof(__u64)] = val;
11482 }
11483
11484 struct bpf_prog_info_linear *
11485 bpf_program__get_prog_info_linear(int fd, __u64 arrays)
11486 {
11487         struct bpf_prog_info_linear *info_linear;
11488         struct bpf_prog_info info = {};
11489         __u32 info_len = sizeof(info);
11490         __u32 data_len = 0;
11491         int i, err;
11492         void *ptr;
11493
11494         if (arrays >> BPF_PROG_INFO_LAST_ARRAY)
11495                 return libbpf_err_ptr(-EINVAL);
11496
11497         /* step 1: get array dimensions */
11498         err = bpf_obj_get_info_by_fd(fd, &info, &info_len);
11499         if (err) {
11500                 pr_debug("can't get prog info: %s\n", strerror(errno));
11501                 return libbpf_err_ptr(-EFAULT);
11502         }
11503
11504         /* step 2: calculate total size of all arrays */
11505         for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) {
11506                 bool include_array = (arrays & (1UL << i)) > 0;
11507                 struct bpf_prog_info_array_desc *desc;
11508                 __u32 count, size;
11509
11510                 desc = bpf_prog_info_array_desc + i;
11511
11512                 /* kernel is too old to support this field */
11513                 if (info_len < desc->array_offset + sizeof(__u32) ||
11514                     info_len < desc->count_offset + sizeof(__u32) ||
11515                     (desc->size_offset > 0 && info_len < desc->size_offset))
11516                         include_array = false;
11517
11518                 if (!include_array) {
11519                         arrays &= ~(1UL << i);  /* clear the bit */
11520                         continue;
11521                 }
11522
11523                 count = bpf_prog_info_read_offset_u32(&info, desc->count_offset);
11524                 size  = bpf_prog_info_read_offset_u32(&info, desc->size_offset);
11525
11526                 data_len += count * size;
11527         }
11528
11529         /* step 3: allocate contiguous memory */
11530         data_len = roundup(data_len, sizeof(__u64));
11531         info_linear = malloc(sizeof(struct bpf_prog_info_linear) + data_len);
11532         if (!info_linear)
11533                 return libbpf_err_ptr(-ENOMEM);
11534
11535         /* step 4: fill data to info_linear->info */
11536         info_linear->arrays = arrays;
11537         memset(&info_linear->info, 0, sizeof(info));
11538         ptr = info_linear->data;
11539
11540         for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) {
11541                 struct bpf_prog_info_array_desc *desc;
11542                 __u32 count, size;
11543
11544                 if ((arrays & (1UL << i)) == 0)
11545                         continue;
11546
11547                 desc  = bpf_prog_info_array_desc + i;
11548                 count = bpf_prog_info_read_offset_u32(&info, desc->count_offset);
11549                 size  = bpf_prog_info_read_offset_u32(&info, desc->size_offset);
11550                 bpf_prog_info_set_offset_u32(&info_linear->info,
11551                                              desc->count_offset, count);
11552                 bpf_prog_info_set_offset_u32(&info_linear->info,
11553                                              desc->size_offset, size);
11554                 bpf_prog_info_set_offset_u64(&info_linear->info,
11555                                              desc->array_offset,
11556                                              ptr_to_u64(ptr));
11557                 ptr += count * size;
11558         }
11559
11560         /* step 5: call syscall again to get required arrays */
11561         err = bpf_obj_get_info_by_fd(fd, &info_linear->info, &info_len);
11562         if (err) {
11563                 pr_debug("can't get prog info: %s\n", strerror(errno));
11564                 free(info_linear);
11565                 return libbpf_err_ptr(-EFAULT);
11566         }
11567
11568         /* step 6: verify the data */
11569         for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) {
11570                 struct bpf_prog_info_array_desc *desc;
11571                 __u32 v1, v2;
11572
11573                 if ((arrays & (1UL << i)) == 0)
11574                         continue;
11575
11576                 desc = bpf_prog_info_array_desc + i;
11577                 v1 = bpf_prog_info_read_offset_u32(&info, desc->count_offset);
11578                 v2 = bpf_prog_info_read_offset_u32(&info_linear->info,
11579                                                    desc->count_offset);
11580                 if (v1 != v2)
11581                         pr_warn("%s: mismatch in element count\n", __func__);
11582
11583                 v1 = bpf_prog_info_read_offset_u32(&info, desc->size_offset);
11584                 v2 = bpf_prog_info_read_offset_u32(&info_linear->info,
11585                                                    desc->size_offset);
11586                 if (v1 != v2)
11587                         pr_warn("%s: mismatch in rec size\n", __func__);
11588         }
11589
11590         /* step 7: update info_len and data_len */
11591         info_linear->info_len = sizeof(struct bpf_prog_info);
11592         info_linear->data_len = data_len;
11593
11594         return info_linear;
11595 }
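/* Usage sketch (illustrative): materializing bpf_prog_info together with
 * its map_ids array in one contiguous allocation. prog_fd is assumed to be
 * a valid BPF program FD.
 *
 *	struct bpf_prog_info_linear *info;
 *	__u32 *map_ids, i;
 *
 *	info = bpf_program__get_prog_info_linear(prog_fd,
 *						 1UL << BPF_PROG_INFO_MAP_IDS);
 *	if (libbpf_get_error(info))
 *		return -1;
 *	map_ids = (__u32 *)(uintptr_t)info->info.map_ids;
 *	for (i = 0; i < info->info.nr_map_ids; i++)
 *		printf("map id: %u\n", map_ids[i]);
 *	free(info);
 */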
11596
11597 void bpf_program__bpil_addr_to_offs(struct bpf_prog_info_linear *info_linear)
11598 {
11599         int i;
11600
11601         for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) {
11602                 struct bpf_prog_info_array_desc *desc;
11603                 __u64 addr, offs;
11604
11605                 if ((info_linear->arrays & (1UL << i)) == 0)
11606                         continue;
11607
11608                 desc = bpf_prog_info_array_desc + i;
11609                 addr = bpf_prog_info_read_offset_u64(&info_linear->info,
11610                                                      desc->array_offset);
11611                 offs = addr - ptr_to_u64(info_linear->data);
11612                 bpf_prog_info_set_offset_u64(&info_linear->info,
11613                                              desc->array_offset, offs);
11614         }
11615 }
11616
11617 void bpf_program__bpil_offs_to_addr(struct bpf_prog_info_linear *info_linear)
11618 {
11619         int i;
11620
11621         for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) {
11622                 struct bpf_prog_info_array_desc *desc;
11623                 __u64 addr, offs;
11624
11625                 if ((info_linear->arrays & (1UL << i)) == 0)
11626                         continue;
11627
11628                 desc = bpf_prog_info_array_desc + i;
11629                 offs = bpf_prog_info_read_offset_u64(&info_linear->info,
11630                                                      desc->array_offset);
11631                 addr = offs + ptr_to_u64(info_linear->data);
11632                 bpf_prog_info_set_offset_u64(&info_linear->info,
11633                                              desc->array_offset, addr);
11634         }
11635 }
11636
11637 int bpf_program__set_attach_target(struct bpf_program *prog,
11638                                    int attach_prog_fd,
11639                                    const char *attach_func_name)
11640 {
11641         int btf_obj_fd = 0, btf_id = 0, err;
11642
11643         if (!prog || attach_prog_fd < 0 || !attach_func_name)
11644                 return libbpf_err(-EINVAL);
11645
11646         if (prog->obj->loaded)
11647                 return libbpf_err(-EINVAL);
11648
11649         if (attach_prog_fd) {
11650                 btf_id = libbpf_find_prog_btf_id(attach_func_name,
11651                                                  attach_prog_fd);
11652                 if (btf_id < 0)
11653                         return libbpf_err(btf_id);
11654         } else {
11655                 /* load btf_vmlinux, if not yet */
11656                 err = bpf_object__load_vmlinux_btf(prog->obj, true);
11657                 if (err)
11658                         return libbpf_err(err);
11659                 err = find_kernel_btf_id(prog->obj, attach_func_name,
11660                                          prog->expected_attach_type,
11661                                          &btf_obj_fd, &btf_id);
11662                 if (err)
11663                         return libbpf_err(err);
11664         }
11665
11666         prog->attach_btf_id = btf_id;
11667         prog->attach_btf_obj_fd = btf_obj_fd;
11668         prog->attach_prog_fd = attach_prog_fd;
11669         return 0;
11670 }
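/* Usage sketch (illustrative): re-pointing a tracing program at a
 * different kernel function before the object is loaded. "handle_fentry"
 * is a hypothetical fentry program name; passing attach_prog_fd == 0
 * resolves attach_func_name against vmlinux BTF, per the code above.
 *
 *	prog = bpf_object__find_program_by_name(obj, "handle_fentry");
 *	err = bpf_program__set_attach_target(prog, 0, "tcp_v4_connect");
 *	if (err)
 *		return err;
 *	err = bpf_object__load(obj);
 */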
11671
11672 int parse_cpu_mask_str(const char *s, bool **mask, int *mask_sz)
11673 {
11674         int err = 0, n, len, start, end = -1;
11675         bool *tmp;
11676
11677         *mask = NULL;
11678         *mask_sz = 0;
11679
11680         /* Each substring separated by ',' has the format \d+-\d+ or \d+ */
11681         while (*s) {
11682                 if (*s == ',' || *s == '\n') {
11683                         s++;
11684                         continue;
11685                 }
11686                 n = sscanf(s, "%d%n-%d%n", &start, &len, &end, &len);
11687                 if (n <= 0 || n > 2) {
11688                         pr_warn("Failed to get CPU range %s: %d\n", s, n);
11689                         err = -EINVAL;
11690                         goto cleanup;
11691                 } else if (n == 1) {
11692                         end = start;
11693                 }
11694                 if (start < 0 || start > end) {
11695                         pr_warn("Invalid CPU range [%d,%d] in %s\n",
11696                                 start, end, s);
11697                         err = -EINVAL;
11698                         goto cleanup;
11699                 }
11700                 tmp = realloc(*mask, end + 1);
11701                 if (!tmp) {
11702                         err = -ENOMEM;
11703                         goto cleanup;
11704                 }
11705                 *mask = tmp;
11706                 memset(tmp + *mask_sz, 0, start - *mask_sz);
11707                 memset(tmp + start, 1, end - start + 1);
11708                 *mask_sz = end + 1;
11709                 s += len;
11710         }
11711         if (!*mask_sz) {
11712                 pr_warn("Empty CPU range\n");
11713                 return -EINVAL;
11714         }
11715         return 0;
11716 cleanup:
11717         free(*mask);
11718         *mask = NULL;
11719         return err;
11720 }
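/* For example, given the parsing above, the string "0-2,5" produces
 * *mask = {true, true, true, false, false, true} and *mask_sz == 6.
 */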
11721
11722 int parse_cpu_mask_file(const char *fcpu, bool **mask, int *mask_sz)
11723 {
11724         int fd, err = 0, len;
11725         char buf[128];
11726
11727         fd = open(fcpu, O_RDONLY);
11728         if (fd < 0) {
11729                 err = -errno;
11730                 pr_warn("Failed to open cpu mask file %s: %d\n", fcpu, err);
11731                 return err;
11732         }
11733         len = read(fd, buf, sizeof(buf));
11734         close(fd);
11735         if (len <= 0) {
11736                 err = len ? -errno : -EINVAL;
11737                 pr_warn("Failed to read cpu mask from %s: %d\n", fcpu, err);
11738                 return err;
11739         }
11740         if (len >= sizeof(buf)) {
11741                 pr_warn("CPU mask is too big in file %s\n", fcpu);
11742                 return -E2BIG;
11743         }
11744         buf[len] = '\0';
11745
11746         return parse_cpu_mask_str(buf, mask, mask_sz);
11747 }
11748
11749 int libbpf_num_possible_cpus(void)
11750 {
11751         static const char *fcpu = "/sys/devices/system/cpu/possible";
11752         static int cpus;
11753         int err, n, i, tmp_cpus;
11754         bool *mask;
11755
11756         tmp_cpus = READ_ONCE(cpus);
11757         if (tmp_cpus > 0)
11758                 return tmp_cpus;
11759
11760         err = parse_cpu_mask_file(fcpu, &mask, &n);
11761         if (err)
11762                 return libbpf_err(err);
11763
11764         tmp_cpus = 0;
11765         for (i = 0; i < n; i++) {
11766                 if (mask[i])
11767                         tmp_cpus++;
11768         }
11769         free(mask);
11770
11771         WRITE_ONCE(cpus, tmp_cpus);
11772         return tmp_cpus;
11773 }
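/* Usage sketch (illustrative): sizing a value buffer for a per-CPU map
 * lookup; per_cpu_map_fd and key are hypothetical.
 *
 *	int ncpus = libbpf_num_possible_cpus();
 *	if (ncpus < 0)
 *		return ncpus;
 *	long values[ncpus];
 *	bpf_map_lookup_elem(per_cpu_map_fd, &key, values);
 */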
11774
11775 int bpf_object__open_skeleton(struct bpf_object_skeleton *s,
11776                               const struct bpf_object_open_opts *opts)
11777 {
11778         DECLARE_LIBBPF_OPTS(bpf_object_open_opts, skel_opts,
11779                 .object_name = s->name,
11780         );
11781         struct bpf_object *obj;
11782         int i, err;
11783
11784         /* Attempt to preserve opts->object_name, unless overridden by the
11785          * user explicitly. Overriding the object name for skeletons is
11786          * discouraged, as it breaks global data maps, which use the object
11787          * name as their own map name prefix. When the skeleton is generated,
11788          * bpftool assumes that this name will stay the same.
11789          */
11790         if (opts) {
11791                 memcpy(&skel_opts, opts, sizeof(*opts));
11792                 if (!opts->object_name)
11793                         skel_opts.object_name = s->name;
11794         }
11795
11796         obj = bpf_object__open_mem(s->data, s->data_sz, &skel_opts);
11797         err = libbpf_get_error(obj);
11798         if (err) {
11799                 pr_warn("failed to initialize skeleton BPF object '%s': %d\n",
11800                         s->name, err);
11801                 return libbpf_err(err);
11802         }
11803
11804         *s->obj = obj;
11805
11806         for (i = 0; i < s->map_cnt; i++) {
11807                 struct bpf_map **map = s->maps[i].map;
11808                 const char *name = s->maps[i].name;
11809                 void **mmaped = s->maps[i].mmaped;
11810
11811                 *map = bpf_object__find_map_by_name(obj, name);
11812                 if (!*map) {
11813                         pr_warn("failed to find skeleton map '%s'\n", name);
11814                         return libbpf_err(-ESRCH);
11815                 }
11816
11817                 /* externs shouldn't be pre-initialized from user code */
11818                 if (mmaped && (*map)->libbpf_type != LIBBPF_MAP_KCONFIG)
11819                         *mmaped = (*map)->mmaped;
11820         }
11821
11822         for (i = 0; i < s->prog_cnt; i++) {
11823                 struct bpf_program **prog = s->progs[i].prog;
11824                 const char *name = s->progs[i].name;
11825
11826                 *prog = bpf_object__find_program_by_name(obj, name);
11827                 if (!*prog) {
11828                         pr_warn("failed to find skeleton program '%s'\n", name);
11829                         return libbpf_err(-ESRCH);
11830                 }
11831         }
11832
11833         return 0;
11834 }
11835
11836 int bpf_object__load_skeleton(struct bpf_object_skeleton *s)
11837 {
11838         int i, err;
11839
11840         err = bpf_object__load(*s->obj);
11841         if (err) {
11842                 pr_warn("failed to load BPF skeleton '%s': %d\n", s->name, err);
11843                 return libbpf_err(err);
11844         }
11845
11846         for (i = 0; i < s->map_cnt; i++) {
11847                 struct bpf_map *map = *s->maps[i].map;
11848                 size_t mmap_sz = bpf_map_mmap_sz(map);
11849                 int prot, map_fd = bpf_map__fd(map);
11850                 void **mmaped = s->maps[i].mmaped;
11851
11852                 if (!mmaped)
11853                         continue;
11854
11855                 if (!(map->def.map_flags & BPF_F_MMAPABLE)) {
11856                         *mmaped = NULL;
11857                         continue;
11858                 }
11859
11860                 if (map->def.map_flags & BPF_F_RDONLY_PROG)
11861                         prot = PROT_READ;
11862                 else
11863                         prot = PROT_READ | PROT_WRITE;
11864
11865                 /* Remap the anonymous mmap()-ed "map initialization image"
11866                  * as BPF map-backed mmap()-ed memory, preserving the same
11867                  * memory address. This causes the kernel to change the
11868                  * process' page table to point to a different piece of kernel
11869                  * memory, but from the userspace point of view the memory
11870                  * address (and its contents, identical at this point) stays
11871                  * the same. This mapping will be released by bpf_object__close()
11872                  * as part of the normal clean up procedure, so we don't need
11873                  * to worry about it from the skeleton's clean up perspective.
11874                  */
11875                 *mmaped = mmap(map->mmaped, mmap_sz, prot,
11876                                 MAP_SHARED | MAP_FIXED, map_fd, 0);
11877                 if (*mmaped == MAP_FAILED) {
11878                         err = -errno;
11879                         *mmaped = NULL;
11880                         pr_warn("failed to re-mmap() map '%s': %d\n",
11881                                  bpf_map__name(map), err);
11882                         return libbpf_err(err);
11883                 }
11884         }
11885
11886         return 0;
11887 }
11888
11889 int bpf_object__attach_skeleton(struct bpf_object_skeleton *s)
11890 {
11891         int i, err;
11892
11893         for (i = 0; i < s->prog_cnt; i++) {
11894                 struct bpf_program *prog = *s->progs[i].prog;
11895                 struct bpf_link **link = s->progs[i].link;
11896                 const struct bpf_sec_def *sec_def;
11897
11898                 if (!prog->load)
11899                         continue;
11900
11901                 sec_def = find_sec_def(prog->sec_name);
11902                 if (!sec_def || !sec_def->attach_fn)
11903                         continue;
11904
11905                 *link = sec_def->attach_fn(sec_def, prog);
11906                 err = libbpf_get_error(*link);
11907                 if (err) {
11908                         pr_warn("failed to auto-attach program '%s': %d\n",
11909                                 bpf_program__name(prog), err);
11910                         return libbpf_err(err);
11911                 }
11912         }
11913
11914         return 0;
11915 }
11916
11917 void bpf_object__detach_skeleton(struct bpf_object_skeleton *s)
11918 {
11919         int i;
11920
11921         for (i = 0; i < s->prog_cnt; i++) {
11922                 struct bpf_link **link = s->progs[i].link;
11923
11924                 bpf_link__destroy(*link);
11925                 *link = NULL;
11926         }
11927 }
11928
11929 void bpf_object__destroy_skeleton(struct bpf_object_skeleton *s)
11930 {
11931         if (s->progs)
11932                 bpf_object__detach_skeleton(s);
11933         if (s->obj)
11934                 bpf_object__close(*s->obj);
11935         free(s->maps);
11936         free(s->progs);
11937         free(s);
11938 }
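/* Usage sketch (illustrative): the skeleton lifecycle as driven by
 * bpftool-generated code. "my_obj" is a hypothetical skeleton whose
 * generated helpers populate struct bpf_object_skeleton and call the
 * functions above.
 *
 *	struct my_obj *skel;
 *
 *	skel = my_obj__open();		// bpf_object__open_skeleton()
 *	if (!skel)
 *		return -1;
 *	if (my_obj__load(skel))		// bpf_object__load_skeleton()
 *		goto out;
 *	if (my_obj__attach(skel))	// bpf_object__attach_skeleton()
 *		goto out;
 *	// ... program runs, read maps via skel->maps / skel->bss ...
 * out:
 *	my_obj__destroy(skel);		// detaches and frees everything
 */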