// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)

/*
 * Common eBPF ELF object loading operations.
 *
 * Copyright (C) 2013-2015 Alexei Starovoitov <ast@kernel.org>
 * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com>
 * Copyright (C) 2015 Huawei Inc.
 * Copyright (C) 2017 Nicira, Inc.
 * Copyright (C) 2019 Isovalent, Inc.
 */

#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif
#include <stdlib.h>
#include <stdio.h>
#include <stdarg.h>
#include <libgen.h>
#include <inttypes.h>
#include <limits.h>
#include <string.h>
#include <unistd.h>
#include <endian.h>
#include <fcntl.h>
#include <errno.h>
#include <ctype.h>
#include <asm/unistd.h>
#include <linux/err.h>
#include <linux/kernel.h>
#include <linux/bpf.h>
#include <linux/btf.h>
#include <linux/filter.h>
#include <linux/list.h>
#include <linux/limits.h>
#include <linux/perf_event.h>
#include <linux/ring_buffer.h>
#include <linux/version.h>
#include <sys/epoll.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/vfs.h>
#include <sys/utsname.h>
#include <sys/resource.h>
#include <libelf.h>
#include <gelf.h>
#include <zlib.h>

#include "libbpf.h"
#include "bpf.h"
#include "btf.h"
#include "str_error.h"
#include "libbpf_internal.h"
#include "hashmap.h"

#ifndef BPF_FS_MAGIC
#define BPF_FS_MAGIC		0xcafe4a11
#endif

#define BPF_INSN_SZ (sizeof(struct bpf_insn))

/* vfprintf() in __base_pr() uses a nonliteral format string. That may break
 * compilation if the user enables the corresponding warning. Disable it
 * explicitly.
 */
#pragma GCC diagnostic ignored "-Wformat-nonliteral"

#define __printf(a, b)	__attribute__((format(printf, a, b)))

static struct bpf_map *bpf_object__add_map(struct bpf_object *obj);
static const struct btf_type *
skip_mods_and_typedefs(const struct btf *btf, __u32 id, __u32 *res_id);

static int __base_pr(enum libbpf_print_level level, const char *format,
		     va_list args)
{
	if (level == LIBBPF_DEBUG)
		return 0;

	return vfprintf(stderr, format, args);
}

static libbpf_print_fn_t __libbpf_pr = __base_pr;

libbpf_print_fn_t libbpf_set_print(libbpf_print_fn_t fn)
{
	libbpf_print_fn_t old_print_fn = __libbpf_pr;

	__libbpf_pr = fn;
	return old_print_fn;
}
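
/* Usage sketch (illustrative, not part of this file): an application can
 * route libbpf logs through its own callback, e.g. to silence debug output:
 *
 *	static int my_print(enum libbpf_print_level level,
 *			    const char *fmt, va_list args)
 *	{
 *		if (level == LIBBPF_DEBUG)
 *			return 0;
 *		return vfprintf(stderr, fmt, args);
 *	}
 *
 *	libbpf_set_print(my_print);
 *
 * libbpf_set_print() returns the previously installed callback, so it can
 * be saved and restored later.
 */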

__printf(2, 3)
void libbpf_print(enum libbpf_print_level level, const char *format, ...)
{
	va_list args;

	if (!__libbpf_pr)
		return;

	va_start(args, format);
	__libbpf_pr(level, format, args);
	va_end(args);
}

static void pr_perm_msg(int err)
{
	struct rlimit limit;
	char buf[100];

	if (err != -EPERM || geteuid() != 0)
		return;

	err = getrlimit(RLIMIT_MEMLOCK, &limit);
	if (err)
		return;

	if (limit.rlim_cur == RLIM_INFINITY)
		return;

	if (limit.rlim_cur < 1024)
		snprintf(buf, sizeof(buf), "%zu bytes", (size_t)limit.rlim_cur);
	else if (limit.rlim_cur < 1024*1024)
		snprintf(buf, sizeof(buf), "%.1f KiB", (double)limit.rlim_cur / 1024);
	else
		snprintf(buf, sizeof(buf), "%.1f MiB", (double)limit.rlim_cur / (1024*1024));

	pr_warn("permission error while running as root; try raising 'ulimit -l'? current value: %s\n",
		buf);
}
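
/* A minimal sketch (illustrative only) of the remedy the warning above
 * suggests: on pre-5.11 kernels, callers typically raise the memlock rlimit
 * themselves before loading BPF objects:
 *
 *	struct rlimit rl = { RLIM_INFINITY, RLIM_INFINITY };
 *
 *	if (setrlimit(RLIMIT_MEMLOCK, &rl))
 *		... handle errno ...
 *
 * Kernels v5.11+ account BPF memory via memcg instead, so this is not
 * needed there.
 */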

#define STRERR_BUFSIZE	128

/* Copied from tools/perf/util/util.h */
#ifndef zfree
# define zfree(ptr) ({ free(*ptr); *ptr = NULL; })
#endif

#ifndef zclose
# define zclose(fd) ({			\
	int ___err = 0;			\
	if ((fd) >= 0)			\
		___err = close((fd));	\
	fd = -1;			\
	___err; })
#endif

static inline __u64 ptr_to_u64(const void *ptr)
{
	return (__u64) (unsigned long) ptr;
}

enum kern_feature_id {
	/* v4.14: kernel support for program & map names. */
	FEAT_PROG_NAME,
	/* v5.2: kernel support for global data sections. */
	FEAT_GLOBAL_DATA,
	/* BTF support */
	FEAT_BTF,
	/* BTF_KIND_FUNC and BTF_KIND_FUNC_PROTO support */
	FEAT_BTF_FUNC,
	/* BTF_KIND_VAR and BTF_KIND_DATASEC support */
	FEAT_BTF_DATASEC,
	/* BTF_FUNC_GLOBAL is supported */
	FEAT_BTF_GLOBAL_FUNC,
	/* BPF_F_MMAPABLE is supported for arrays */
	FEAT_ARRAY_MMAP,
	/* kernel support for expected_attach_type in BPF_PROG_LOAD */
	FEAT_EXP_ATTACH_TYPE,
	/* bpf_probe_read_{kernel,user}[_str] helpers */
	FEAT_PROBE_READ_KERN,
	/* BPF_PROG_BIND_MAP is supported */
	FEAT_PROG_BIND_MAP,
	/* Kernel support for module BTFs */
	FEAT_MODULE_BTF,
	/* BTF_KIND_FLOAT support */
	FEAT_BTF_FLOAT,
	__FEAT_CNT,
};

static bool kernel_supports(enum kern_feature_id feat_id);

enum reloc_type {
	RELO_LD64,
	RELO_CALL,
	RELO_DATA,
	RELO_EXTERN,
	RELO_SUBPROG_ADDR,
};

struct reloc_desc {
	enum reloc_type type;
	int insn_idx;
	int map_idx;
	int sym_off;
	bool processed;
};

struct bpf_sec_def;

typedef struct bpf_link *(*attach_fn_t)(const struct bpf_sec_def *sec,
					struct bpf_program *prog);

struct bpf_sec_def {
	const char *sec;
	size_t len;
	enum bpf_prog_type prog_type;
	enum bpf_attach_type expected_attach_type;
	bool is_exp_attach_type_optional;
	bool is_attachable;
	bool is_attach_btf;
	bool is_sleepable;
	attach_fn_t attach_fn;
};

/*
 * bpf_prog should be a better name but it has been used in
 * linux/filter.h.
 */
struct bpf_program {
	const struct bpf_sec_def *sec_def;
	char *sec_name;
	size_t sec_idx;
	/* this program's instruction offset (in number of instructions)
	 * within its containing ELF section
	 */
	size_t sec_insn_off;
	/* number of original instructions in the ELF section belonging to
	 * this program, not taking into account subprogram instructions
	 * possibly appended later during relocation
	 */
	size_t sec_insn_cnt;
	/* Offset (in number of instructions) of the start of the instructions
	 * belonging to this BPF program within its containing main BPF
	 * program. For the entry-point (main) BPF program, this is always
	 * zero. For a sub-program, this gets reset before each main BPF
	 * program is processed and relocated, and is used to determine
	 * whether the sub-program was already appended to the main program,
	 * and if yes, at which instruction offset.
	 */
	size_t sub_insn_off;

	char *name;
	/* sec_name with / replaced by _; makes recursive pinning
	 * in bpf_object__pin_programs easier
	 */
	char *pin_name;

	/* instructions that belong to the BPF program; insns[0] is located at
	 * the sec_insn_off instruction within its ELF section in the ELF
	 * file, so to map an ELF-file instruction index to a local instruction
	 * index, one needs to subtract sec_insn_off; and vice versa.
	 */
	struct bpf_insn *insns;
	/* actual number of instructions in this BPF program's image; for
	 * entry-point BPF programs this includes the size of the main program
	 * itself plus all the used sub-programs, appended at the end
	 */
	size_t insns_cnt;

	struct reloc_desc *reloc_desc;
	int nr_reloc;
	int log_level;

	struct {
		int nr;
		int *fds;
	} instances;
	bpf_program_prep_t preprocessor;

	struct bpf_object *obj;
	void *priv;
	bpf_program_clear_priv_t clear_priv;

	bool load;
	enum bpf_prog_type type;
	enum bpf_attach_type expected_attach_type;
	int prog_ifindex;
	__u32 attach_btf_obj_fd;
	__u32 attach_btf_id;
	__u32 attach_prog_fd;
	void *func_info;
	__u32 func_info_rec_size;
	__u32 func_info_cnt;

	void *line_info;
	__u32 line_info_rec_size;
	__u32 line_info_cnt;
	__u32 prog_flags;
};

struct bpf_struct_ops {
	const char *tname;
	const struct btf_type *type;
	struct bpf_program **progs;
	__u32 *kern_func_off;
	/* e.g. struct tcp_congestion_ops in bpf_prog's btf format */
	void *data;
	/* e.g. struct bpf_struct_ops_tcp_congestion_ops in
	 *      btf_vmlinux's format.
	 * struct bpf_struct_ops_tcp_congestion_ops {
	 *	[... some other kernel fields ...]
	 *	struct tcp_congestion_ops data;
	 * }
	 * kern_vdata's size == sizeof(struct bpf_struct_ops_tcp_congestion_ops)
	 * bpf_map__init_kern_struct_ops() will populate the "kern_vdata"
	 * from "data".
	 */
	void *kern_vdata;
	__u32 type_id;
};

#define DATA_SEC ".data"
#define BSS_SEC ".bss"
#define RODATA_SEC ".rodata"
#define KCONFIG_SEC ".kconfig"
#define KSYMS_SEC ".ksyms"
#define STRUCT_OPS_SEC ".struct_ops"

enum libbpf_map_type {
	LIBBPF_MAP_UNSPEC,
	LIBBPF_MAP_DATA,
	LIBBPF_MAP_BSS,
	LIBBPF_MAP_RODATA,
	LIBBPF_MAP_KCONFIG,
};

static const char * const libbpf_type_to_btf_name[] = {
	[LIBBPF_MAP_DATA]	= DATA_SEC,
	[LIBBPF_MAP_BSS]	= BSS_SEC,
	[LIBBPF_MAP_RODATA]	= RODATA_SEC,
	[LIBBPF_MAP_KCONFIG]	= KCONFIG_SEC,
};

struct bpf_map {
	char *name;
	int fd;
	int sec_idx;
	size_t sec_offset;
	int map_ifindex;
	int inner_map_fd;
	struct bpf_map_def def;
	__u32 numa_node;
	__u32 btf_var_idx;
	__u32 btf_key_type_id;
	__u32 btf_value_type_id;
	__u32 btf_vmlinux_value_type_id;
	void *priv;
	bpf_map_clear_priv_t clear_priv;
	enum libbpf_map_type libbpf_type;
	void *mmaped;
	struct bpf_struct_ops *st_ops;
	struct bpf_map *inner_map;
	void **init_slots;
	int init_slots_sz;
	char *pin_path;
	bool pinned;
	bool reused;
};

enum extern_type {
	EXT_UNKNOWN,
	EXT_KCFG,
	EXT_KSYM,
};

enum kcfg_type {
	KCFG_UNKNOWN,
	KCFG_CHAR,
	KCFG_BOOL,
	KCFG_INT,
	KCFG_TRISTATE,
	KCFG_CHAR_ARR,
};

struct extern_desc {
	enum extern_type type;
	int sym_idx;
	int btf_id;
	int sec_btf_id;
	const char *name;
	bool is_set;
	bool is_weak;
	union {
		struct {
			enum kcfg_type type;
			int sz;
			int align;
			int data_off;
			bool is_signed;
		} kcfg;
		struct {
			unsigned long long addr;

			/* target btf_id of the corresponding kernel var. */
			int kernel_btf_obj_fd;
			int kernel_btf_id;

			/* local btf_id of the ksym extern's type. */
			__u32 type_id;
		} ksym;
	};
};

static LIST_HEAD(bpf_objects_list);

struct module_btf {
	struct btf *btf;
	char *name;
	__u32 id;
	int fd;
};

struct bpf_object {
	char name[BPF_OBJ_NAME_LEN];
	char license[64];
	__u32 kern_version;

	struct bpf_program *programs;
	size_t nr_programs;
	struct bpf_map *maps;
	size_t nr_maps;
	size_t maps_cap;

	char *kconfig;
	struct extern_desc *externs;
	int nr_extern;
	int kconfig_map_idx;
	int rodata_map_idx;

	bool loaded;
	bool has_subcalls;

	/*
	 * Information used when doing ELF-related work. Only valid if
	 * efile.fd is valid.
	 */
	struct {
		int fd;
		const void *obj_buf;
		size_t obj_buf_sz;
		Elf *elf;
		GElf_Ehdr ehdr;
		Elf_Data *symbols;
		Elf_Data *data;
		Elf_Data *rodata;
		Elf_Data *bss;
		Elf_Data *st_ops_data;
		size_t shstrndx; /* section index for section name strings */
		size_t strtabidx;
		struct {
			GElf_Shdr shdr;
			Elf_Data *data;
		} *reloc_sects;
		int nr_reloc_sects;
		int maps_shndx;
		int btf_maps_shndx;
		__u32 btf_maps_sec_btf_id;
		int text_shndx;
		int symbols_shndx;
		int data_shndx;
		int rodata_shndx;
		int bss_shndx;
		int st_ops_shndx;
	} efile;
	/*
	 * All loaded bpf_objects are linked in a list, which is
	 * hidden from the caller. bpf_objects__<func> handlers deal
	 * with all objects.
	 */
	struct list_head list;

	struct btf *btf;
	struct btf_ext *btf_ext;

	/* Parse and load BTF vmlinux if any of the programs in the object need
	 * it at load time.
	 */
	struct btf *btf_vmlinux;
	/* vmlinux BTF override for CO-RE relocations */
	struct btf *btf_vmlinux_override;
	/* Lazily initialized kernel module BTFs */
	struct module_btf *btf_modules;
	bool btf_modules_loaded;
	size_t btf_module_cnt;
	size_t btf_module_cap;

	void *priv;
	bpf_object_clear_priv_t clear_priv;

	char path[];
};
#define obj_elf_valid(o)	((o)->efile.elf)

static const char *elf_sym_str(const struct bpf_object *obj, size_t off);
static const char *elf_sec_str(const struct bpf_object *obj, size_t off);
static Elf_Scn *elf_sec_by_idx(const struct bpf_object *obj, size_t idx);
static Elf_Scn *elf_sec_by_name(const struct bpf_object *obj, const char *name);
static int elf_sec_hdr(const struct bpf_object *obj, Elf_Scn *scn, GElf_Shdr *hdr);
static const char *elf_sec_name(const struct bpf_object *obj, Elf_Scn *scn);
static Elf_Data *elf_sec_data(const struct bpf_object *obj, Elf_Scn *scn);
static int elf_sym_by_sec_off(const struct bpf_object *obj, size_t sec_idx,
			      size_t off, __u32 sym_type, GElf_Sym *sym);

void bpf_program__unload(struct bpf_program *prog)
{
	int i;

	if (!prog)
		return;

	/*
	 * If the object is opened but the program was never loaded,
	 * it is possible that prog->instances.nr == -1.
	 */
	if (prog->instances.nr > 0) {
		for (i = 0; i < prog->instances.nr; i++)
			zclose(prog->instances.fds[i]);
	} else if (prog->instances.nr != -1) {
		pr_warn("Internal error: instances.nr is %d\n",
			prog->instances.nr);
	}

	prog->instances.nr = -1;
	zfree(&prog->instances.fds);

	zfree(&prog->func_info);
	zfree(&prog->line_info);
}

static void bpf_program__exit(struct bpf_program *prog)
{
	if (!prog)
		return;

	if (prog->clear_priv)
		prog->clear_priv(prog, prog->priv);

	prog->priv = NULL;
	prog->clear_priv = NULL;

	bpf_program__unload(prog);
	zfree(&prog->name);
	zfree(&prog->sec_name);
	zfree(&prog->pin_name);
	zfree(&prog->insns);
	zfree(&prog->reloc_desc);

	prog->nr_reloc = 0;
	prog->insns_cnt = 0;
	prog->sec_idx = -1;
}

static char *__bpf_program__pin_name(struct bpf_program *prog)
{
	char *name, *p;

	name = p = strdup(prog->sec_name);
	while (p && (p = strchr(p, '/')))
		*p = '_';

	return name;
}
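
/* For example (illustrative): a program in section "cgroup/connect4" gets
 * pin_name "cgroup_connect4", so bpf_object__pin_programs() can pin it at
 * <path>/cgroup_connect4 without creating nested directories.
 */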

static bool insn_is_subprog_call(const struct bpf_insn *insn)
{
	return BPF_CLASS(insn->code) == BPF_JMP &&
	       BPF_OP(insn->code) == BPF_CALL &&
	       BPF_SRC(insn->code) == BPF_K &&
	       insn->src_reg == BPF_PSEUDO_CALL &&
	       insn->dst_reg == 0 &&
	       insn->off == 0;
}

static bool is_ldimm64(struct bpf_insn *insn)
{
	return insn->code == (BPF_LD | BPF_IMM | BPF_DW);
}

static bool insn_is_pseudo_func(struct bpf_insn *insn)
{
	return is_ldimm64(insn) && insn->src_reg == BPF_PSEUDO_FUNC;
}
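
/* Illustration (not used by the code): the encodings these helpers match.
 *
 *	subprogram call:	BPF_JMP | BPF_CALL with src_reg == BPF_PSEUDO_CALL
 *				and imm holding the relative instruction offset
 *				of the target (helper calls instead use
 *				src_reg == 0 and imm == helper ID)
 *	ld_imm64:		BPF_LD | BPF_IMM | BPF_DW, a two-slot (16-byte)
 *				instruction; with src_reg == BPF_PSEUDO_FUNC its
 *				64-bit immediate refers to a BPF subprogram
 *				rather than a plain constant
 */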

static int
bpf_object__init_prog(struct bpf_object *obj, struct bpf_program *prog,
		      const char *name, size_t sec_idx, const char *sec_name,
		      size_t sec_off, void *insn_data, size_t insn_data_sz)
{
	if (insn_data_sz == 0 || insn_data_sz % BPF_INSN_SZ || sec_off % BPF_INSN_SZ) {
		pr_warn("sec '%s': corrupted program '%s', offset %zu, size %zu\n",
			sec_name, name, sec_off, insn_data_sz);
		return -EINVAL;
	}

	memset(prog, 0, sizeof(*prog));
	prog->obj = obj;

	prog->sec_idx = sec_idx;
	prog->sec_insn_off = sec_off / BPF_INSN_SZ;
	prog->sec_insn_cnt = insn_data_sz / BPF_INSN_SZ;
	/* insns_cnt can later be increased by appending used subprograms */
	prog->insns_cnt = prog->sec_insn_cnt;

	prog->type = BPF_PROG_TYPE_UNSPEC;
	prog->load = true;

	prog->instances.fds = NULL;
	prog->instances.nr = -1;

	prog->sec_name = strdup(sec_name);
	if (!prog->sec_name)
		goto errout;

	prog->name = strdup(name);
	if (!prog->name)
		goto errout;

	prog->pin_name = __bpf_program__pin_name(prog);
	if (!prog->pin_name)
		goto errout;

	prog->insns = malloc(insn_data_sz);
	if (!prog->insns)
		goto errout;
	memcpy(prog->insns, insn_data, insn_data_sz);

	return 0;
errout:
	pr_warn("sec '%s': failed to allocate memory for prog '%s'\n", sec_name, name);
	bpf_program__exit(prog);
	return -ENOMEM;
}

static int
bpf_object__add_programs(struct bpf_object *obj, Elf_Data *sec_data,
			 const char *sec_name, int sec_idx)
{
	struct bpf_program *prog, *progs;
	void *data = sec_data->d_buf;
	size_t sec_sz = sec_data->d_size, sec_off, prog_sz;
	int nr_progs, err;
	const char *name;
	GElf_Sym sym;

	progs = obj->programs;
	nr_progs = obj->nr_programs;
	sec_off = 0;

	while (sec_off < sec_sz) {
		if (elf_sym_by_sec_off(obj, sec_idx, sec_off, STT_FUNC, &sym)) {
			pr_warn("sec '%s': failed to find program symbol at offset %zu\n",
				sec_name, sec_off);
			return -LIBBPF_ERRNO__FORMAT;
		}

		prog_sz = sym.st_size;

		name = elf_sym_str(obj, sym.st_name);
		if (!name) {
			pr_warn("sec '%s': failed to get symbol name for offset %zu\n",
				sec_name, sec_off);
			return -LIBBPF_ERRNO__FORMAT;
		}

		if (sec_off + prog_sz > sec_sz) {
			pr_warn("sec '%s': program at offset %zu crosses section boundary\n",
				sec_name, sec_off);
			return -LIBBPF_ERRNO__FORMAT;
		}

		pr_debug("sec '%s': found program '%s' at insn offset %zu (%zu bytes), code size %zu insns (%zu bytes)\n",
			 sec_name, name, sec_off / BPF_INSN_SZ, sec_off, prog_sz / BPF_INSN_SZ, prog_sz);

		progs = libbpf_reallocarray(progs, nr_progs + 1, sizeof(*progs));
		if (!progs) {
			/*
			 * In this case the original obj->programs
			 * is still valid, so we don't need any special
			 * treatment in bpf_object__close().
			 */
			pr_warn("sec '%s': failed to alloc memory for new program '%s'\n",
				sec_name, name);
			return -ENOMEM;
		}
		obj->programs = progs;

		prog = &progs[nr_progs];

		err = bpf_object__init_prog(obj, prog, name, sec_idx, sec_name,
					    sec_off, data + sec_off, prog_sz);
		if (err)
			return err;

		nr_progs++;
		obj->nr_programs = nr_progs;

		sec_off += prog_sz;
	}

	return 0;
}
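
/* For example (illustrative): an "xdp" section containing two STT_FUNC
 * symbols, xdp_prog1 at offset 0 and xdp_prog2 right after it, yields two
 * bpf_program entries, each covering its own slice of the section's
 * instructions.
 */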

static __u32 get_kernel_version(void)
{
	__u32 major, minor, patch;
	struct utsname info;

	uname(&info);
	if (sscanf(info.release, "%u.%u.%u", &major, &minor, &patch) != 3)
		return 0;
	return KERNEL_VERSION(major, minor, patch);
}
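
/* For example (illustrative): a uname release of "5.11.0-rc4" parses as
 * major=5, minor=11, patch=0, yielding KERNEL_VERSION(5, 11, 0) == 0x050b00;
 * a release string that sscanf() can't fully parse yields 0.
 */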

static const struct btf_member *
find_member_by_offset(const struct btf_type *t, __u32 bit_offset)
{
	struct btf_member *m;
	int i;

	for (i = 0, m = btf_members(t); i < btf_vlen(t); i++, m++) {
		if (btf_member_bit_offset(t, i) == bit_offset)
			return m;
	}

	return NULL;
}

static const struct btf_member *
find_member_by_name(const struct btf *btf, const struct btf_type *t,
		    const char *name)
{
	struct btf_member *m;
	int i;

	for (i = 0, m = btf_members(t); i < btf_vlen(t); i++, m++) {
		if (!strcmp(btf__name_by_offset(btf, m->name_off), name))
			return m;
	}

	return NULL;
}

#define STRUCT_OPS_VALUE_PREFIX "bpf_struct_ops_"
static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix,
				   const char *name, __u32 kind);

static int
find_struct_ops_kern_types(const struct btf *btf, const char *tname,
			   const struct btf_type **type, __u32 *type_id,
			   const struct btf_type **vtype, __u32 *vtype_id,
			   const struct btf_member **data_member)
{
	const struct btf_type *kern_type, *kern_vtype;
	const struct btf_member *kern_data_member;
	__s32 kern_vtype_id, kern_type_id;
	__u32 i;

	kern_type_id = btf__find_by_name_kind(btf, tname, BTF_KIND_STRUCT);
	if (kern_type_id < 0) {
		pr_warn("struct_ops init_kern: struct %s is not found in kernel BTF\n",
			tname);
		return kern_type_id;
	}
	kern_type = btf__type_by_id(btf, kern_type_id);

	/* Find the corresponding "map_value" type that will be used
	 * in map_update(BPF_MAP_TYPE_STRUCT_OPS).  For example,
	 * find "struct bpf_struct_ops_tcp_congestion_ops" from the
	 * btf_vmlinux.
	 */
	kern_vtype_id = find_btf_by_prefix_kind(btf, STRUCT_OPS_VALUE_PREFIX,
						tname, BTF_KIND_STRUCT);
	if (kern_vtype_id < 0) {
		pr_warn("struct_ops init_kern: struct %s%s is not found in kernel BTF\n",
			STRUCT_OPS_VALUE_PREFIX, tname);
		return kern_vtype_id;
	}
	kern_vtype = btf__type_by_id(btf, kern_vtype_id);

	/* Find "struct tcp_congestion_ops" from
	 * struct bpf_struct_ops_tcp_congestion_ops {
	 *	[ ... ]
	 *	struct tcp_congestion_ops data;
	 * }
	 */
	kern_data_member = btf_members(kern_vtype);
	for (i = 0; i < btf_vlen(kern_vtype); i++, kern_data_member++) {
		if (kern_data_member->type == kern_type_id)
			break;
	}
	if (i == btf_vlen(kern_vtype)) {
		pr_warn("struct_ops init_kern: struct %s data is not found in struct %s%s\n",
			tname, STRUCT_OPS_VALUE_PREFIX, tname);
		return -EINVAL;
	}

	*type = kern_type;
	*type_id = kern_type_id;
	*vtype = kern_vtype;
	*vtype_id = kern_vtype_id;
	*data_member = kern_data_member;

	return 0;
}

static bool bpf_map__is_struct_ops(const struct bpf_map *map)
{
	return map->def.type == BPF_MAP_TYPE_STRUCT_OPS;
}

/* Init the map's fields that depend on kern_btf */
static int bpf_map__init_kern_struct_ops(struct bpf_map *map,
					 const struct btf *btf,
					 const struct btf *kern_btf)
{
	const struct btf_member *member, *kern_member, *kern_data_member;
	const struct btf_type *type, *kern_type, *kern_vtype;
	__u32 i, kern_type_id, kern_vtype_id, kern_data_off;
	struct bpf_struct_ops *st_ops;
	void *data, *kern_data;
	const char *tname;
	int err;

	st_ops = map->st_ops;
	type = st_ops->type;
	tname = st_ops->tname;
	err = find_struct_ops_kern_types(kern_btf, tname,
					 &kern_type, &kern_type_id,
					 &kern_vtype, &kern_vtype_id,
					 &kern_data_member);
	if (err)
		return err;

	pr_debug("struct_ops init_kern %s: type_id:%u kern_type_id:%u kern_vtype_id:%u\n",
		 map->name, st_ops->type_id, kern_type_id, kern_vtype_id);

	map->def.value_size = kern_vtype->size;
	map->btf_vmlinux_value_type_id = kern_vtype_id;

	st_ops->kern_vdata = calloc(1, kern_vtype->size);
	if (!st_ops->kern_vdata)
		return -ENOMEM;

	data = st_ops->data;
	kern_data_off = kern_data_member->offset / 8;
	kern_data = st_ops->kern_vdata + kern_data_off;

	member = btf_members(type);
	for (i = 0; i < btf_vlen(type); i++, member++) {
		const struct btf_type *mtype, *kern_mtype;
		__u32 mtype_id, kern_mtype_id;
		void *mdata, *kern_mdata;
		__s64 msize, kern_msize;
		__u32 moff, kern_moff;
		__u32 kern_member_idx;
		const char *mname;

		mname = btf__name_by_offset(btf, member->name_off);
		kern_member = find_member_by_name(kern_btf, kern_type, mname);
		if (!kern_member) {
			pr_warn("struct_ops init_kern %s: Cannot find member %s in kernel BTF\n",
				map->name, mname);
			return -ENOTSUP;
		}

		kern_member_idx = kern_member - btf_members(kern_type);
		if (btf_member_bitfield_size(type, i) ||
		    btf_member_bitfield_size(kern_type, kern_member_idx)) {
			pr_warn("struct_ops init_kern %s: bitfield %s is not supported\n",
				map->name, mname);
			return -ENOTSUP;
		}

		moff = member->offset / 8;
		kern_moff = kern_member->offset / 8;

		mdata = data + moff;
		kern_mdata = kern_data + kern_moff;

		mtype = skip_mods_and_typedefs(btf, member->type, &mtype_id);
		kern_mtype = skip_mods_and_typedefs(kern_btf, kern_member->type,
						    &kern_mtype_id);
		if (BTF_INFO_KIND(mtype->info) !=
		    BTF_INFO_KIND(kern_mtype->info)) {
			pr_warn("struct_ops init_kern %s: Unmatched member type %s %u != %u(kernel)\n",
				map->name, mname, BTF_INFO_KIND(mtype->info),
				BTF_INFO_KIND(kern_mtype->info));
			return -ENOTSUP;
		}

		if (btf_is_ptr(mtype)) {
			struct bpf_program *prog;

			prog = st_ops->progs[i];
			if (!prog)
				continue;

			kern_mtype = skip_mods_and_typedefs(kern_btf,
							    kern_mtype->type,
							    &kern_mtype_id);

			/* mtype->type must be a func_proto which was
			 * guaranteed in bpf_object__collect_st_ops_relos(),
			 * so only check kern_mtype for func_proto here.
			 */
			if (!btf_is_func_proto(kern_mtype)) {
				pr_warn("struct_ops init_kern %s: kernel member %s is not a func ptr\n",
					map->name, mname);
				return -ENOTSUP;
			}

			prog->attach_btf_id = kern_type_id;
			prog->expected_attach_type = kern_member_idx;

			st_ops->kern_func_off[i] = kern_data_off + kern_moff;

			pr_debug("struct_ops init_kern %s: func ptr %s is set to prog %s from data(+%u) to kern_data(+%u)\n",
				 map->name, mname, prog->name, moff,
				 kern_moff);

			continue;
		}

		msize = btf__resolve_size(btf, mtype_id);
		kern_msize = btf__resolve_size(kern_btf, kern_mtype_id);
		if (msize < 0 || kern_msize < 0 || msize != kern_msize) {
			pr_warn("struct_ops init_kern %s: Error in size of member %s: %zd != %zd(kernel)\n",
				map->name, mname, (ssize_t)msize,
				(ssize_t)kern_msize);
			return -ENOTSUP;
		}

		pr_debug("struct_ops init_kern %s: copy %s %u bytes from data(+%u) to kern_data(+%u)\n",
			 map->name, mname, (unsigned int)msize,
			 moff, kern_moff);
		memcpy(kern_mdata, mdata, msize);
	}

	return 0;
}

static int bpf_object__init_kern_struct_ops_maps(struct bpf_object *obj)
{
	struct bpf_map *map;
	size_t i;
	int err;

	for (i = 0; i < obj->nr_maps; i++) {
		map = &obj->maps[i];

		if (!bpf_map__is_struct_ops(map))
			continue;

		err = bpf_map__init_kern_struct_ops(map, obj->btf,
						    obj->btf_vmlinux);
		if (err)
			return err;
	}

	return 0;
}

static int bpf_object__init_struct_ops_maps(struct bpf_object *obj)
{
	const struct btf_type *type, *datasec;
	const struct btf_var_secinfo *vsi;
	struct bpf_struct_ops *st_ops;
	const char *tname, *var_name;
	__s32 type_id, datasec_id;
	const struct btf *btf;
	struct bpf_map *map;
	__u32 i;

	if (obj->efile.st_ops_shndx == -1)
		return 0;

	btf = obj->btf;
	datasec_id = btf__find_by_name_kind(btf, STRUCT_OPS_SEC,
					    BTF_KIND_DATASEC);
	if (datasec_id < 0) {
		pr_warn("struct_ops init: DATASEC %s not found\n",
			STRUCT_OPS_SEC);
		return -EINVAL;
	}

	datasec = btf__type_by_id(btf, datasec_id);
	vsi = btf_var_secinfos(datasec);
	for (i = 0; i < btf_vlen(datasec); i++, vsi++) {
		type = btf__type_by_id(obj->btf, vsi->type);
		var_name = btf__name_by_offset(obj->btf, type->name_off);

		type_id = btf__resolve_type(obj->btf, vsi->type);
		if (type_id < 0) {
			pr_warn("struct_ops init: Cannot resolve var type_id %u in DATASEC %s\n",
				vsi->type, STRUCT_OPS_SEC);
			return -EINVAL;
		}

		type = btf__type_by_id(obj->btf, type_id);
		tname = btf__name_by_offset(obj->btf, type->name_off);
		if (!tname[0]) {
			pr_warn("struct_ops init: anonymous type is not supported\n");
			return -ENOTSUP;
		}
		if (!btf_is_struct(type)) {
			pr_warn("struct_ops init: %s is not a struct\n", tname);
			return -EINVAL;
		}

		map = bpf_object__add_map(obj);
		if (IS_ERR(map))
			return PTR_ERR(map);

		map->sec_idx = obj->efile.st_ops_shndx;
		map->sec_offset = vsi->offset;
		map->name = strdup(var_name);
		if (!map->name)
			return -ENOMEM;

		map->def.type = BPF_MAP_TYPE_STRUCT_OPS;
		map->def.key_size = sizeof(int);
		map->def.value_size = type->size;
		map->def.max_entries = 1;

		map->st_ops = calloc(1, sizeof(*map->st_ops));
		if (!map->st_ops)
			return -ENOMEM;
		st_ops = map->st_ops;
		st_ops->data = malloc(type->size);
		st_ops->progs = calloc(btf_vlen(type), sizeof(*st_ops->progs));
		st_ops->kern_func_off = malloc(btf_vlen(type) *
					       sizeof(*st_ops->kern_func_off));
		if (!st_ops->data || !st_ops->progs || !st_ops->kern_func_off)
			return -ENOMEM;

		if (vsi->offset + type->size > obj->efile.st_ops_data->d_size) {
			pr_warn("struct_ops init: var %s is beyond the end of DATASEC %s\n",
				var_name, STRUCT_OPS_SEC);
			return -EINVAL;
		}

		memcpy(st_ops->data,
		       obj->efile.st_ops_data->d_buf + vsi->offset,
		       type->size);
		st_ops->tname = tname;
		st_ops->type = type;
		st_ops->type_id = type_id;

		pr_debug("struct_ops init: struct %s(type_id=%u) %s found at offset %u\n",
			 tname, type_id, var_name, vsi->offset);
	}

	return 0;
}
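
/* Usage sketch (illustrative): BPF-side code that ends up in this DATASEC:
 *
 *	SEC(".struct_ops")
 *	struct tcp_congestion_ops dctcp = {
 *		.init	= (void *)dctcp_init,
 *		.name	= "bpf_dctcp",
 *	};
 *
 * Each such variable becomes one single-element BPF_MAP_TYPE_STRUCT_OPS
 * map named after the variable.
 */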

static struct bpf_object *bpf_object__new(const char *path,
					  const void *obj_buf,
					  size_t obj_buf_sz,
					  const char *obj_name)
{
	struct bpf_object *obj;
	char *end;

	obj = calloc(1, sizeof(struct bpf_object) + strlen(path) + 1);
	if (!obj) {
		pr_warn("alloc memory failed for %s\n", path);
		return ERR_PTR(-ENOMEM);
	}

	strcpy(obj->path, path);
	if (obj_name) {
		strncpy(obj->name, obj_name, sizeof(obj->name) - 1);
		obj->name[sizeof(obj->name) - 1] = 0;
	} else {
		/* Using the GNU version of basename(), which doesn't modify its argument. */
		strncpy(obj->name, basename((void *)path),
			sizeof(obj->name) - 1);
		end = strchr(obj->name, '.');
		if (end)
			*end = 0;
	}

	obj->efile.fd = -1;
	/*
	 * The caller of this function should also call
	 * bpf_object__elf_finish() after data collection to return
	 * obj_buf to the user. If not, we would have to duplicate the
	 * buffer to avoid the user freeing it before ELF processing
	 * is finished.
	 */
	obj->efile.obj_buf = obj_buf;
	obj->efile.obj_buf_sz = obj_buf_sz;
	obj->efile.maps_shndx = -1;
	obj->efile.btf_maps_shndx = -1;
	obj->efile.data_shndx = -1;
	obj->efile.rodata_shndx = -1;
	obj->efile.bss_shndx = -1;
	obj->efile.st_ops_shndx = -1;
	obj->kconfig_map_idx = -1;
	obj->rodata_map_idx = -1;

	obj->kern_version = get_kernel_version();
	obj->loaded = false;

	INIT_LIST_HEAD(&obj->list);
	list_add(&obj->list, &bpf_objects_list);
	return obj;
}

static void bpf_object__elf_finish(struct bpf_object *obj)
{
	if (!obj_elf_valid(obj))
		return;

	if (obj->efile.elf) {
		elf_end(obj->efile.elf);
		obj->efile.elf = NULL;
	}
	obj->efile.symbols = NULL;
	obj->efile.data = NULL;
	obj->efile.rodata = NULL;
	obj->efile.bss = NULL;
	obj->efile.st_ops_data = NULL;

	zfree(&obj->efile.reloc_sects);
	obj->efile.nr_reloc_sects = 0;
	zclose(obj->efile.fd);
	obj->efile.obj_buf = NULL;
	obj->efile.obj_buf_sz = 0;
}

static int bpf_object__elf_init(struct bpf_object *obj)
{
	int err = 0;
	GElf_Ehdr *ep;

	if (obj_elf_valid(obj)) {
		pr_warn("elf: init internal error\n");
		return -LIBBPF_ERRNO__LIBELF;
	}

	if (obj->efile.obj_buf_sz > 0) {
		/*
		 * obj_buf should have been validated by
		 * bpf_object__open_buffer().
		 */
		obj->efile.elf = elf_memory((char *)obj->efile.obj_buf,
					    obj->efile.obj_buf_sz);
	} else {
		obj->efile.fd = open(obj->path, O_RDONLY);
		if (obj->efile.fd < 0) {
			char errmsg[STRERR_BUFSIZE], *cp;

			err = -errno;
			cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
			pr_warn("elf: failed to open %s: %s\n", obj->path, cp);
			return err;
		}

		obj->efile.elf = elf_begin(obj->efile.fd, ELF_C_READ_MMAP, NULL);
	}

	if (!obj->efile.elf) {
		pr_warn("elf: failed to open %s as ELF file: %s\n", obj->path, elf_errmsg(-1));
		err = -LIBBPF_ERRNO__LIBELF;
		goto errout;
	}

	if (!gelf_getehdr(obj->efile.elf, &obj->efile.ehdr)) {
		pr_warn("elf: failed to get ELF header from %s: %s\n", obj->path, elf_errmsg(-1));
		err = -LIBBPF_ERRNO__FORMAT;
		goto errout;
	}
	ep = &obj->efile.ehdr;

	if (elf_getshdrstrndx(obj->efile.elf, &obj->efile.shstrndx)) {
		pr_warn("elf: failed to get section names section index for %s: %s\n",
			obj->path, elf_errmsg(-1));
		err = -LIBBPF_ERRNO__FORMAT;
		goto errout;
	}

	/* ELF is corrupted/truncated; avoid calling elf_strptr(). */
	if (!elf_rawdata(elf_getscn(obj->efile.elf, obj->efile.shstrndx), NULL)) {
		pr_warn("elf: failed to get section names strings from %s: %s\n",
			obj->path, elf_errmsg(-1));
		err = -LIBBPF_ERRNO__FORMAT;
		goto errout;
	}

	/* Old LLVM versions set e_machine to EM_NONE */
	if (ep->e_type != ET_REL ||
	    (ep->e_machine && ep->e_machine != EM_BPF)) {
		pr_warn("elf: %s is not a valid eBPF object file\n", obj->path);
		err = -LIBBPF_ERRNO__FORMAT;
		goto errout;
	}

	return 0;
errout:
	bpf_object__elf_finish(obj);
	return err;
}

static int bpf_object__check_endianness(struct bpf_object *obj)
{
#if __BYTE_ORDER == __LITTLE_ENDIAN
	if (obj->efile.ehdr.e_ident[EI_DATA] == ELFDATA2LSB)
		return 0;
#elif __BYTE_ORDER == __BIG_ENDIAN
	if (obj->efile.ehdr.e_ident[EI_DATA] == ELFDATA2MSB)
		return 0;
#else
# error "Unrecognized __BYTE_ORDER"
#endif
	pr_warn("elf: endianness mismatch in %s.\n", obj->path);
	return -LIBBPF_ERRNO__ENDIAN;
}

static int
bpf_object__init_license(struct bpf_object *obj, void *data, size_t size)
{
	memcpy(obj->license, data, min(size, sizeof(obj->license) - 1));
	pr_debug("license of %s is %s\n", obj->path, obj->license);
	return 0;
}

static int
bpf_object__init_kversion(struct bpf_object *obj, void *data, size_t size)
{
	__u32 kver;

	if (size != sizeof(kver)) {
		pr_warn("invalid kver section in %s\n", obj->path);
		return -LIBBPF_ERRNO__FORMAT;
	}
	memcpy(&kver, data, sizeof(kver));
	obj->kern_version = kver;
	pr_debug("kernel version of %s is %x\n", obj->path, obj->kern_version);
	return 0;
}

static bool bpf_map_type__is_map_in_map(enum bpf_map_type type)
{
	if (type == BPF_MAP_TYPE_ARRAY_OF_MAPS ||
	    type == BPF_MAP_TYPE_HASH_OF_MAPS)
		return true;
	return false;
}

int bpf_object__section_size(const struct bpf_object *obj, const char *name,
			     __u32 *size)
{
	int ret = -ENOENT;

	*size = 0;
	if (!name) {
		return -EINVAL;
	} else if (!strcmp(name, DATA_SEC)) {
		if (obj->efile.data)
			*size = obj->efile.data->d_size;
	} else if (!strcmp(name, BSS_SEC)) {
		if (obj->efile.bss)
			*size = obj->efile.bss->d_size;
	} else if (!strcmp(name, RODATA_SEC)) {
		if (obj->efile.rodata)
			*size = obj->efile.rodata->d_size;
	} else if (!strcmp(name, STRUCT_OPS_SEC)) {
		if (obj->efile.st_ops_data)
			*size = obj->efile.st_ops_data->d_size;
	} else {
		Elf_Scn *scn = elf_sec_by_name(obj, name);
		Elf_Data *data = elf_sec_data(obj, scn);

		if (data) {
			ret = 0; /* found it */
			*size = data->d_size;
		}
	}

	return *size ? 0 : ret;
}

int bpf_object__variable_offset(const struct bpf_object *obj, const char *name,
				__u32 *off)
{
	Elf_Data *symbols = obj->efile.symbols;
	const char *sname;
	size_t si;

	if (!name || !off)
		return -EINVAL;

	for (si = 0; si < symbols->d_size / sizeof(GElf_Sym); si++) {
		GElf_Sym sym;

		if (!gelf_getsym(symbols, si, &sym))
			continue;
		if (GELF_ST_BIND(sym.st_info) != STB_GLOBAL ||
		    GELF_ST_TYPE(sym.st_info) != STT_OBJECT)
			continue;

		sname = elf_sym_str(obj, sym.st_name);
		if (!sname) {
			pr_warn("failed to get sym name string for var %s\n",
				name);
			return -EIO;
		}
		if (strcmp(name, sname) == 0) {
			*off = sym.st_value;
			return 0;
		}
	}

	return -ENOENT;
}

static struct bpf_map *bpf_object__add_map(struct bpf_object *obj)
{
	struct bpf_map *new_maps;
	size_t new_cap;
	int i;

	if (obj->nr_maps < obj->maps_cap)
		return &obj->maps[obj->nr_maps++];

	new_cap = max((size_t)4, obj->maps_cap * 3 / 2);
	new_maps = libbpf_reallocarray(obj->maps, new_cap, sizeof(*obj->maps));
	if (!new_maps) {
		pr_warn("alloc maps for object failed\n");
		return ERR_PTR(-ENOMEM);
	}

	obj->maps_cap = new_cap;
	obj->maps = new_maps;

	/* zero out new maps */
	memset(obj->maps + obj->nr_maps, 0,
	       (obj->maps_cap - obj->nr_maps) * sizeof(*obj->maps));
	/*
	 * Fill all fds with -1 so that we won't close an unrelated fd
	 * (fd 0 is stdin) on failure (zclose() won't close negative fds).
	 */
	for (i = obj->nr_maps; i < obj->maps_cap; i++) {
		obj->maps[i].fd = -1;
		obj->maps[i].inner_map_fd = -1;
	}

	return &obj->maps[obj->nr_maps++];
}

static size_t bpf_map_mmap_sz(const struct bpf_map *map)
{
	long page_sz = sysconf(_SC_PAGE_SIZE);
	size_t map_sz;

	map_sz = (size_t)roundup(map->def.value_size, 8) * map->def.max_entries;
	map_sz = roundup(map_sz, page_sz);
	return map_sz;
}
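
/* Worked example (illustrative): with value_size == 1000, max_entries == 1,
 * and a 4096-byte page, roundup(1000, 8) * 1 == 1000 bytes of data, rounded
 * up to one full page, so the mmap size is 4096.
 */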

static char *internal_map_name(struct bpf_object *obj,
			       enum libbpf_map_type type)
{
	char map_name[BPF_OBJ_NAME_LEN], *p;
	const char *sfx = libbpf_type_to_btf_name[type];
	int sfx_len = max((size_t)7, strlen(sfx));
	int pfx_len = min((size_t)BPF_OBJ_NAME_LEN - sfx_len - 1,
			  strlen(obj->name));

	snprintf(map_name, sizeof(map_name), "%.*s%.*s", pfx_len, obj->name,
		 sfx_len, libbpf_type_to_btf_name[type]);

	/* sanitise map name to characters allowed by kernel */
	for (p = map_name; *p && p < map_name + sizeof(map_name); p++)
		if (!isalnum(*p) && *p != '_' && *p != '.')
			*p = '_';

	return strdup(map_name);
}
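
/* For example (illustrative): an object named "packet_filter" with a
 * LIBBPF_MAP_RODATA map gets sfx_len == 7 (".rodata") and pfx_len == 8
 * (BPF_OBJ_NAME_LEN of 16, minus 7, minus 1), producing "packet_f.rodata";
 * any characters the kernel rejects are then replaced with '_'.
 */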

static int
bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type,
			      int sec_idx, void *data, size_t data_sz)
{
	struct bpf_map_def *def;
	struct bpf_map *map;
	int err;

	map = bpf_object__add_map(obj);
	if (IS_ERR(map))
		return PTR_ERR(map);

	map->libbpf_type = type;
	map->sec_idx = sec_idx;
	map->sec_offset = 0;
	map->name = internal_map_name(obj, type);
	if (!map->name) {
		pr_warn("failed to alloc map name\n");
		return -ENOMEM;
	}

	def = &map->def;
	def->type = BPF_MAP_TYPE_ARRAY;
	def->key_size = sizeof(int);
	def->value_size = data_sz;
	def->max_entries = 1;
	def->map_flags = type == LIBBPF_MAP_RODATA || type == LIBBPF_MAP_KCONFIG
			 ? BPF_F_RDONLY_PROG : 0;
	def->map_flags |= BPF_F_MMAPABLE;

	pr_debug("map '%s' (global data): at sec_idx %d, offset %zu, flags %x.\n",
		 map->name, map->sec_idx, map->sec_offset, def->map_flags);

	map->mmaped = mmap(NULL, bpf_map_mmap_sz(map), PROT_READ | PROT_WRITE,
			   MAP_SHARED | MAP_ANONYMOUS, -1, 0);
	if (map->mmaped == MAP_FAILED) {
		err = -errno;
		map->mmaped = NULL;
		pr_warn("failed to alloc map '%s' content buffer: %d\n",
			map->name, err);
		zfree(&map->name);
		return err;
	}

	if (data)
		memcpy(map->mmaped, data, data_sz);

	pr_debug("map %td is \"%s\"\n", map - obj->maps, map->name);
	return 0;
}

static int bpf_object__init_global_data_maps(struct bpf_object *obj)
{
	int err;

	/*
	 * Populate obj->maps with libbpf internal maps.
	 */
	if (obj->efile.data_shndx >= 0) {
		err = bpf_object__init_internal_map(obj, LIBBPF_MAP_DATA,
						    obj->efile.data_shndx,
						    obj->efile.data->d_buf,
						    obj->efile.data->d_size);
		if (err)
			return err;
	}
	if (obj->efile.rodata_shndx >= 0) {
		err = bpf_object__init_internal_map(obj, LIBBPF_MAP_RODATA,
						    obj->efile.rodata_shndx,
						    obj->efile.rodata->d_buf,
						    obj->efile.rodata->d_size);
		if (err)
			return err;

		obj->rodata_map_idx = obj->nr_maps - 1;
	}
	if (obj->efile.bss_shndx >= 0) {
		err = bpf_object__init_internal_map(obj, LIBBPF_MAP_BSS,
						    obj->efile.bss_shndx,
						    NULL,
						    obj->efile.bss->d_size);
		if (err)
			return err;
	}
	return 0;
}

static struct extern_desc *find_extern_by_name(const struct bpf_object *obj,
					       const void *name)
{
	int i;

	for (i = 0; i < obj->nr_extern; i++) {
		if (strcmp(obj->externs[i].name, name) == 0)
			return &obj->externs[i];
	}
	return NULL;
}

static int set_kcfg_value_tri(struct extern_desc *ext, void *ext_val,
			      char value)
{
	switch (ext->kcfg.type) {
	case KCFG_BOOL:
		if (value == 'm') {
			pr_warn("extern (kcfg) %s=%c should be tristate or char\n",
				ext->name, value);
			return -EINVAL;
		}
		*(bool *)ext_val = value == 'y' ? true : false;
		break;
	case KCFG_TRISTATE:
		if (value == 'y')
			*(enum libbpf_tristate *)ext_val = TRI_YES;
		else if (value == 'm')
			*(enum libbpf_tristate *)ext_val = TRI_MODULE;
		else /* value == 'n' */
			*(enum libbpf_tristate *)ext_val = TRI_NO;
		break;
	case KCFG_CHAR:
		*(char *)ext_val = value;
		break;
	case KCFG_UNKNOWN:
	case KCFG_INT:
	case KCFG_CHAR_ARR:
	default:
		pr_warn("extern (kcfg) %s=%c should be bool, tristate, or char\n",
			ext->name, value);
		return -EINVAL;
	}
	ext->is_set = true;
	return 0;
}
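
/* For example (illustrative): a .kconfig line CONFIG_MODULES=y sets a bool
 * extern to true and a libbpf_tristate extern to TRI_YES, while
 * CONFIG_NETDEVSIM=m is only representable as TRI_MODULE (or as the char
 * 'm'); assigning 'm' to a bool extern is rejected above.
 */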
1517
1518 static int set_kcfg_value_str(struct extern_desc *ext, char *ext_val,
1519                               const char *value)
1520 {
1521         size_t len;
1522
1523         if (ext->kcfg.type != KCFG_CHAR_ARR) {
1524                 pr_warn("extern (kcfg) %s=%s should be char array\n", ext->name, value);
1525                 return -EINVAL;
1526         }
1527
1528         len = strlen(value);
1529         if (value[len - 1] != '"') {
1530                 pr_warn("extern (kcfg) '%s': invalid string config '%s'\n",
1531                         ext->name, value);
1532                 return -EINVAL;
1533         }
1534
1535         /* strip quotes */
1536         len -= 2;
1537         if (len >= ext->kcfg.sz) {
1538                 pr_warn("extern (kcfg) '%s': long string config %s (%zu bytes) truncated to %d bytes\n",
1539                         ext->name, value, len, ext->kcfg.sz - 1);
1540                 len = ext->kcfg.sz - 1;
1541         }
1542         memcpy(ext_val, value + 1, len);
1543         ext_val[len] = '\0';
1544         ext->is_set = true;
1545         return 0;
1546 }
1547
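/* Parse the full string as a __u64. Base 0 lets strtoull() accept
 * decimal, octal ("0755"), and hex ("0x1f") spellings; any trailing
 * garbage is an error, so e.g. "250" parses but "250Hz" does not.
 */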
1548 static int parse_u64(const char *value, __u64 *res)
1549 {
1550         char *value_end;
1551         int err;
1552
1553         errno = 0;
1554         *res = strtoull(value, &value_end, 0);
1555         if (errno) {
1556                 err = -errno;
1557                 pr_warn("failed to parse '%s' as integer: %d\n", value, err);
1558                 return err;
1559         }
1560         if (*value_end) {
1561                 pr_warn("failed to parse '%s' as integer completely\n", value);
1562                 return -EINVAL;
1563         }
1564         return 0;
1565 }
1566
1567 static bool is_kcfg_value_in_range(const struct extern_desc *ext, __u64 v)
1568 {
1569         int bit_sz = ext->kcfg.sz * 8;
1570
1571         if (ext->kcfg.sz == 8)
1572                 return true;
1573
1574         /* Validate that value stored in u64 fits in integer of `ext->sz`
1575          * bytes size without any loss of information. If the target integer
1576          * is signed, we rely on the following limits of integer type of
1577          * Y bits and subsequent transformation:
1578          *
1579          *     -2^(Y-1) <= X           <= 2^(Y-1) - 1
1580          *            0 <= X + 2^(Y-1) <= 2^Y - 1
1581          *            0 <= X + 2^(Y-1) <  2^Y
1582          *
1583          *  For unsigned target integer, check that all the (64 - Y) bits are
1584          *  zero.
1585          */
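        /* Worked example (illustrative): for a 1-byte signed target
         * (bit_sz = 8), "-1" arrives from strtoull() as
         * v = 0xffffffffffffffff; v + 0x80 wraps around to 0x7f, which
         * is < 0x100, so it is in range, while v = 128 is rejected
         * (128 + 128 == 256). The sz == 8 early return above also
         * avoids undefined 64-bit shifts here.
         */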
1586         if (ext->kcfg.is_signed)
1587                 return v + (1ULL << (bit_sz - 1)) < (1ULL << bit_sz);
1588         else
1589                 return (v >> bit_sz) == 0;
1590 }
1591
1592 static int set_kcfg_value_num(struct extern_desc *ext, void *ext_val,
1593                               __u64 value)
1594 {
1595         if (ext->kcfg.type != KCFG_INT && ext->kcfg.type != KCFG_CHAR) {
1596                 pr_warn("extern (kcfg) %s=%llu should be integer\n",
1597                         ext->name, (unsigned long long)value);
1598                 return -EINVAL;
1599         }
1600         if (!is_kcfg_value_in_range(ext, value)) {
1601                 pr_warn("extern (kcfg) %s=%llu value doesn't fit in %d bytes\n",
1602                         ext->name, (unsigned long long)value, ext->kcfg.sz);
1603                 return -ERANGE;
1604         }
1605         switch (ext->kcfg.sz) {
1606         case 1: *(__u8 *)ext_val = value; break;
1607         case 2: *(__u16 *)ext_val = value; break;
1608         case 4: *(__u32 *)ext_val = value; break;
1609         case 8: *(__u64 *)ext_val = value; break;
1610         default:
1611                 return -EINVAL;
1612         }
1613         ext->is_set = true;
1614         return 0;
1615 }
1616
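/* Apply one line of Kconfig-style input to matching kcfg externs. The
 * expected formats (as found in a real /proc/config.gz) look like:
 *
 *    CONFIG_BPF=y                        (bool/tristate)
 *    CONFIG_EXT4_FS=m                    (tristate)
 *    CONFIG_DEFAULT_HOSTNAME="(none)"    (char array)
 *    CONFIG_HZ=250                       (integer)
 */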
1617 static int bpf_object__process_kconfig_line(struct bpf_object *obj,
1618                                             char *buf, void *data)
1619 {
1620         struct extern_desc *ext;
1621         char *sep, *value;
1622         int len, err = 0;
1623         void *ext_val;
1624         __u64 num;
1625
1626         if (strncmp(buf, "CONFIG_", 7))
1627                 return 0;
1628
1629         sep = strchr(buf, '=');
1630         if (!sep) {
1631                 pr_warn("failed to parse '%s': no separator\n", buf);
1632                 return -EINVAL;
1633         }
1634
1635         /* Trim ending '\n' */
1636         len = strlen(buf);
1637         if (buf[len - 1] == '\n')
1638                 buf[len - 1] = '\0';
1639         /* Split on '=' and ensure that a value is present. */
1640         *sep = '\0';
1641         if (!sep[1]) {
1642                 *sep = '=';
1643                 pr_warn("failed to parse '%s': no value\n", buf);
1644                 return -EINVAL;
1645         }
1646
1647         ext = find_extern_by_name(obj, buf);
1648         if (!ext || ext->is_set)
1649                 return 0;
1650
1651         ext_val = data + ext->kcfg.data_off;
1652         value = sep + 1;
1653
1654         switch (*value) {
1655         case 'y': case 'n': case 'm':
1656                 err = set_kcfg_value_tri(ext, ext_val, *value);
1657                 break;
1658         case '"':
1659                 err = set_kcfg_value_str(ext, ext_val, value);
1660                 break;
1661         default:
1662                 /* assume integer */
1663                 err = parse_u64(value, &num);
1664                 if (err) {
1665                         pr_warn("extern (kcfg) %s=%s should be integer\n",
1666                                 ext->name, value);
1667                         return err;
1668                 }
1669                 err = set_kcfg_value_num(ext, ext_val, num);
1670                 break;
1671         }
1672         if (err)
1673                 return err;
1674         pr_debug("extern (kcfg) %s=%s\n", ext->name, value);
1675         return 0;
1676 }
1677
1678 static int bpf_object__read_kconfig_file(struct bpf_object *obj, void *data)
1679 {
1680         char buf[PATH_MAX];
1681         struct utsname uts;
1682         int len, err = 0;
1683         gzFile file;
1684
1685         uname(&uts);
1686         len = snprintf(buf, PATH_MAX, "/boot/config-%s", uts.release);
1687         if (len < 0)
1688                 return -EINVAL;
1689         else if (len >= PATH_MAX)
1690                 return -ENAMETOOLONG;
1691
1692         /* gzopen also accepts uncompressed files. */
1693         file = gzopen(buf, "r");
1694         if (!file)
1695                 file = gzopen("/proc/config.gz", "r");
1696
1697         if (!file) {
1698                 pr_warn("failed to open system Kconfig\n");
1699                 return -ENOENT;
1700         }
1701
1702         while (gzgets(file, buf, sizeof(buf))) {
1703                 err = bpf_object__process_kconfig_line(obj, buf, data);
1704                 if (err) {
1705                         pr_warn("error parsing system Kconfig line '%s': %d\n",
1706                                 buf, err);
1707                         goto out;
1708                 }
1709         }
1710
1711 out:
1712         gzclose(file);
1713         return err;
1714 }
1715
1716 static int bpf_object__read_kconfig_mem(struct bpf_object *obj,
1717                                         const char *config, void *data)
1718 {
1719         char buf[PATH_MAX];
1720         int err = 0;
1721         FILE *file;
1722
1723         file = fmemopen((void *)config, strlen(config), "r");
1724         if (!file) {
1725                 err = -errno;
1726                 pr_warn("failed to open in-memory Kconfig: %d\n", err);
1727                 return err;
1728         }
1729
1730         while (fgets(buf, sizeof(buf), file)) {
1731                 err = bpf_object__process_kconfig_line(obj, buf, data);
1732                 if (err) {
1733                         pr_warn("error parsing in-memory Kconfig line '%s': %d\n",
1734                                 buf, err);
1735                         break;
1736                 }
1737         }
1738
1739         fclose(file);
1740         return err;
1741 }
1742
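/* Create the internal .kconfig map backing kcfg externs. Kcfg externs
 * are assumed to be laid out at increasing data_off, so the data_off of
 * the last one plus its size gives the total map value size, e.g. two
 * 4-byte externs at offsets 0 and 4 yield an 8-byte value.
 */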
1743 static int bpf_object__init_kconfig_map(struct bpf_object *obj)
1744 {
1745         struct extern_desc *last_ext = NULL, *ext;
1746         size_t map_sz;
1747         int i, err;
1748
1749         for (i = 0; i < obj->nr_extern; i++) {
1750                 ext = &obj->externs[i];
1751                 if (ext->type == EXT_KCFG)
1752                         last_ext = ext;
1753         }
1754
1755         if (!last_ext)
1756                 return 0;
1757
1758         map_sz = last_ext->kcfg.data_off + last_ext->kcfg.sz;
1759         err = bpf_object__init_internal_map(obj, LIBBPF_MAP_KCONFIG,
1760                                             obj->efile.symbols_shndx,
1761                                             NULL, map_sz);
1762         if (err)
1763                 return err;
1764
1765         obj->kconfig_map_idx = obj->nr_maps - 1;
1766
1767         return 0;
1768 }
1769
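/* Parse legacy (pre-BTF) map definitions. In BPF program code these are
 * declared roughly as (illustrative):
 *
 *    struct bpf_map_def SEC("maps") my_map = {
 *            .type = BPF_MAP_TYPE_HASH,
 *            .key_size = sizeof(__u32),
 *            .value_size = sizeof(__u64),
 *            .max_entries = 1024,
 *    };
 *
 * Each symbol in the "maps" ELF section is taken to be one such
 * definition, and all definitions are assumed to have the same size.
 */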
1770 static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict)
1771 {
1772         Elf_Data *symbols = obj->efile.symbols;
1773         int i, map_def_sz = 0, nr_maps = 0, nr_syms;
1774         Elf_Data *data = NULL;
1775         Elf_Scn *scn;
1776
1777         if (obj->efile.maps_shndx < 0)
1778                 return 0;
1779
1780         if (!symbols)
1781                 return -EINVAL;
1782
1784         scn = elf_sec_by_idx(obj, obj->efile.maps_shndx);
1785         data = elf_sec_data(obj, scn);
1786         if (!scn || !data) {
1787                 pr_warn("elf: failed to get legacy map definitions for %s\n",
1788                         obj->path);
1789                 return -EINVAL;
1790         }
1791
1792         /*
1793          * Count number of maps. Each map has a name.
1794          * Array of maps is not supported: only the first element is
1795          * considered.
1796          *
1797          * TODO: Detect arrays of maps and report an error.
1798          */
1799         nr_syms = symbols->d_size / sizeof(GElf_Sym);
1800         for (i = 0; i < nr_syms; i++) {
1801                 GElf_Sym sym;
1802
1803                 if (!gelf_getsym(symbols, i, &sym))
1804                         continue;
1805                 if (sym.st_shndx != obj->efile.maps_shndx)
1806                         continue;
1807                 nr_maps++;
1808         }
1809         /* Assume equally sized map definitions */
1810         pr_debug("elf: found %d legacy map definitions (%zd bytes) in %s\n",
1811                  nr_maps, data->d_size, obj->path);
1812
1813         if (!data->d_size || nr_maps == 0 || (data->d_size % nr_maps) != 0) {
1814                 pr_warn("elf: unable to determine legacy map definition size in %s\n",
1815                         obj->path);
1816                 return -EINVAL;
1817         }
1818         map_def_sz = data->d_size / nr_maps;
1819
1820         /* Fill obj->maps using data in "maps" section.  */
1821         for (i = 0; i < nr_syms; i++) {
1822                 GElf_Sym sym;
1823                 const char *map_name;
1824                 struct bpf_map_def *def;
1825                 struct bpf_map *map;
1826
1827                 if (!gelf_getsym(symbols, i, &sym))
1828                         continue;
1829                 if (sym.st_shndx != obj->efile.maps_shndx)
1830                         continue;
1831
1832                 map = bpf_object__add_map(obj);
1833                 if (IS_ERR(map))
1834                         return PTR_ERR(map);
1835
1836                 map_name = elf_sym_str(obj, sym.st_name);
1837                 if (!map_name) {
1838                         pr_warn("failed to get map #%d name sym string for obj %s\n",
1839                                 i, obj->path);
1840                         return -LIBBPF_ERRNO__FORMAT;
1841                 }
1842
1843                 map->libbpf_type = LIBBPF_MAP_UNSPEC;
1844                 map->sec_idx = sym.st_shndx;
1845                 map->sec_offset = sym.st_value;
1846                 pr_debug("map '%s' (legacy): at sec_idx %d, offset %zu.\n",
1847                          map_name, map->sec_idx, map->sec_offset);
1848                 if (sym.st_value + map_def_sz > data->d_size) {
1849                         pr_warn("corrupted maps section in %s: last map \"%s\" too small\n",
1850                                 obj->path, map_name);
1851                         return -EINVAL;
1852                 }
1853
1854                 map->name = strdup(map_name);
1855                 if (!map->name) {
1856                         pr_warn("failed to alloc map name\n");
1857                         return -ENOMEM;
1858                 }
1859                 pr_debug("map %d is \"%s\"\n", i, map->name);
1860                 def = (struct bpf_map_def *)(data->d_buf + sym.st_value);
1861                 /*
1862                  * If the definition of the map in the object file fits in
1863                  * bpf_map_def, copy it.  Any extra fields in our version
1864                  * of bpf_map_def will default to zero as a result of the
1865                  * calloc above.
1866                  */
1867                 if (map_def_sz <= sizeof(struct bpf_map_def)) {
1868                         memcpy(&map->def, def, map_def_sz);
1869                 } else {
1870                         /*
1871                          * Here the map structure being read is bigger than what
1872                          * we expect, truncate if the excess bits are all zero.
1873                          * If they are not zero, reject this map as
1874                          * incompatible.
1875                          */
1876                         char *b;
1877
1878                         for (b = ((char *)def) + sizeof(struct bpf_map_def);
1879                              b < ((char *)def) + map_def_sz; b++) {
1880                                 if (*b != 0) {
1881                                         pr_warn("maps section in %s: \"%s\" has unrecognized, non-zero options\n",
1882                                                 obj->path, map_name);
1883                                         if (strict)
1884                                                 return -EINVAL;
1885                                 }
1886                         }
1887                         memcpy(&map->def, def, sizeof(struct bpf_map_def));
1888                 }
1889         }
1890         return 0;
1891 }
1892
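/* Resolve a BTF type ID to its underlying type by peeling typedefs and
 * CONST/VOLATILE/RESTRICT modifiers, e.g. "typedef const volatile int
 * cvi_t" resolves down to the plain INT; if res_id is non-NULL, it
 * receives the ID of that final type.
 */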
1893 static const struct btf_type *
1894 skip_mods_and_typedefs(const struct btf *btf, __u32 id, __u32 *res_id)
1895 {
1896         const struct btf_type *t = btf__type_by_id(btf, id);
1897
1898         if (res_id)
1899                 *res_id = id;
1900
1901         while (btf_is_mod(t) || btf_is_typedef(t)) {
1902                 if (res_id)
1903                         *res_id = t->type;
1904                 t = btf__type_by_id(btf, t->type);
1905         }
1906
1907         return t;
1908 }
1909
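/* Check that a type is a pointer to a function and return the pointed-to
 * FUNC_PROTO, e.g. "typedef int (*handler_t)(void *)" resolves to the
 * prototype "int (void *)". Returns NULL for anything else.
 */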
1910 static const struct btf_type *
1911 resolve_func_ptr(const struct btf *btf, __u32 id, __u32 *res_id)
1912 {
1913         const struct btf_type *t;
1914
1915         t = skip_mods_and_typedefs(btf, id, NULL);
1916         if (!btf_is_ptr(t))
1917                 return NULL;
1918
1919         t = skip_mods_and_typedefs(btf, t->type, res_id);
1920
1921         return btf_is_func_proto(t) ? t : NULL;
1922 }
1923
1924 static const char *btf_kind_str(const struct btf_type *t)
1925 {
1926         switch (btf_kind(t)) {
1927         case BTF_KIND_UNKN: return "void";
1928         case BTF_KIND_INT: return "int";
1929         case BTF_KIND_PTR: return "ptr";
1930         case BTF_KIND_ARRAY: return "array";
1931         case BTF_KIND_STRUCT: return "struct";
1932         case BTF_KIND_UNION: return "union";
1933         case BTF_KIND_ENUM: return "enum";
1934         case BTF_KIND_FWD: return "fwd";
1935         case BTF_KIND_TYPEDEF: return "typedef";
1936         case BTF_KIND_VOLATILE: return "volatile";
1937         case BTF_KIND_CONST: return "const";
1938         case BTF_KIND_RESTRICT: return "restrict";
1939         case BTF_KIND_FUNC: return "func";
1940         case BTF_KIND_FUNC_PROTO: return "func_proto";
1941         case BTF_KIND_VAR: return "var";
1942         case BTF_KIND_DATASEC: return "datasec";
1943         case BTF_KIND_FLOAT: return "float";
1944         default: return "unknown";
1945         }
1946 }
1947
1948 /*
1949  * Fetch integer attribute of BTF map definition. Such attributes are
1950  * represented using a pointer to an array, in which dimensionality of array
1951  * encodes specified integer value. E.g., int (*type)[BPF_MAP_TYPE_ARRAY];
1952  * encodes `type => BPF_MAP_TYPE_ARRAY` key/value pair completely using BTF
1953  * type definition, while using only sizeof(void *) space in ELF data section.
1954  */
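/* In BPF program source such attributes are typically spelled via the
 * bpf_helpers.h macros, where __uint(name, v) expands to
 * "int (*name)[v]", e.g. (illustrative):
 *
 *    struct {
 *            __uint(type, BPF_MAP_TYPE_ARRAY);
 *            __uint(max_entries, 256);
 *    } my_map SEC(".maps");
 */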
1955 static bool get_map_field_int(const char *map_name, const struct btf *btf,
1956                               const struct btf_member *m, __u32 *res)
1957 {
1958         const struct btf_type *t = skip_mods_and_typedefs(btf, m->type, NULL);
1959         const char *name = btf__name_by_offset(btf, m->name_off);
1960         const struct btf_array *arr_info;
1961         const struct btf_type *arr_t;
1962
1963         if (!btf_is_ptr(t)) {
1964                 pr_warn("map '%s': attr '%s': expected PTR, got %s.\n",
1965                         map_name, name, btf_kind_str(t));
1966                 return false;
1967         }
1968
1969         arr_t = btf__type_by_id(btf, t->type);
1970         if (!arr_t) {
1971                 pr_warn("map '%s': attr '%s': type [%u] not found.\n",
1972                         map_name, name, t->type);
1973                 return false;
1974         }
1975         if (!btf_is_array(arr_t)) {
1976                 pr_warn("map '%s': attr '%s': expected ARRAY, got %s.\n",
1977                         map_name, name, btf_kind_str(arr_t));
1978                 return false;
1979         }
1980         arr_info = btf_array(arr_t);
1981         *res = arr_info->nelems;
1982         return true;
1983 }
1984
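/* Build the default pin path for a map: "<root>/<map name>", where the
 * root falls back to "/sys/fs/bpf", e.g. map "my_map" with a NULL path
 * pins at "/sys/fs/bpf/my_map".
 */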
1985 static int build_map_pin_path(struct bpf_map *map, const char *path)
1986 {
1987         char buf[PATH_MAX];
1988         int len;
1989
1990         if (!path)
1991                 path = "/sys/fs/bpf";
1992
1993         len = snprintf(buf, PATH_MAX, "%s/%s", path, bpf_map__name(map));
1994         if (len < 0)
1995                 return -EINVAL;
1996         else if (len >= PATH_MAX)
1997                 return -ENAMETOOLONG;
1998
1999         return bpf_map__set_pin_path(map, buf);
2000 }
2001
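/* Parse one BTF-defined map definition. The "values" branch below
 * handles map-in-map declarations, which look roughly like this in BPF
 * program code (illustrative; __array(name, v) expands to
 * "typeof(v) *name[]"):
 *
 *    struct inner {
 *            __uint(type, BPF_MAP_TYPE_ARRAY);
 *            __uint(max_entries, 1);
 *            __type(key, __u32);
 *            __type(value, __u32);
 *    };
 *    struct {
 *            __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
 *            __uint(max_entries, 4);
 *            __type(key, __u32);
 *            __array(values, struct inner);
 *    } outer SEC(".maps");
 */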
2003 static int parse_btf_map_def(struct bpf_object *obj,
2004                              struct bpf_map *map,
2005                              const struct btf_type *def,
2006                              bool strict, bool is_inner,
2007                              const char *pin_root_path)
2008 {
2009         const struct btf_type *t;
2010         const struct btf_member *m;
2011         int vlen, i;
2012
2013         vlen = btf_vlen(def);
2014         m = btf_members(def);
2015         for (i = 0; i < vlen; i++, m++) {
2016                 const char *name = btf__name_by_offset(obj->btf, m->name_off);
2017
2018                 if (!name) {
2019                         pr_warn("map '%s': invalid field #%d.\n", map->name, i);
2020                         return -EINVAL;
2021                 }
2022                 if (strcmp(name, "type") == 0) {
2023                         if (!get_map_field_int(map->name, obj->btf, m,
2024                                                &map->def.type))
2025                                 return -EINVAL;
2026                         pr_debug("map '%s': found type = %u.\n",
2027                                  map->name, map->def.type);
2028                 } else if (strcmp(name, "max_entries") == 0) {
2029                         if (!get_map_field_int(map->name, obj->btf, m,
2030                                                &map->def.max_entries))
2031                                 return -EINVAL;
2032                         pr_debug("map '%s': found max_entries = %u.\n",
2033                                  map->name, map->def.max_entries);
2034                 } else if (strcmp(name, "map_flags") == 0) {
2035                         if (!get_map_field_int(map->name, obj->btf, m,
2036                                                &map->def.map_flags))
2037                                 return -EINVAL;
2038                         pr_debug("map '%s': found map_flags = %u.\n",
2039                                  map->name, map->def.map_flags);
2040                 } else if (strcmp(name, "numa_node") == 0) {
2041                         if (!get_map_field_int(map->name, obj->btf, m, &map->numa_node))
2042                                 return -EINVAL;
2043                         pr_debug("map '%s': found numa_node = %u.\n", map->name, map->numa_node);
2044                 } else if (strcmp(name, "key_size") == 0) {
2045                         __u32 sz;
2046
2047                         if (!get_map_field_int(map->name, obj->btf, m, &sz))
2048                                 return -EINVAL;
2049                         pr_debug("map '%s': found key_size = %u.\n",
2050                                  map->name, sz);
2051                         if (map->def.key_size && map->def.key_size != sz) {
2052                                 pr_warn("map '%s': conflicting key size %u != %u.\n",
2053                                         map->name, map->def.key_size, sz);
2054                                 return -EINVAL;
2055                         }
2056                         map->def.key_size = sz;
2057                 } else if (strcmp(name, "key") == 0) {
2058                         __s64 sz;
2059
2060                         t = btf__type_by_id(obj->btf, m->type);
2061                         if (!t) {
2062                                 pr_warn("map '%s': key type [%d] not found.\n",
2063                                         map->name, m->type);
2064                                 return -EINVAL;
2065                         }
2066                         if (!btf_is_ptr(t)) {
2067                                 pr_warn("map '%s': key spec is not PTR: %s.\n",
2068                                         map->name, btf_kind_str(t));
2069                                 return -EINVAL;
2070                         }
2071                         sz = btf__resolve_size(obj->btf, t->type);
2072                         if (sz < 0) {
2073                                 pr_warn("map '%s': can't determine key size for type [%u]: %zd.\n",
2074                                         map->name, t->type, (ssize_t)sz);
2075                                 return sz;
2076                         }
2077                         pr_debug("map '%s': found key [%u], sz = %zd.\n",
2078                                  map->name, t->type, (ssize_t)sz);
2079                         if (map->def.key_size && map->def.key_size != sz) {
2080                                 pr_warn("map '%s': conflicting key size %u != %zd.\n",
2081                                         map->name, map->def.key_size, (ssize_t)sz);
2082                                 return -EINVAL;
2083                         }
2084                         map->def.key_size = sz;
2085                         map->btf_key_type_id = t->type;
2086                 } else if (strcmp(name, "value_size") == 0) {
2087                         __u32 sz;
2088
2089                         if (!get_map_field_int(map->name, obj->btf, m, &sz))
2090                                 return -EINVAL;
2091                         pr_debug("map '%s': found value_size = %u.\n",
2092                                  map->name, sz);
2093                         if (map->def.value_size && map->def.value_size != sz) {
2094                                 pr_warn("map '%s': conflicting value size %u != %u.\n",
2095                                         map->name, map->def.value_size, sz);
2096                                 return -EINVAL;
2097                         }
2098                         map->def.value_size = sz;
2099                 } else if (strcmp(name, "value") == 0) {
2100                         __s64 sz;
2101
2102                         t = btf__type_by_id(obj->btf, m->type);
2103                         if (!t) {
2104                                 pr_warn("map '%s': value type [%d] not found.\n",
2105                                         map->name, m->type);
2106                                 return -EINVAL;
2107                         }
2108                         if (!btf_is_ptr(t)) {
2109                                 pr_warn("map '%s': value spec is not PTR: %s.\n",
2110                                         map->name, btf_kind_str(t));
2111                                 return -EINVAL;
2112                         }
2113                         sz = btf__resolve_size(obj->btf, t->type);
2114                         if (sz < 0) {
2115                                 pr_warn("map '%s': can't determine value size for type [%u]: %zd.\n",
2116                                         map->name, t->type, (ssize_t)sz);
2117                                 return sz;
2118                         }
2119                         pr_debug("map '%s': found value [%u], sz = %zd.\n",
2120                                  map->name, t->type, (ssize_t)sz);
2121                         if (map->def.value_size && map->def.value_size != sz) {
2122                                 pr_warn("map '%s': conflicting value size %u != %zd.\n",
2123                                         map->name, map->def.value_size, (ssize_t)sz);
2124                                 return -EINVAL;
2125                         }
2126                         map->def.value_size = sz;
2127                         map->btf_value_type_id = t->type;
2128                 } else if (strcmp(name, "values") == 0) {
2130                         int err;
2131
2132                         if (is_inner) {
2133                                 pr_warn("map '%s': multi-level inner maps not supported.\n",
2134                                         map->name);
2135                                 return -ENOTSUP;
2136                         }
2137                         if (i != vlen - 1) {
2138                                 pr_warn("map '%s': '%s' member should be last.\n",
2139                                         map->name, name);
2140                                 return -EINVAL;
2141                         }
2142                         if (!bpf_map_type__is_map_in_map(map->def.type)) {
2143                                 pr_warn("map '%s': should be map-in-map.\n",
2144                                         map->name);
2145                                 return -ENOTSUP;
2146                         }
2147                         if (map->def.value_size && map->def.value_size != 4) {
2148                                 pr_warn("map '%s': conflicting value size %u != 4.\n",
2149                                         map->name, map->def.value_size);
2150                                 return -EINVAL;
2151                         }
2152                         map->def.value_size = 4;
2153                         t = btf__type_by_id(obj->btf, m->type);
2154                         if (!t) {
2155                                 pr_warn("map '%s': map-in-map inner type [%d] not found.\n",
2156                                         map->name, m->type);
2157                                 return -EINVAL;
2158                         }
2159                         if (!btf_is_array(t) || btf_array(t)->nelems) {
2160                                 pr_warn("map '%s': map-in-map inner spec is not a zero-sized array.\n",
2161                                         map->name);
2162                                 return -EINVAL;
2163                         }
2164                         t = skip_mods_and_typedefs(obj->btf, btf_array(t)->type,
2165                                                    NULL);
2166                         if (!btf_is_ptr(t)) {
2167                                 pr_warn("map '%s': map-in-map inner def is of unexpected kind %s.\n",
2168                                         map->name, btf_kind_str(t));
2169                                 return -EINVAL;
2170                         }
2171                         t = skip_mods_and_typedefs(obj->btf, t->type, NULL);
2172                         if (!btf_is_struct(t)) {
2173                                 pr_warn("map '%s': map-in-map inner def is of unexpected kind %s.\n",
2174                                         map->name, btf_kind_str(t));
2175                                 return -EINVAL;
2176                         }
2177
2178                         map->inner_map = calloc(1, sizeof(*map->inner_map));
2179                         if (!map->inner_map)
2180                                 return -ENOMEM;
2181                         map->inner_map->sec_idx = obj->efile.btf_maps_shndx;
2182                         map->inner_map->name = malloc(strlen(map->name) +
2183                                                       sizeof(".inner") + 1);
2184                         if (!map->inner_map->name)
2185                                 return -ENOMEM;
2186                         sprintf(map->inner_map->name, "%s.inner", map->name);
2187
2188                         err = parse_btf_map_def(obj, map->inner_map, t, strict,
2189                                                 true /* is_inner */, NULL);
2190                         if (err)
2191                                 return err;
2192                 } else if (strcmp(name, "pinning") == 0) {
2193                         __u32 val;
2194                         int err;
2195
2196                         if (is_inner) {
2197                                 pr_debug("map '%s': inner def can't be pinned.\n",
2198                                          map->name);
2199                                 return -EINVAL;
2200                         }
2201                         if (!get_map_field_int(map->name, obj->btf, m, &val))
2202                                 return -EINVAL;
2203                         pr_debug("map '%s': found pinning = %u.\n",
2204                                  map->name, val);
2205
2206                         if (val != LIBBPF_PIN_NONE &&
2207                             val != LIBBPF_PIN_BY_NAME) {
2208                                 pr_warn("map '%s': invalid pinning value %u.\n",
2209                                         map->name, val);
2210                                 return -EINVAL;
2211                         }
2212                         if (val == LIBBPF_PIN_BY_NAME) {
2213                                 err = build_map_pin_path(map, pin_root_path);
2214                                 if (err) {
2215                                         pr_warn("map '%s': couldn't build pin path.\n",
2216                                                 map->name);
2217                                         return err;
2218                                 }
2219                         }
2220                 } else {
2221                         if (strict) {
2222                                 pr_warn("map '%s': unknown field '%s'.\n",
2223                                         map->name, name);
2224                                 return -ENOTSUP;
2225                         }
2226                         pr_debug("map '%s': ignoring unknown field '%s'.\n",
2227                                  map->name, name);
2228                 }
2229         }
2230
2231         if (map->def.type == BPF_MAP_TYPE_UNSPEC) {
2232                 pr_warn("map '%s': map type isn't specified.\n", map->name);
2233                 return -EINVAL;
2234         }
2235
2236         return 0;
2237 }
2238
2239 static int bpf_object__init_user_btf_map(struct bpf_object *obj,
2240                                          const struct btf_type *sec,
2241                                          int var_idx, int sec_idx,
2242                                          const Elf_Data *data, bool strict,
2243                                          const char *pin_root_path)
2244 {
2245         const struct btf_type *var, *def;
2246         const struct btf_var_secinfo *vi;
2247         const struct btf_var *var_extra;
2248         const char *map_name;
2249         struct bpf_map *map;
2250
2251         vi = btf_var_secinfos(sec) + var_idx;
2252         var = btf__type_by_id(obj->btf, vi->type);
2253         var_extra = btf_var(var);
2254         map_name = btf__name_by_offset(obj->btf, var->name_off);
2255
2256         if (map_name == NULL || map_name[0] == '\0') {
2257                 pr_warn("map #%d: empty name.\n", var_idx);
2258                 return -EINVAL;
2259         }
2260         if ((__u64)vi->offset + vi->size > data->d_size) {
2261                 pr_warn("map '%s' BTF data is corrupted.\n", map_name);
2262                 return -EINVAL;
2263         }
2264         if (!btf_is_var(var)) {
2265                 pr_warn("map '%s': unexpected var kind %s.\n",
2266                         map_name, btf_kind_str(var));
2267                 return -EINVAL;
2268         }
2269         if (var_extra->linkage != BTF_VAR_GLOBAL_ALLOCATED &&
2270             var_extra->linkage != BTF_VAR_STATIC) {
2271                 pr_warn("map '%s': unsupported var linkage %u.\n",
2272                         map_name, var_extra->linkage);
2273                 return -EOPNOTSUPP;
2274         }
2275
2276         def = skip_mods_and_typedefs(obj->btf, var->type, NULL);
2277         if (!btf_is_struct(def)) {
2278                 pr_warn("map '%s': unexpected def kind %s.\n",
2279                         map_name, btf_kind_str(def));
2280                 return -EINVAL;
2281         }
2282         if (def->size > vi->size) {
2283                 pr_warn("map '%s': invalid def size.\n", map_name);
2284                 return -EINVAL;
2285         }
2286
2287         map = bpf_object__add_map(obj);
2288         if (IS_ERR(map))
2289                 return PTR_ERR(map);
2290         map->name = strdup(map_name);
2291         if (!map->name) {
2292                 pr_warn("map '%s': failed to alloc map name.\n", map_name);
2293                 return -ENOMEM;
2294         }
2295         map->libbpf_type = LIBBPF_MAP_UNSPEC;
2296         map->def.type = BPF_MAP_TYPE_UNSPEC;
2297         map->sec_idx = sec_idx;
2298         map->sec_offset = vi->offset;
2299         map->btf_var_idx = var_idx;
2300         pr_debug("map '%s': at sec_idx %d, offset %zu.\n",
2301                  map_name, map->sec_idx, map->sec_offset);
2302
2303         return parse_btf_map_def(obj, map, def, strict, false, pin_root_path);
2304 }
2305
2306 static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict,
2307                                           const char *pin_root_path)
2308 {
2309         const struct btf_type *sec = NULL;
2310         int nr_types, i, vlen, err;
2311         const struct btf_type *t;
2312         const char *name;
2313         Elf_Data *data;
2314         Elf_Scn *scn;
2315
2316         if (obj->efile.btf_maps_shndx < 0)
2317                 return 0;
2318
2319         scn = elf_sec_by_idx(obj, obj->efile.btf_maps_shndx);
2320         data = elf_sec_data(obj, scn);
2321         if (!scn || !data) {
2322                 pr_warn("elf: failed to get %s map definitions for %s\n",
2323                         MAPS_ELF_SEC, obj->path);
2324                 return -EINVAL;
2325         }
2326
2327         nr_types = btf__get_nr_types(obj->btf);
2328         for (i = 1; i <= nr_types; i++) {
2329                 t = btf__type_by_id(obj->btf, i);
2330                 if (!btf_is_datasec(t))
2331                         continue;
2332                 name = btf__name_by_offset(obj->btf, t->name_off);
2333                 if (strcmp(name, MAPS_ELF_SEC) == 0) {
2334                         sec = t;
2335                         obj->efile.btf_maps_sec_btf_id = i;
2336                         break;
2337                 }
2338         }
2339
2340         if (!sec) {
2341                 pr_warn("DATASEC '%s' not found.\n", MAPS_ELF_SEC);
2342                 return -ENOENT;
2343         }
2344
2345         vlen = btf_vlen(sec);
2346         for (i = 0; i < vlen; i++) {
2347                 err = bpf_object__init_user_btf_map(obj, sec, i,
2348                                                     obj->efile.btf_maps_shndx,
2349                                                     data, strict,
2350                                                     pin_root_path);
2351                 if (err)
2352                         return err;
2353         }
2354
2355         return 0;
2356 }
2357
2358 static int bpf_object__init_maps(struct bpf_object *obj,
2359                                  const struct bpf_object_open_opts *opts)
2360 {
2361         const char *pin_root_path;
2362         bool strict;
2363         int err;
2364
2365         strict = !OPTS_GET(opts, relaxed_maps, false);
2366         pin_root_path = OPTS_GET(opts, pin_root_path, NULL);
2367
2368         err = bpf_object__init_user_maps(obj, strict);
2369         err = err ?: bpf_object__init_user_btf_maps(obj, strict, pin_root_path);
2370         err = err ?: bpf_object__init_global_data_maps(obj);
2371         err = err ?: bpf_object__init_kconfig_map(obj);
2372         err = err ?: bpf_object__init_struct_ops_maps(obj);
2373         if (err)
2374                 return err;
2375
2376         return 0;
2377 }
2378
2379 static bool section_have_execinstr(struct bpf_object *obj, int idx)
2380 {
2381         GElf_Shdr sh;
2382
2383         if (elf_sec_hdr(obj, elf_sec_by_idx(obj, idx), &sh))
2384                 return false;
2385
2386         return sh.sh_flags & SHF_EXECINSTR;
2387 }
2388
2389 static bool btf_needs_sanitization(struct bpf_object *obj)
2390 {
2391         bool has_func_global = kernel_supports(FEAT_BTF_GLOBAL_FUNC);
2392         bool has_datasec = kernel_supports(FEAT_BTF_DATASEC);
2393         bool has_float = kernel_supports(FEAT_BTF_FLOAT);
2394         bool has_func = kernel_supports(FEAT_BTF_FUNC);
2395
2396         return !has_func || !has_datasec || !has_func_global || !has_float;
2397 }
2398
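/* Rewrite BTF kinds that the running kernel does not understand into
 * older, accepted ones. E.g. on a pre-DATASEC kernel, a DATASEC ".data"
 * containing one int variable "x" is rewritten in place, roughly, as:
 *
 *    VAR "x" (int)            ->  INT (1 byte, named "x")
 *    DATASEC ".data" { x@0 }  ->  STRUCT "_data" { x; }
 *
 * (illustrative: '.' in the section name becomes '_', and each secinfo
 * entry becomes a struct member preserving the variable's name).
 */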
2399 static void bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf)
2400 {
2401         bool has_func_global = kernel_supports(FEAT_BTF_GLOBAL_FUNC);
2402         bool has_datasec = kernel_supports(FEAT_BTF_DATASEC);
2403         bool has_float = kernel_supports(FEAT_BTF_FLOAT);
2404         bool has_func = kernel_supports(FEAT_BTF_FUNC);
2405         struct btf_type *t;
2406         int i, j, vlen;
2407
2408         for (i = 1; i <= btf__get_nr_types(btf); i++) {
2409                 t = (struct btf_type *)btf__type_by_id(btf, i);
2410
2411                 if (!has_datasec && btf_is_var(t)) {
2412                         /* replace VAR with INT */
2413                         t->info = BTF_INFO_ENC(BTF_KIND_INT, 0, 0);
2414                         /*
2415                          * using size = 1 is the safest choice, 4 will be too
2416                          * big and cause kernel BTF validation failure if
2417                          * original variable took less than 4 bytes
2418                          */
2419                         t->size = 1;
2420                         *(int *)(t + 1) = BTF_INT_ENC(0, 0, 8);
2421                 } else if (!has_datasec && btf_is_datasec(t)) {
2422                         /* replace DATASEC with STRUCT */
2423                         const struct btf_var_secinfo *v = btf_var_secinfos(t);
2424                         struct btf_member *m = btf_members(t);
2425                         struct btf_type *vt;
2426                         char *name;
2427
2428                         name = (char *)btf__name_by_offset(btf, t->name_off);
2429                         while (*name) {
2430                                 if (*name == '.')
2431                                         *name = '_';
2432                                 name++;
2433                         }
2434
2435                         vlen = btf_vlen(t);
2436                         t->info = BTF_INFO_ENC(BTF_KIND_STRUCT, 0, vlen);
2437                         for (j = 0; j < vlen; j++, v++, m++) {
2438                                 /* order of field assignments is important */
2439                                 m->offset = v->offset * 8;
2440                                 m->type = v->type;
2441                                 /* preserve variable name as member name */
2442                                 vt = (void *)btf__type_by_id(btf, v->type);
2443                                 m->name_off = vt->name_off;
2444                         }
2445                 } else if (!has_func && btf_is_func_proto(t)) {
2446                         /* replace FUNC_PROTO with ENUM */
2447                         vlen = btf_vlen(t);
2448                         t->info = BTF_INFO_ENC(BTF_KIND_ENUM, 0, vlen);
2449                         t->size = sizeof(__u32); /* kernel enforced */
2450                 } else if (!has_func && btf_is_func(t)) {
2451                         /* replace FUNC with TYPEDEF */
2452                         t->info = BTF_INFO_ENC(BTF_KIND_TYPEDEF, 0, 0);
2453                 } else if (!has_func_global && btf_is_func(t)) {
2454                         /* replace BTF_FUNC_GLOBAL with BTF_FUNC_STATIC */
2455                         t->info = BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0);
2456                 } else if (!has_float && btf_is_float(t)) {
2457                         /* replace FLOAT with an equally-sized empty STRUCT;
2458                          * since C compilers do not accept e.g. "float" as a
2459                          * valid struct name, make it anonymous
2460                          */
2461                         t->name_off = 0;
2462                         t->info = BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 0);
2463                 }
2464         }
2465 }
2466
2467 static bool libbpf_needs_btf(const struct bpf_object *obj)
2468 {
2469         return obj->efile.btf_maps_shndx >= 0 ||
2470                obj->efile.st_ops_shndx >= 0 ||
2471                obj->nr_extern > 0;
2472 }
2473
2474 static bool kernel_needs_btf(const struct bpf_object *obj)
2475 {
2476         return obj->efile.st_ops_shndx >= 0;
2477 }
2478
2479 static int bpf_object__init_btf(struct bpf_object *obj,
2480                                 Elf_Data *btf_data,
2481                                 Elf_Data *btf_ext_data)
2482 {
2483         int err = -ENOENT;
2484
2485         if (btf_data) {
2486                 obj->btf = btf__new(btf_data->d_buf, btf_data->d_size);
2487                 if (IS_ERR(obj->btf)) {
2488                         err = PTR_ERR(obj->btf);
2489                         obj->btf = NULL;
2490                         pr_warn("Error loading ELF section %s: %d.\n",
2491                                 BTF_ELF_SEC, err);
2492                         goto out;
2493                 }
2494                 /* enforce 8-byte pointers for BPF-targeted BTFs */
2495                 btf__set_pointer_size(obj->btf, 8);
2496                 err = 0;
2497         }
2498         if (btf_ext_data) {
2499                 if (!obj->btf) {
2500                         pr_debug("Ignoring ELF section %s: the ELF section it depends on, %s, is missing.\n",
2501                                  BTF_EXT_ELF_SEC, BTF_ELF_SEC);
2502                         goto out;
2503                 }
2504                 obj->btf_ext = btf_ext__new(btf_ext_data->d_buf,
2505                                             btf_ext_data->d_size);
2506                 if (IS_ERR(obj->btf_ext)) {
2507                         pr_warn("Error loading ELF section %s: %ld. Ignoring it and continuing.\n",
2508                                 BTF_EXT_ELF_SEC, PTR_ERR(obj->btf_ext));
2509                         obj->btf_ext = NULL;
2510                         goto out;
2511                 }
2512         }
2513 out:
2514         if (err && libbpf_needs_btf(obj)) {
2515                 pr_warn("BTF is required, but is missing or corrupted.\n");
2516                 return err;
2517         }
2518         return 0;
2519 }
2520
2521 static int bpf_object__finalize_btf(struct bpf_object *obj)
2522 {
2523         int err;
2524
2525         if (!obj->btf)
2526                 return 0;
2527
2528         err = btf__finalize_data(obj, obj->btf);
2529         if (err) {
2530                 pr_warn("Error finalizing %s: %d.\n", BTF_ELF_SEC, err);
2531                 return err;
2532         }
2533
2534         return 0;
2535 }
2536
2537 static bool prog_needs_vmlinux_btf(struct bpf_program *prog)
2538 {
2539         if (prog->type == BPF_PROG_TYPE_STRUCT_OPS ||
2540             prog->type == BPF_PROG_TYPE_LSM)
2541                 return true;
2542
2543         /* BPF_PROG_TYPE_TRACING programs which do not attach to other programs
2544          * also need vmlinux BTF
2545          */
2546         if (prog->type == BPF_PROG_TYPE_TRACING && !prog->attach_prog_fd)
2547                 return true;
2548
2549         return false;
2550 }
2551
2552 static bool obj_needs_vmlinux_btf(const struct bpf_object *obj)
2553 {
2554         struct bpf_program *prog;
2555         int i;
2556
2557         /* CO-RE relocations need kernel BTF */
2558         if (obj->btf_ext && obj->btf_ext->core_relo_info.len)
2559                 return true;
2560
2561         /* Support for typed ksyms needs kernel BTF */
2562         for (i = 0; i < obj->nr_extern; i++) {
2563                 const struct extern_desc *ext;
2564
2565                 ext = &obj->externs[i];
2566                 if (ext->type == EXT_KSYM && ext->ksym.type_id)
2567                         return true;
2568         }
2569
2570         bpf_object__for_each_program(prog, obj) {
2571                 if (!prog->load)
2572                         continue;
2573                 if (prog_needs_vmlinux_btf(prog))
2574                         return true;
2575         }
2576
2577         return false;
2578 }
2579
2580 static int bpf_object__load_vmlinux_btf(struct bpf_object *obj, bool force)
2581 {
2582         int err;
2583
2584         /* btf_vmlinux could be loaded earlier */
2585         if (obj->btf_vmlinux)
2586                 return 0;
2587
2588         if (!force && !obj_needs_vmlinux_btf(obj))
2589                 return 0;
2590
2591         obj->btf_vmlinux = libbpf_find_kernel_btf();
2592         if (IS_ERR(obj->btf_vmlinux)) {
2593                 err = PTR_ERR(obj->btf_vmlinux);
2594                 pr_warn("Error loading vmlinux BTF: %d\n", err);
2595                 obj->btf_vmlinux = NULL;
2596                 return err;
2597         }
2598         return 0;
2599 }
2600
2601 static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj)
2602 {
2603         struct btf *kern_btf = obj->btf;
2604         bool btf_mandatory, sanitize;
2605         int err = 0;
2606
2607         if (!obj->btf)
2608                 return 0;
2609
2610         if (!kernel_supports(FEAT_BTF)) {
2611                 if (kernel_needs_btf(obj)) {
2612                         err = -EOPNOTSUPP;
2613                         goto report;
2614                 }
2615                 pr_debug("Kernel doesn't support BTF, skipping BTF upload.\n");
2616                 return 0;
2617         }
2618
2619         sanitize = btf_needs_sanitization(obj);
2620         if (sanitize) {
2621                 const void *raw_data;
2622                 __u32 sz;
2623
2624                 /* clone BTF to sanitize a copy and leave the original intact */
2625                 raw_data = btf__get_raw_data(obj->btf, &sz);
2626                 kern_btf = btf__new(raw_data, sz);
2627                 if (IS_ERR(kern_btf))
2628                         return PTR_ERR(kern_btf);
2629
2630                 /* enforce 8-byte pointers for BPF-targeted BTFs */
2631                 btf__set_pointer_size(obj->btf, 8);
2632                 bpf_object__sanitize_btf(obj, kern_btf);
2633         }
2634
2635         err = btf__load(kern_btf);
2636         if (sanitize) {
2637                 if (!err) {
2638                         /* move fd to libbpf's BTF */
2639                         btf__set_fd(obj->btf, btf__fd(kern_btf));
2640                         btf__set_fd(kern_btf, -1);
2641                 }
2642                 btf__free(kern_btf);
2643         }
2644 report:
2645         if (err) {
2646                 btf_mandatory = kernel_needs_btf(obj);
2647                 pr_warn("Error loading .BTF into kernel: %d. %s\n", err,
2648                         btf_mandatory ? "BTF is mandatory, can't proceed."
2649                                       : "BTF is optional, ignoring.");
2650                 if (!btf_mandatory)
2651                         err = 0;
2652         }
2653         return err;
2654 }
2655
2656 static const char *elf_sym_str(const struct bpf_object *obj, size_t off)
2657 {
2658         const char *name;
2659
2660         name = elf_strptr(obj->efile.elf, obj->efile.strtabidx, off);
2661         if (!name) {
2662                 pr_warn("elf: failed to get symbol name string at offset %zu from %s: %s\n",
2663                         off, obj->path, elf_errmsg(-1));
2664                 return NULL;
2665         }
2666
2667         return name;
2668 }
2669
2670 static const char *elf_sec_str(const struct bpf_object *obj, size_t off)
2671 {
2672         const char *name;
2673
2674         name = elf_strptr(obj->efile.elf, obj->efile.shstrndx, off);
2675         if (!name) {
2676                 pr_warn("elf: failed to get section name string at offset %zu from %s: %s\n",
2677                         off, obj->path, elf_errmsg(-1));
2678                 return NULL;
2679         }
2680
2681         return name;
2682 }
2683
2684 static Elf_Scn *elf_sec_by_idx(const struct bpf_object *obj, size_t idx)
2685 {
2686         Elf_Scn *scn;
2687
2688         scn = elf_getscn(obj->efile.elf, idx);
2689         if (!scn) {
2690                 pr_warn("elf: failed to get section(%zu) from %s: %s\n",
2691                         idx, obj->path, elf_errmsg(-1));
2692                 return NULL;
2693         }
2694         return scn;
2695 }
2696
2697 static Elf_Scn *elf_sec_by_name(const struct bpf_object *obj, const char *name)
2698 {
2699         Elf_Scn *scn = NULL;
2700         Elf *elf = obj->efile.elf;
2701         const char *sec_name;
2702
2703         while ((scn = elf_nextscn(elf, scn)) != NULL) {
2704                 sec_name = elf_sec_name(obj, scn);
2705                 if (!sec_name)
2706                         return NULL;
2707
2708                 if (strcmp(sec_name, name) != 0)
2709                         continue;
2710
2711                 return scn;
2712         }
2713         return NULL;
2714 }
2715
2716 static int elf_sec_hdr(const struct bpf_object *obj, Elf_Scn *scn, GElf_Shdr *hdr)
2717 {
2718         if (!scn)
2719                 return -EINVAL;
2720
2721         if (gelf_getshdr(scn, hdr) != hdr) {
2722                 pr_warn("elf: failed to get section(%zu) header from %s: %s\n",
2723                         elf_ndxscn(scn), obj->path, elf_errmsg(-1));
2724                 return -EINVAL;
2725         }
2726
2727         return 0;
2728 }
2729
2730 static const char *elf_sec_name(const struct bpf_object *obj, Elf_Scn *scn)
2731 {
2732         const char *name;
2733         GElf_Shdr sh;
2734
2735         if (!scn)
2736                 return NULL;
2737
2738         if (elf_sec_hdr(obj, scn, &sh))
2739                 return NULL;
2740
2741         name = elf_sec_str(obj, sh.sh_name);
2742         if (!name) {
2743                 pr_warn("elf: failed to get section(%zu) name from %s: %s\n",
2744                         elf_ndxscn(scn), obj->path, elf_errmsg(-1));
2745                 return NULL;
2746         }
2747
2748         return name;
2749 }
2750
2751 static Elf_Data *elf_sec_data(const struct bpf_object *obj, Elf_Scn *scn)
2752 {
2753         Elf_Data *data;
2754
2755         if (!scn)
2756                 return NULL;
2757
2758         data = elf_getdata(scn, 0);
2759         if (!data) {
2760                 pr_warn("elf: failed to get section(%zu) %s data from %s: %s\n",
2761                         elf_ndxscn(scn), elf_sec_name(obj, scn) ?: "<?>",
2762                         obj->path, elf_errmsg(-1));
2763                 return NULL;
2764         }
2765
2766         return data;
2767 }
2768
2769 static int elf_sym_by_sec_off(const struct bpf_object *obj, size_t sec_idx,
2770                               size_t off, __u32 sym_type, GElf_Sym *sym)
2771 {
2772         Elf_Data *symbols = obj->efile.symbols;
2773         size_t n = symbols->d_size / sizeof(GElf_Sym);
2774         int i;
2775
2776         for (i = 0; i < n; i++) {
2777                 if (!gelf_getsym(symbols, i, sym))
2778                         continue;
2779                 if (sym->st_shndx != sec_idx || sym->st_value != off)
2780                         continue;
2781                 if (GELF_ST_TYPE(sym->st_info) != sym_type)
2782                         continue;
2783                 return 0;
2784         }
2785
2786         return -ENOENT;
2787 }
2788
2789 static bool is_sec_name_dwarf(const char *name)
2790 {
2791         /* approximation, but the actual list is too long */
2792         return strncmp(name, ".debug_", sizeof(".debug_") - 1) == 0;
2793 }
2794
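/* Decide whether an ELF section is irrelevant for BPF loading, e.g.
 * ".llvm_addrsig", ".debug_info", ".rel.debug_line", ".rel.BTF", and an
 * empty ".text" are all skipped, while ".maps", data, and program
 * sections are processed.
 */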
2795 static bool ignore_elf_section(GElf_Shdr *hdr, const char *name)
2796 {
2797         /* no special handling of .strtab */
2798         if (hdr->sh_type == SHT_STRTAB)
2799                 return true;
2800
2801         /* ignore .llvm_addrsig section as well */
2802         if (hdr->sh_type == SHT_LLVM_ADDRSIG)
2803                 return true;
2804
2805         /* no subprograms will lead to an empty .text section, ignore it */
2806         if (hdr->sh_type == SHT_PROGBITS && hdr->sh_size == 0 &&
2807             strcmp(name, ".text") == 0)
2808                 return true;
2809
2810         /* DWARF sections */
2811         if (is_sec_name_dwarf(name))
2812                 return true;
2813
2814         if (strncmp(name, ".rel", sizeof(".rel") - 1) == 0) {
2815                 name += sizeof(".rel") - 1;
2816                 /* DWARF section relocations */
2817                 if (is_sec_name_dwarf(name))
2818                         return true;
2819
2820                 /* .BTF and .BTF.ext don't need relocations */
2821                 if (strcmp(name, BTF_ELF_SEC) == 0 ||
2822                     strcmp(name, BTF_EXT_ELF_SEC) == 0)
2823                         return true;
2824         }
2825
2826         return false;
2827 }
2828
2829 static int cmp_progs(const void *_a, const void *_b)
2830 {
2831         const struct bpf_program *a = _a;
2832         const struct bpf_program *b = _b;
2833
2834         if (a->sec_idx != b->sec_idx)
2835                 return a->sec_idx < b->sec_idx ? -1 : 1;
2836
2837         /* sec_insn_off can't be the same within the section */
2838         return a->sec_insn_off < b->sec_insn_off ? -1 : 1;
2839 }
2840
2841 static int bpf_object__elf_collect(struct bpf_object *obj)
2842 {
2843         Elf *elf = obj->efile.elf;
2844         Elf_Data *btf_ext_data = NULL;
2845         Elf_Data *btf_data = NULL;
2846         int idx = 0, err = 0;
2847         const char *name;
2848         Elf_Data *data;
2849         Elf_Scn *scn;
2850         GElf_Shdr sh;
2851
2852         /* a bunch of ELF parsing functionality depends on processing symbols,
2853          * so do the first pass and find the symbol table
2854          */
2855         scn = NULL;
2856         while ((scn = elf_nextscn(elf, scn)) != NULL) {
2857                 if (elf_sec_hdr(obj, scn, &sh))
2858                         return -LIBBPF_ERRNO__FORMAT;
2859
2860                 if (sh.sh_type == SHT_SYMTAB) {
2861                         if (obj->efile.symbols) {
2862                                 pr_warn("elf: multiple symbol tables in %s\n", obj->path);
2863                                 return -LIBBPF_ERRNO__FORMAT;
2864                         }
2865
2866                         data = elf_sec_data(obj, scn);
2867                         if (!data)
2868                                 return -LIBBPF_ERRNO__FORMAT;
2869
2870                         obj->efile.symbols = data;
2871                         obj->efile.symbols_shndx = elf_ndxscn(scn);
2872                         obj->efile.strtabidx = sh.sh_link;
2873                 }
2874         }
2875
2876         scn = NULL;
2877         while ((scn = elf_nextscn(elf, scn)) != NULL) {
2878                 idx++;
2879
2880                 if (elf_sec_hdr(obj, scn, &sh))
2881                         return -LIBBPF_ERRNO__FORMAT;
2882
2883                 name = elf_sec_str(obj, sh.sh_name);
2884                 if (!name)
2885                         return -LIBBPF_ERRNO__FORMAT;
2886
2887                 if (ignore_elf_section(&sh, name))
2888                         continue;
2889
2890                 data = elf_sec_data(obj, scn);
2891                 if (!data)
2892                         return -LIBBPF_ERRNO__FORMAT;
2893
2894                 pr_debug("elf: section(%d) %s, size %lu, link %d, flags %lx, type=%d\n",
2895                          idx, name, (unsigned long)data->d_size,
2896                          (int)sh.sh_link, (unsigned long)sh.sh_flags,
2897                          (int)sh.sh_type);
2898
2899                 if (strcmp(name, "license") == 0) {
2900                         err = bpf_object__init_license(obj, data->d_buf, data->d_size);
2901                         if (err)
2902                                 return err;
2903                 } else if (strcmp(name, "version") == 0) {
2904                         err = bpf_object__init_kversion(obj, data->d_buf, data->d_size);
2905                         if (err)
2906                                 return err;
2907                 } else if (strcmp(name, "maps") == 0) {
2908                         obj->efile.maps_shndx = idx;
2909                 } else if (strcmp(name, MAPS_ELF_SEC) == 0) {
2910                         obj->efile.btf_maps_shndx = idx;
2911                 } else if (strcmp(name, BTF_ELF_SEC) == 0) {
2912                         btf_data = data;
2913                 } else if (strcmp(name, BTF_EXT_ELF_SEC) == 0) {
2914                         btf_ext_data = data;
2915                 } else if (sh.sh_type == SHT_SYMTAB) {
2916                         /* already processed during the first pass above */
2917                 } else if (sh.sh_type == SHT_PROGBITS && data->d_size > 0) {
2918                         if (sh.sh_flags & SHF_EXECINSTR) {
2919                                 if (strcmp(name, ".text") == 0)
2920                                         obj->efile.text_shndx = idx;
2921                                 err = bpf_object__add_programs(obj, data, name, idx);
2922                                 if (err)
2923                                         return err;
2924                         } else if (strcmp(name, DATA_SEC) == 0) {
2925                                 obj->efile.data = data;
2926                                 obj->efile.data_shndx = idx;
2927                         } else if (strcmp(name, RODATA_SEC) == 0) {
2928                                 obj->efile.rodata = data;
2929                                 obj->efile.rodata_shndx = idx;
2930                         } else if (strcmp(name, STRUCT_OPS_SEC) == 0) {
2931                                 obj->efile.st_ops_data = data;
2932                                 obj->efile.st_ops_shndx = idx;
2933                         } else {
2934                                 pr_info("elf: skipping unrecognized data section(%d) %s\n",
2935                                         idx, name);
2936                         }
2937                 } else if (sh.sh_type == SHT_REL) {
2938                         int nr_sects = obj->efile.nr_reloc_sects;
2939                         void *sects = obj->efile.reloc_sects;
2940                         int sec = sh.sh_info; /* points to other section */
2941
2942                         /* Only do relo for section with exec instructions */
2943                         if (!section_have_execinstr(obj, sec) &&
2944                             strcmp(name, ".rel" STRUCT_OPS_SEC) &&
2945                             strcmp(name, ".rel" MAPS_ELF_SEC)) {
2946                                 pr_info("elf: skipping relo section(%d) %s for section(%d) %s\n",
2947                                         idx, name, sec,
2948                                         elf_sec_name(obj, elf_sec_by_idx(obj, sec)) ?: "<?>");
2949                                 continue;
2950                         }
2951
2952                         sects = libbpf_reallocarray(sects, nr_sects + 1,
2953                                                     sizeof(*obj->efile.reloc_sects));
2954                         if (!sects)
2955                                 return -ENOMEM;
2956
2957                         obj->efile.reloc_sects = sects;
2958                         obj->efile.nr_reloc_sects++;
2959
2960                         obj->efile.reloc_sects[nr_sects].shdr = sh;
2961                         obj->efile.reloc_sects[nr_sects].data = data;
2962                 } else if (sh.sh_type == SHT_NOBITS && strcmp(name, BSS_SEC) == 0) {
2963                         obj->efile.bss = data;
2964                         obj->efile.bss_shndx = idx;
2965                 } else {
2966                         pr_info("elf: skipping section(%d) %s (size %zu)\n", idx, name,
2967                                 (size_t)sh.sh_size);
2968                 }
2969         }
2970
2971         if (!obj->efile.strtabidx || obj->efile.strtabidx > idx) {
2972                 pr_warn("elf: symbol strings section missing or invalid in %s\n", obj->path);
2973                 return -LIBBPF_ERRNO__FORMAT;
2974         }
2975
2976         /* sort BPF programs by section index and in-section instruction
2977          * offset for faster search */
2978         qsort(obj->programs, obj->nr_programs, sizeof(*obj->programs), cmp_progs);
2979
2980         return bpf_object__init_btf(obj, btf_data, btf_ext_data);
2981 }
2982
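/* Illustrative example (not part of this file): in BPF C code an extern like
 *
 *   extern unsigned int LINUX_KERNEL_VERSION __kconfig;
 *
 * compiles into an undefined (SHN_UNDEF) NOTYPE global symbol, which is
 * exactly what this predicate matches.
 */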
2983 static bool sym_is_extern(const GElf_Sym *sym)
2984 {
2985         int bind = GELF_ST_BIND(sym->st_info);
2986         /* externs are symbols w/ type=NOTYPE, bind=GLOBAL|WEAK, section=UND */
2987         return sym->st_shndx == SHN_UNDEF &&
2988                (bind == STB_GLOBAL || bind == STB_WEAK) &&
2989                GELF_ST_TYPE(sym->st_info) == STT_NOTYPE;
2990 }
2991
2992 static bool sym_is_subprog(const GElf_Sym *sym, int text_shndx)
2993 {
2994         int bind = GELF_ST_BIND(sym->st_info);
2995         int type = GELF_ST_TYPE(sym->st_info);
2996
2997         /* in .text section */
2998         if (sym->st_shndx != text_shndx)
2999                 return false;
3000
3001         /* local function */
3002         if (bind == STB_LOCAL && type == STT_SECTION)
3003                 return true;
3004
3005         /* global function */
3006         return bind == STB_GLOBAL && type == STT_FUNC;
3007 }
3008
3009 static int find_extern_btf_id(const struct btf *btf, const char *ext_name)
3010 {
3011         const struct btf_type *t;
3012         const char *var_name;
3013         int i, n;
3014
3015         if (!btf)
3016                 return -ESRCH;
3017
3018         n = btf__get_nr_types(btf);
3019         for (i = 1; i <= n; i++) {
3020                 t = btf__type_by_id(btf, i);
3021
3022                 if (!btf_is_var(t))
3023                         continue;
3024
3025                 var_name = btf__name_by_offset(btf, t->name_off);
3026                 if (strcmp(var_name, ext_name))
3027                         continue;
3028
3029                 if (btf_var(t)->linkage != BTF_VAR_GLOBAL_EXTERN)
3030                         return -EINVAL;
3031
3032                 return i;
3033         }
3034
3035         return -ENOENT;
3036 }
3037
3038 static int find_extern_sec_btf_id(struct btf *btf, int ext_btf_id)
{
3039         const struct btf_var_secinfo *vs;
3040         const struct btf_type *t;
3041         int i, j, n;
3042
3043         if (!btf)
3044                 return -ESRCH;
3045
3046         n = btf__get_nr_types(btf);
3047         for (i = 1; i <= n; i++) {
3048                 t = btf__type_by_id(btf, i);
3049
3050                 if (!btf_is_datasec(t))
3051                         continue;
3052
3053                 vs = btf_var_secinfos(t);
3054                 for (j = 0; j < btf_vlen(t); j++, vs++) {
3055                         if (vs->type == ext_btf_id)
3056                                 return i;
3057                 }
3058         }
3059
3060         return -ENOENT;
3061 }
3062
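/* Map the BTF type of a .kconfig extern to a KCFG_* kind. Illustrative
 * examples (assuming the usual __kconfig extern declarations; CONFIG_FOO is
 * a placeholder name):
 *
 *   extern _Bool CONFIG_FOO __kconfig;                 -> KCFG_BOOL
 *   extern char CONFIG_FOO __kconfig;                  -> KCFG_CHAR
 *   extern int CONFIG_FOO __kconfig;                   -> KCFG_INT
 *   extern enum libbpf_tristate CONFIG_FOO __kconfig;  -> KCFG_TRISTATE
 *   extern char CONFIG_FOO[20] __kconfig;              -> KCFG_CHAR_ARR
 */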
3063 static enum kcfg_type find_kcfg_type(const struct btf *btf, int id,
3064                                      bool *is_signed)
3065 {
3066         const struct btf_type *t;
3067         const char *name;
3068
3069         t = skip_mods_and_typedefs(btf, id, NULL);
3070         name = btf__name_by_offset(btf, t->name_off);
3071
3072         if (is_signed)
3073                 *is_signed = false;
3074         switch (btf_kind(t)) {
3075         case BTF_KIND_INT: {
3076                 int enc = btf_int_encoding(t);
3077
3078                 if (enc & BTF_INT_BOOL)
3079                         return t->size == 1 ? KCFG_BOOL : KCFG_UNKNOWN;
3080                 if (is_signed)
3081                         *is_signed = enc & BTF_INT_SIGNED;
3082                 if (t->size == 1)
3083                         return KCFG_CHAR;
3084                 if (t->size < 1 || t->size > 8 || (t->size & (t->size - 1)))
3085                         return KCFG_UNKNOWN;
3086                 return KCFG_INT;
3087         }
3088         case BTF_KIND_ENUM:
3089                 if (t->size != 4)
3090                         return KCFG_UNKNOWN;
3091                 if (strcmp(name, "libbpf_tristate"))
3092                         return KCFG_UNKNOWN;
3093                 return KCFG_TRISTATE;
3094         case BTF_KIND_ARRAY:
3095                 if (btf_array(t)->nelems == 0)
3096                         return KCFG_UNKNOWN;
3097                 if (find_kcfg_type(btf, btf_array(t)->type, NULL) != KCFG_CHAR)
3098                         return KCFG_UNKNOWN;
3099                 return KCFG_CHAR_ARR;
3100         default:
3101                 return KCFG_UNKNOWN;
3102         }
3103 }
3104
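/* Sorting kcfg externs by descending alignment, then ascending size, packs
 * them densely into the .kconfig map image with minimal padding when their
 * offsets are assigned with roundup(off, align) further below.
 */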
3105 static int cmp_externs(const void *_a, const void *_b)
3106 {
3107         const struct extern_desc *a = _a;
3108         const struct extern_desc *b = _b;
3109
3110         if (a->type != b->type)
3111                 return a->type < b->type ? -1 : 1;
3112
3113         if (a->type == EXT_KCFG) {
3114                 /* descending order by alignment requirements */
3115                 if (a->kcfg.align != b->kcfg.align)
3116                         return a->kcfg.align > b->kcfg.align ? -1 : 1;
3117                 /* ascending order by size, within same alignment class */
3118                 if (a->kcfg.sz != b->kcfg.sz)
3119                         return a->kcfg.sz < b->kcfg.sz ? -1 : 1;
3120         }
3121
3122         /* resolve ties by name */
3123         return strcmp(a->name, b->name);
3124 }
3125
3126 static int find_int_btf_id(const struct btf *btf)
3127 {
3128         const struct btf_type *t;
3129         int i, n;
3130
3131         n = btf__get_nr_types(btf);
3132         for (i = 1; i <= n; i++) {
3133                 t = btf__type_by_id(btf, i);
3134
3135                 if (btf_is_int(t) && btf_int_bits(t) == 32)
3136                         return i;
3137         }
3138
3139         return 0;
3140 }
3141
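/* Scan the symbol table for extern symbols, match each against its BTF VAR,
 * and classify it as a Kconfig extern (.kconfig DATASEC) or a kernel symbol
 * extern (.ksyms DATASEC); then fix up the corresponding BTF DATASECs so
 * the kernel verifier accepts them.
 */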
3142 static int bpf_object__collect_externs(struct bpf_object *obj)
3143 {
3144         struct btf_type *sec, *kcfg_sec = NULL, *ksym_sec = NULL;
3145         const struct btf_type *t;
3146         struct extern_desc *ext;
3147         int i, n, off;
3148         const char *ext_name, *sec_name;
3149         Elf_Scn *scn;
3150         GElf_Shdr sh;
3151
3152         if (!obj->efile.symbols)
3153                 return 0;
3154
3155         scn = elf_sec_by_idx(obj, obj->efile.symbols_shndx);
3156         if (elf_sec_hdr(obj, scn, &sh))
3157                 return -LIBBPF_ERRNO__FORMAT;
3158
3159         n = sh.sh_size / sh.sh_entsize;
3160         pr_debug("looking for externs among %d symbols...\n", n);
3161
3162         for (i = 0; i < n; i++) {
3163                 GElf_Sym sym;
3164
3165                 if (!gelf_getsym(obj->efile.symbols, i, &sym))
3166                         return -LIBBPF_ERRNO__FORMAT;
3167                 if (!sym_is_extern(&sym))
3168                         continue;
3169                 ext_name = elf_sym_str(obj, sym.st_name);
3170                 if (!ext_name || !ext_name[0])
3171                         continue;
3172
3173                 ext = obj->externs;
3174                 ext = libbpf_reallocarray(ext, obj->nr_extern + 1, sizeof(*ext));
3175                 if (!ext)
3176                         return -ENOMEM;
3177                 obj->externs = ext;
3178                 ext = &ext[obj->nr_extern];
3179                 memset(ext, 0, sizeof(*ext));
3180                 obj->nr_extern++;
3181
3182                 ext->btf_id = find_extern_btf_id(obj->btf, ext_name);
3183                 if (ext->btf_id <= 0) {
3184                         pr_warn("failed to find BTF for extern '%s': %d\n",
3185                                 ext_name, ext->btf_id);
3186                         return ext->btf_id;
3187                 }
3188                 t = btf__type_by_id(obj->btf, ext->btf_id);
3189                 ext->name = btf__name_by_offset(obj->btf, t->name_off);
3190                 ext->sym_idx = i;
3191                 ext->is_weak = GELF_ST_BIND(sym.st_info) == STB_WEAK;
3192
3193                 ext->sec_btf_id = find_extern_sec_btf_id(obj->btf, ext->btf_id);
3194                 if (ext->sec_btf_id <= 0) {
3195                         pr_warn("failed to find BTF for extern '%s' [%d] section: %d\n",
3196                                 ext_name, ext->btf_id, ext->sec_btf_id);
3197                         return ext->sec_btf_id;
3198                 }
3199                 sec = (void *)btf__type_by_id(obj->btf, ext->sec_btf_id);
3200                 sec_name = btf__name_by_offset(obj->btf, sec->name_off);
3201
3202                 if (strcmp(sec_name, KCONFIG_SEC) == 0) {
3203                         kcfg_sec = sec;
3204                         ext->type = EXT_KCFG;
3205                         ext->kcfg.sz = btf__resolve_size(obj->btf, t->type);
3206                         if (ext->kcfg.sz <= 0) {
3207                                 pr_warn("failed to resolve size of extern (kcfg) '%s': %d\n",
3208                                         ext_name, ext->kcfg.sz);
3209                                 return ext->kcfg.sz;
3210                         }
3211                         ext->kcfg.align = btf__align_of(obj->btf, t->type);
3212                         if (ext->kcfg.align <= 0) {
3213                                 pr_warn("failed to determine alignment of extern (kcfg) '%s': %d\n",
3214                                         ext_name, ext->kcfg.align);
3215                                 return -EINVAL;
3216                         }
3217                         ext->kcfg.type = find_kcfg_type(obj->btf, t->type,
3218                                                         &ext->kcfg.is_signed);
3219                         if (ext->kcfg.type == KCFG_UNKNOWN) {
3220                                 pr_warn("extern (kcfg) '%s' type is unsupported\n", ext_name);
3221                                 return -ENOTSUP;
3222                         }
3223                 } else if (strcmp(sec_name, KSYMS_SEC) == 0) {
3224                         ksym_sec = sec;
3225                         ext->type = EXT_KSYM;
3226                         skip_mods_and_typedefs(obj->btf, t->type,
3227                                                &ext->ksym.type_id);
3228                 } else {
3229                         pr_warn("unrecognized extern section '%s'\n", sec_name);
3230                         return -ENOTSUP;
3231                 }
3232         }
3233         pr_debug("collected %d externs total\n", obj->nr_extern);
3234
3235         if (!obj->nr_extern)
3236                 return 0;
3237
3238         /* sort externs by type, for kcfg ones also by (align, size, name) */
3239         qsort(obj->externs, obj->nr_extern, sizeof(*ext), cmp_externs);
3240
3241         /* for .ksyms section, we need to turn all externs into allocated
3242          * variables in BTF to pass kernel verification; we do this by
3243          * pretending that each extern is a 4-byte integer variable
3244          */
3245         if (ksym_sec) {
3246                 /* find existing 4-byte integer type in BTF to use for fake
3247                  * extern variables in DATASEC
3248                  */
3249                 int int_btf_id = find_int_btf_id(obj->btf);
3250
3251                 for (i = 0; i < obj->nr_extern; i++) {
3252                         ext = &obj->externs[i];
3253                         if (ext->type != EXT_KSYM)
3254                                 continue;
3255                         pr_debug("extern (ksym) #%d: symbol %d, name %s\n",
3256                                  i, ext->sym_idx, ext->name);
3257                 }
3258
3259                 sec = ksym_sec;
3260                 n = btf_vlen(sec);
3261                 for (i = 0, off = 0; i < n; i++, off += sizeof(int)) {
3262                         struct btf_var_secinfo *vs = btf_var_secinfos(sec) + i;
3263                         struct btf_type *vt;
3264
3265                         vt = (void *)btf__type_by_id(obj->btf, vs->type);
3266                         ext_name = btf__name_by_offset(obj->btf, vt->name_off);
3267                         ext = find_extern_by_name(obj, ext_name);
3268                         if (!ext) {
3269                                 pr_warn("failed to find extern definition for BTF var '%s'\n",
3270                                         ext_name);
3271                                 return -ESRCH;
3272                         }
3273                         btf_var(vt)->linkage = BTF_VAR_GLOBAL_ALLOCATED;
3274                         vt->type = int_btf_id;
3275                         vs->offset = off;
3276                         vs->size = sizeof(int);
3277                 }
3278                 sec->size = off;
3279         }
3280
3281         if (kcfg_sec) {
3282                 sec = kcfg_sec;
3283                 /* for kcfg externs calculate their offsets within a .kconfig map */
3284                 off = 0;
3285                 for (i = 0; i < obj->nr_extern; i++) {
3286                         ext = &obj->externs[i];
3287                         if (ext->type != EXT_KCFG)
3288                                 continue;
3289
3290                         ext->kcfg.data_off = roundup(off, ext->kcfg.align);
3291                         off = ext->kcfg.data_off + ext->kcfg.sz;
3292                         pr_debug("extern (kcfg) #%d: symbol %d, off %u, name %s\n",
3293                                  i, ext->sym_idx, ext->kcfg.data_off, ext->name);
3294                 }
3295                 sec->size = off;
3296                 n = btf_vlen(sec);
3297                 for (i = 0; i < n; i++) {
3298                         struct btf_var_secinfo *vs = btf_var_secinfos(sec) + i;
3299
3300                         t = btf__type_by_id(obj->btf, vs->type);
3301                         ext_name = btf__name_by_offset(obj->btf, t->name_off);
3302                         ext = find_extern_by_name(obj, ext_name);
3303                         if (!ext) {
3304                                 pr_warn("failed to find extern definition for BTF var '%s'\n",
3305                                         ext_name);
3306                                 return -ESRCH;
3307                         }
3308                         btf_var(t)->linkage = BTF_VAR_GLOBAL_ALLOCATED;
3309                         vs->offset = ext->kcfg.data_off;
3310                 }
3311         }
3312         return 0;
3313 }
3314
3315 struct bpf_program *
3316 bpf_object__find_program_by_title(const struct bpf_object *obj,
3317                                   const char *title)
3318 {
3319         struct bpf_program *pos;
3320
3321         bpf_object__for_each_program(pos, obj) {
3322                 if (pos->sec_name && !strcmp(pos->sec_name, title))
3323                         return pos;
3324         }
3325         return NULL;
3326 }
3327
3328 static bool prog_is_subprog(const struct bpf_object *obj,
3329                             const struct bpf_program *prog)
3330 {
3331         /* For legacy reasons, libbpf supports entry-point BPF programs
3332          * without a SEC() attribute, i.e., those in the .text section. But if
3333          * there are 2 or more such programs in the .text section, they all
3334          * must be subprograms called from entry-point BPF programs in
3335          * designated SEC()'tions, otherwise there is no way to distinguish
3336          * which of those programs should be loaded vs which are subprograms.
3337          * Similarly, if there is a function/program in .text and at least one
3338          * other BPF program with custom SEC() attribute, then we just assume
3339          * .text programs are subprograms (even if they are not called from
3340          * other programs), because libbpf never explicitly supported mixing
3341          * SEC()-designated BPF programs and .text entry-point BPF programs.
3342          */
3343         return prog->sec_idx == obj->efile.text_shndx && obj->nr_programs > 1;
3344 }
3345
3346 struct bpf_program *
3347 bpf_object__find_program_by_name(const struct bpf_object *obj,
3348                                  const char *name)
3349 {
3350         struct bpf_program *prog;
3351
3352         bpf_object__for_each_program(prog, obj) {
3353                 if (prog_is_subprog(obj, prog))
3354                         continue;
3355                 if (!strcmp(prog->name, name))
3356                         return prog;
3357         }
3358         return NULL;
3359 }
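/* Usage sketch (hypothetical caller code, not part of libbpf):
 *
 *   struct bpf_program *prog;
 *
 *   prog = bpf_object__find_program_by_name(obj, "handle_exec");
 *   if (!prog)
 *           return -ESRCH;
 */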
3360
3361 static bool bpf_object__shndx_is_data(const struct bpf_object *obj,
3362                                       int shndx)
3363 {
3364         return shndx == obj->efile.data_shndx ||
3365                shndx == obj->efile.bss_shndx ||
3366                shndx == obj->efile.rodata_shndx;
3367 }
3368
3369 static bool bpf_object__shndx_is_maps(const struct bpf_object *obj,
3370                                       int shndx)
3371 {
3372         return shndx == obj->efile.maps_shndx ||
3373                shndx == obj->efile.btf_maps_shndx;
3374 }
3375
3376 static enum libbpf_map_type
3377 bpf_object__section_to_libbpf_map_type(const struct bpf_object *obj, int shndx)
3378 {
3379         if (shndx == obj->efile.data_shndx)
3380                 return LIBBPF_MAP_DATA;
3381         else if (shndx == obj->efile.bss_shndx)
3382                 return LIBBPF_MAP_BSS;
3383         else if (shndx == obj->efile.rodata_shndx)
3384                 return LIBBPF_MAP_RODATA;
3385         else if (shndx == obj->efile.symbols_shndx)
3386                 return LIBBPF_MAP_KCONFIG;
3387         else
3388                 return LIBBPF_MAP_UNSPEC;
3389 }
3390
3391 static int bpf_program__record_reloc(struct bpf_program *prog,
3392                                      struct reloc_desc *reloc_desc,
3393                                      __u32 insn_idx, const char *sym_name,
3394                                      const GElf_Sym *sym, const GElf_Rel *rel)
3395 {
3396         struct bpf_insn *insn = &prog->insns[insn_idx];
3397         size_t map_idx, nr_maps = prog->obj->nr_maps;
3398         struct bpf_object *obj = prog->obj;
3399         __u32 shdr_idx = sym->st_shndx;
3400         enum libbpf_map_type type;
3401         const char *sym_sec_name;
3402         struct bpf_map *map;
3403
3404         reloc_desc->processed = false;
3405
3406         /* sub-program call relocation */
3407         if (insn->code == (BPF_JMP | BPF_CALL)) {
3408                 if (insn->src_reg != BPF_PSEUDO_CALL) {
3409                         pr_warn("prog '%s': incorrect bpf_call opcode\n", prog->name);
3410                         return -LIBBPF_ERRNO__RELOC;
3411                 }
3412                 /* text_shndx can be 0, if no default "main" program exists */
3413                 if (!shdr_idx || shdr_idx != obj->efile.text_shndx) {
3414                         sym_sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, shdr_idx));
3415                         pr_warn("prog '%s': bad call relo against '%s' in section '%s'\n",
3416                                 prog->name, sym_name, sym_sec_name);
3417                         return -LIBBPF_ERRNO__RELOC;
3418                 }
3419                 if (sym->st_value % BPF_INSN_SZ) {
3420                         pr_warn("prog '%s': bad call relo against '%s' at offset %zu\n",
3421                                 prog->name, sym_name, (size_t)sym->st_value);
3422                         return -LIBBPF_ERRNO__RELOC;
3423                 }
3424                 reloc_desc->type = RELO_CALL;
3425                 reloc_desc->insn_idx = insn_idx;
3426                 reloc_desc->sym_off = sym->st_value;
3427                 return 0;
3428         }
3429
3430         if (!is_ldimm64(insn)) {
3431                 pr_warn("prog '%s': invalid relo against '%s' for insns[%d].code 0x%x\n",
3432                         prog->name, sym_name, insn_idx, insn->code);
3433                 return -LIBBPF_ERRNO__RELOC;
3434         }
3435
3436         if (sym_is_extern(sym)) {
3437                 int sym_idx = GELF_R_SYM(rel->r_info);
3438                 int i, n = obj->nr_extern;
3439                 struct extern_desc *ext;
3440
3441                 for (i = 0; i < n; i++) {
3442                         ext = &obj->externs[i];
3443                         if (ext->sym_idx == sym_idx)
3444                                 break;
3445                 }
3446                 if (i >= n) {
3447                         pr_warn("prog '%s': extern relo failed to find extern for '%s' (%d)\n",
3448                                 prog->name, sym_name, sym_idx);
3449                         return -LIBBPF_ERRNO__RELOC;
3450                 }
3451                 pr_debug("prog '%s': found extern #%d '%s' (sym %d) for insn #%u\n",
3452                          prog->name, i, ext->name, ext->sym_idx, insn_idx);
3453                 reloc_desc->type = RELO_EXTERN;
3454                 reloc_desc->insn_idx = insn_idx;
3455                 reloc_desc->sym_off = i; /* sym_off stores extern index */
3456                 return 0;
3457         }
3458
3459         if (!shdr_idx || shdr_idx >= SHN_LORESERVE) {
3460                 pr_warn("prog '%s': invalid relo against '%s' in special section 0x%x; forgot to initialize global var?..\n",
3461                         prog->name, sym_name, shdr_idx);
3462                 return -LIBBPF_ERRNO__RELOC;
3463         }
3464
3465         /* loading subprog addresses */
3466         if (sym_is_subprog(sym, obj->efile.text_shndx)) {
3467                 /* global_func: sym->st_value = offset in the section, insn->imm = 0.
3468                  * local_func: sym->st_value = 0, insn->imm = offset in the section.
3469                  */
3470                 if ((sym->st_value % BPF_INSN_SZ) || (insn->imm % BPF_INSN_SZ)) {
3471                         pr_warn("prog '%s': bad subprog addr relo against '%s' at offset %zu+%d\n",
3472                                 prog->name, sym_name, (size_t)sym->st_value, insn->imm);
3473                         return -LIBBPF_ERRNO__RELOC;
3474                 }
3475
3476                 reloc_desc->type = RELO_SUBPROG_ADDR;
3477                 reloc_desc->insn_idx = insn_idx;
3478                 reloc_desc->sym_off = sym->st_value;
3479                 return 0;
3480         }
3481
3482         type = bpf_object__section_to_libbpf_map_type(obj, shdr_idx);
3483         sym_sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, shdr_idx));
3484
3485         /* generic map reference relocation */
3486         if (type == LIBBPF_MAP_UNSPEC) {
3487                 if (!bpf_object__shndx_is_maps(obj, shdr_idx)) {
3488                         pr_warn("prog '%s': bad map relo against '%s' in section '%s'\n",
3489                                 prog->name, sym_name, sym_sec_name);
3490                         return -LIBBPF_ERRNO__RELOC;
3491                 }
3492                 for (map_idx = 0; map_idx < nr_maps; map_idx++) {
3493                         map = &obj->maps[map_idx];
3494                         if (map->libbpf_type != type ||
3495                             map->sec_idx != sym->st_shndx ||
3496                             map->sec_offset != sym->st_value)
3497                                 continue;
3498                         pr_debug("prog '%s': found map %zd (%s, sec %d, off %zu) for insn #%u\n",
3499                                  prog->name, map_idx, map->name, map->sec_idx,
3500                                  map->sec_offset, insn_idx);
3501                         break;
3502                 }
3503                 if (map_idx >= nr_maps) {
3504                         pr_warn("prog '%s': map relo failed to find map for section '%s', off %zu\n",
3505                                 prog->name, sym_sec_name, (size_t)sym->st_value);
3506                         return -LIBBPF_ERRNO__RELOC;
3507                 }
3508                 reloc_desc->type = RELO_LD64;
3509                 reloc_desc->insn_idx = insn_idx;
3510                 reloc_desc->map_idx = map_idx;
3511                 reloc_desc->sym_off = 0; /* sym->st_value determines map_idx */
3512                 return 0;
3513         }
3514
3515         /* global data map relocation */
3516         if (!bpf_object__shndx_is_data(obj, shdr_idx)) {
3517                 pr_warn("prog '%s': bad data relo against section '%s'\n",
3518                         prog->name, sym_sec_name);
3519                 return -LIBBPF_ERRNO__RELOC;
3520         }
3521         for (map_idx = 0; map_idx < nr_maps; map_idx++) {
3522                 map = &obj->maps[map_idx];
3523                 if (map->libbpf_type != type)
3524                         continue;
3525                 pr_debug("prog '%s': found data map %zd (%s, sec %d, off %zu) for insn %u\n",
3526                          prog->name, map_idx, map->name, map->sec_idx,
3527                          map->sec_offset, insn_idx);
3528                 break;
3529         }
3530         if (map_idx >= nr_maps) {
3531                 pr_warn("prog '%s': data relo failed to find map for section '%s'\n",
3532                         prog->name, sym_sec_name);
3533                 return -LIBBPF_ERRNO__RELOC;
3534         }
3535
3536         reloc_desc->type = RELO_DATA;
3537         reloc_desc->insn_idx = insn_idx;
3538         reloc_desc->map_idx = map_idx;
3539         reloc_desc->sym_off = sym->st_value;
3540         return 0;
3541 }
3542
3543 static bool prog_contains_insn(const struct bpf_program *prog, size_t insn_idx)
3544 {
3545         return insn_idx >= prog->sec_insn_off &&
3546                insn_idx < prog->sec_insn_off + prog->sec_insn_cnt;
3547 }
3548
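/* Binary search over obj->programs, which cmp_progs() keeps sorted by
 * (sec_idx, sec_insn_off): find the right-most program that starts at or
 * before insn_idx in the given section, then verify that it actually
 * contains the instruction.
 */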
3549 static struct bpf_program *find_prog_by_sec_insn(const struct bpf_object *obj,
3550                                                  size_t sec_idx, size_t insn_idx)
3551 {
3552         int l = 0, r = obj->nr_programs - 1, m;
3553         struct bpf_program *prog;
3554
3555         while (l < r) {
3556                 m = l + (r - l + 1) / 2;
3557                 prog = &obj->programs[m];
3558
3559                 if (prog->sec_idx < sec_idx ||
3560                     (prog->sec_idx == sec_idx && prog->sec_insn_off <= insn_idx))
3561                         l = m;
3562                 else
3563                         r = m - 1;
3564         }
3565         /* matching program could be at index l, but it still might be the
3566          * wrong one, so we need to double check conditions for the last time
3567          */
3568         prog = &obj->programs[l];
3569         if (prog->sec_idx == sec_idx && prog_contains_insn(prog, insn_idx))
3570                 return prog;
3571         return NULL;
3572 }
3573
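/* Process one SHT_REL section: for every relocation record, find the BPF
 * program containing the target instruction and record a reloc_desc for it
 * via bpf_program__record_reloc().
 */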
3574 static int
3575 bpf_object__collect_prog_relos(struct bpf_object *obj, GElf_Shdr *shdr, Elf_Data *data)
3576 {
3577         Elf_Data *symbols = obj->efile.symbols;
3578         const char *relo_sec_name, *sec_name;
3579         size_t sec_idx = shdr->sh_info;
3580         struct bpf_program *prog;
3581         struct reloc_desc *relos;
3582         int err, i, nrels;
3583         const char *sym_name;
3584         __u32 insn_idx;
3585         GElf_Sym sym;
3586         GElf_Rel rel;
3587
3588         relo_sec_name = elf_sec_str(obj, shdr->sh_name);
3589         sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx));
3590         if (!relo_sec_name || !sec_name)
3591                 return -EINVAL;
3592
3593         pr_debug("sec '%s': collecting relocation for section(%zu) '%s'\n",
3594                  relo_sec_name, sec_idx, sec_name);
3595         nrels = shdr->sh_size / shdr->sh_entsize;
3596
3597         for (i = 0; i < nrels; i++) {
3598                 if (!gelf_getrel(data, i, &rel)) {
3599                         pr_warn("sec '%s': failed to get relo #%d\n", relo_sec_name, i);
3600                         return -LIBBPF_ERRNO__FORMAT;
3601                 }
3602                 if (!gelf_getsym(symbols, GELF_R_SYM(rel.r_info), &sym)) {
3603                         pr_warn("sec '%s': symbol 0x%zx not found for relo #%d\n",
3604                                 relo_sec_name, (size_t)GELF_R_SYM(rel.r_info), i);
3605                         return -LIBBPF_ERRNO__FORMAT;
3606                 }
3607                 if (rel.r_offset % BPF_INSN_SZ) {
3608                         pr_warn("sec '%s': invalid offset 0x%zx for relo #%d\n",
3609                                 relo_sec_name, (size_t)rel.r_offset, i);
3610                         return -LIBBPF_ERRNO__FORMAT;
3611                 }
3612
3613                 insn_idx = rel.r_offset / BPF_INSN_SZ;
3614                 /* relocations against static functions are recorded as
3615                  * relocations against the section that contains the function;
3616                  * in such a case, the symbol will be STT_SECTION and sym.st_name
3617                  * will point to the empty string (0), so fetch the section name
3618                  * instead
3619                  */
3620                 if (GELF_ST_TYPE(sym.st_info) == STT_SECTION && sym.st_name == 0)
3621                         sym_name = elf_sec_name(obj, elf_sec_by_idx(obj, sym.st_shndx));
3622                 else
3623                         sym_name = elf_sym_str(obj, sym.st_name);
3624                 sym_name = sym_name ?: "<?>";
3625
3626                 pr_debug("sec '%s': relo #%d: insn #%u against '%s'\n",
3627                          relo_sec_name, i, insn_idx, sym_name);
3628
3629                 prog = find_prog_by_sec_insn(obj, sec_idx, insn_idx);
3630                 if (!prog) {
3631                         pr_warn("sec '%s': relo #%d: program not found in section '%s' for insn #%u\n",
3632                                 relo_sec_name, i, sec_name, insn_idx);
3633                         return -LIBBPF_ERRNO__RELOC;
3634                 }
3635
3636                 relos = libbpf_reallocarray(prog->reloc_desc,
3637                                             prog->nr_reloc + 1, sizeof(*relos));
3638                 if (!relos)
3639                         return -ENOMEM;
3640                 prog->reloc_desc = relos;
3641
3642                 /* adjust insn_idx to local BPF program frame of reference */
3643                 insn_idx -= prog->sec_insn_off;
3644                 err = bpf_program__record_reloc(prog, &relos[prog->nr_reloc],
3645                                                 insn_idx, sym_name, &sym, &rel);
3646                 if (err)
3647                         return err;
3648
3649                 prog->nr_reloc++;
3650         }
3651         return 0;
3652 }
3653
3654 static int bpf_map_find_btf_info(struct bpf_object *obj, struct bpf_map *map)
3655 {
3656         struct bpf_map_def *def = &map->def;
3657         __u32 key_type_id = 0, value_type_id = 0;
3658         int ret;
3659
3660         /* if it's BTF-defined map, we don't need to search for type IDs.
3661          * For struct_ops map, it does not need btf_key_type_id and
3662          * btf_value_type_id.
3663          */
3664         if (map->sec_idx == obj->efile.btf_maps_shndx ||
3665             bpf_map__is_struct_ops(map))
3666                 return 0;
3667
3668         if (!bpf_map__is_internal(map)) {
3669                 ret = btf__get_map_kv_tids(obj->btf, map->name, def->key_size,
3670                                            def->value_size, &key_type_id,
3671                                            &value_type_id);
3672         } else {
3673                 /*
3674                  * LLVM annotates global data differently in BTF, that is,
3675                  * only as '.data', '.bss' or '.rodata'.
3676                  */
3677                 ret = btf__find_by_name(obj->btf,
3678                                 libbpf_type_to_btf_name[map->libbpf_type]);
3679         }
3680         if (ret < 0)
3681                 return ret;
3682
3683         map->btf_key_type_id = key_type_id;
3684         map->btf_value_type_id = bpf_map__is_internal(map) ?
3685                                  ret : value_type_id;
3686         return 0;
3687 }
3688
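/* Re-point a libbpf map at an already existing kernel map FD (e.g., a pinned
 * map). The open("/") + dup3() sequence below duplicates the caller's FD into
 * a fresh descriptor with O_CLOEXEC set, leaving the caller's FD untouched.
 */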
3689 int bpf_map__reuse_fd(struct bpf_map *map, int fd)
3690 {
3691         struct bpf_map_info info = {};
3692         __u32 len = sizeof(info);
3693         int new_fd, err;
3694         char *new_name;
3695
3696         err = bpf_obj_get_info_by_fd(fd, &info, &len);
3697         if (err)
3698                 return err;
3699
3700         new_name = strdup(info.name);
3701         if (!new_name)
3702                 return -errno;
3703
3704         new_fd = open("/", O_RDONLY | O_CLOEXEC);
3705         if (new_fd < 0) {
3706                 err = -errno;
3707                 goto err_free_new_name;
3708         }
3709
3710         new_fd = dup3(fd, new_fd, O_CLOEXEC);
3711         if (new_fd < 0) {
3712                 err = -errno;
3713                 goto err_close_new_fd;
3714         }
3715
3716         err = zclose(map->fd);
3717         if (err) {
3718                 err = -errno;
3719                 goto err_close_new_fd;
3720         }
3721         free(map->name);
3722
3723         map->fd = new_fd;
3724         map->name = new_name;
3725         map->def.type = info.type;
3726         map->def.key_size = info.key_size;
3727         map->def.value_size = info.value_size;
3728         map->def.max_entries = info.max_entries;
3729         map->def.map_flags = info.map_flags;
3730         map->btf_key_type_id = info.btf_key_type_id;
3731         map->btf_value_type_id = info.btf_value_type_id;
3732         map->reused = true;
3733
3734         return 0;
3735
3736 err_close_new_fd:
3737         close(new_fd);
3738 err_free_new_name:
3739         free(new_name);
3740         return err;
3741 }
3742
3743 __u32 bpf_map__max_entries(const struct bpf_map *map)
3744 {
3745         return map->def.max_entries;
3746 }
3747
3748 int bpf_map__set_max_entries(struct bpf_map *map, __u32 max_entries)
3749 {
3750         if (map->fd >= 0)
3751                 return -EBUSY;
3752         map->def.max_entries = max_entries;
3753         return 0;
3754 }
3755
3756 int bpf_map__resize(struct bpf_map *map, __u32 max_entries)
3757 {
3758         if (!map || !max_entries)
3759                 return -EINVAL;
3760
3761         return bpf_map__set_max_entries(map, max_entries);
3762 }
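/* Usage sketch (hypothetical caller): bump a map's size before load:
 *
 *   struct bpf_map *map = bpf_object__find_map_by_name(obj, "events");
 *
 *   if (map)
 *           bpf_map__set_max_entries(map, 4096);
 */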
3763
3764 static int
3765 bpf_object__probe_loading(struct bpf_object *obj)
3766 {
3767         struct bpf_load_program_attr attr;
3768         char *cp, errmsg[STRERR_BUFSIZE];
3769         struct bpf_insn insns[] = {
3770                 BPF_MOV64_IMM(BPF_REG_0, 0),
3771                 BPF_EXIT_INSN(),
3772         };
3773         int ret;
3774
3775         /* make sure basic loading works */
3776
3777         memset(&attr, 0, sizeof(attr));
3778         attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
3779         attr.insns = insns;
3780         attr.insns_cnt = ARRAY_SIZE(insns);
3781         attr.license = "GPL";
3782
3783         ret = bpf_load_program_xattr(&attr, NULL, 0);
3784         if (ret < 0) {
3785                 ret = errno;
3786                 cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg));
3787                 pr_warn("Error in %s():%s(%d). Couldn't load trivial BPF "
3788                         "program. Make sure your kernel supports BPF "
3789                         "(CONFIG_BPF_SYSCALL=y) and/or that RLIMIT_MEMLOCK is "
3790                         "set to big enough value.\n", __func__, cp, ret);
3791                 return -ret;
3792         }
3793         close(ret);
3794
3795         return 0;
3796 }
3797
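/* Collapse a probe's fd-or-error result into a boolean feature answer: any
 * valid FD means "supported" (and is closed here), anything else means not.
 */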
3798 static int probe_fd(int fd)
3799 {
3800         if (fd >= 0)
3801                 close(fd);
3802         return fd >= 0;
3803 }
3804
3805 static int probe_kern_prog_name(void)
3806 {
3807         struct bpf_load_program_attr attr;
3808         struct bpf_insn insns[] = {
3809                 BPF_MOV64_IMM(BPF_REG_0, 0),
3810                 BPF_EXIT_INSN(),
3811         };
3812         int ret;
3813
3814         /* make sure loading with name works */
3815
3816         memset(&attr, 0, sizeof(attr));
3817         attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
3818         attr.insns = insns;
3819         attr.insns_cnt = ARRAY_SIZE(insns);
3820         attr.license = "GPL";
3821         attr.name = "test";
3822         ret = bpf_load_program_xattr(&attr, NULL, 0);
3823         return probe_fd(ret);
3824 }
3825
3826 static int probe_kern_global_data(void)
3827 {
3828         struct bpf_load_program_attr prg_attr;
3829         struct bpf_create_map_attr map_attr;
3830         char *cp, errmsg[STRERR_BUFSIZE];
3831         struct bpf_insn insns[] = {
3832                 BPF_LD_MAP_VALUE(BPF_REG_1, 0, 16),
3833                 BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 42),
3834                 BPF_MOV64_IMM(BPF_REG_0, 0),
3835                 BPF_EXIT_INSN(),
3836         };
3837         int ret, map;
3838
3839         memset(&map_attr, 0, sizeof(map_attr));
3840         map_attr.map_type = BPF_MAP_TYPE_ARRAY;
3841         map_attr.key_size = sizeof(int);
3842         map_attr.value_size = 32;
3843         map_attr.max_entries = 1;
3844
3845         map = bpf_create_map_xattr(&map_attr);
3846         if (map < 0) {
3847                 ret = -errno;
3848                 cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg));
3849                 pr_warn("Error in %s():%s(%d). Couldn't create simple array map.\n",
3850                         __func__, cp, -ret);
3851                 return ret;
3852         }
3853
3854         insns[0].imm = map;
3855
3856         memset(&prg_attr, 0, sizeof(prg_attr));
3857         prg_attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
3858         prg_attr.insns = insns;
3859         prg_attr.insns_cnt = ARRAY_SIZE(insns);
3860         prg_attr.license = "GPL";
3861
3862         ret = bpf_load_program_xattr(&prg_attr, NULL, 0);
3863         close(map);
3864         return probe_fd(ret);
3865 }
3866
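/* The feature probes below hand-craft minimal raw BTF blobs (type records
 * plus a string section) and try to load them; getting an FD back means the
 * kernel understands that particular BTF construct.
 */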
3867 static int probe_kern_btf(void)
3868 {
3869         static const char strs[] = "\0int";
3870         __u32 types[] = {
3871                 /* int */
3872                 BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4),
3873         };
3874
3875         return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
3876                                              strs, sizeof(strs)));
3877 }
3878
3879 static int probe_kern_btf_func(void)
3880 {
3881         static const char strs[] = "\0int\0x\0a";
3882         /* void x(int a) {} */
3883         __u32 types[] = {
3884                 /* int */
3885                 BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4),  /* [1] */
3886                 /* FUNC_PROTO */                                /* [2] */
3887                 BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 0),
3888                 BTF_PARAM_ENC(7, 1),
3889                 /* FUNC x */                                    /* [3] */
3890                 BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0), 2),
3891         };
3892
3893         return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
3894                                              strs, sizeof(strs)));
3895 }
3896
3897 static int probe_kern_btf_func_global(void)
3898 {
3899         static const char strs[] = "\0int\0x\0a";
3900         /* void x(int a) {} with global linkage */
3901         __u32 types[] = {
3902                 /* int */
3903                 BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4),  /* [1] */
3904                 /* FUNC_PROTO */                                /* [2] */
3905                 BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 0),
3906                 BTF_PARAM_ENC(7, 1),
3907                 /* FUNC x BTF_FUNC_GLOBAL */                    /* [3] */
3908                 BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, BTF_FUNC_GLOBAL), 2),
3909         };
3910
3911         return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
3912                                              strs, sizeof(strs)));
3913 }
3914
3915 static int probe_kern_btf_datasec(void)
3916 {
3917         static const char strs[] = "\0x\0.data";
3918         /* static int x; */
3919         __u32 types[] = {
3920                 /* int */
3921                 BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),  /* [1] */
3922                 /* VAR x */                                     /* [2] */
3923                 BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1),
3924                 BTF_VAR_STATIC,
3925                 /* DATASEC val */                               /* [3] */
3926                 BTF_TYPE_ENC(3, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4),
3927                 BTF_VAR_SECINFO_ENC(2, 0, 4),
3928         };
3929
3930         return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
3931                                              strs, sizeof(strs)));
3932 }
3933
3934 static int probe_kern_btf_float(void)
3935 {
3936         static const char strs[] = "\0float";
3937         __u32 types[] = {
3938                 /* float */
3939                 BTF_TYPE_FLOAT_ENC(1, 4),
3940         };
3941
3942         return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
3943                                              strs, sizeof(strs)));
3944 }
3945
3946 static int probe_kern_array_mmap(void)
3947 {
3948         struct bpf_create_map_attr attr = {
3949                 .map_type = BPF_MAP_TYPE_ARRAY,
3950                 .map_flags = BPF_F_MMAPABLE,
3951                 .key_size = sizeof(int),
3952                 .value_size = sizeof(int),
3953                 .max_entries = 1,
3954         };
3955
3956         return probe_fd(bpf_create_map_xattr(&attr));
3957 }
3958
3959 static int probe_kern_exp_attach_type(void)
3960 {
3961         struct bpf_load_program_attr attr;
3962         struct bpf_insn insns[] = {
3963                 BPF_MOV64_IMM(BPF_REG_0, 0),
3964                 BPF_EXIT_INSN(),
3965         };
3966
3967         memset(&attr, 0, sizeof(attr));
3968         /* use any valid combination of program type and (optional)
3969          * non-zero expected attach type (i.e., not BPF_CGROUP_INET_INGRESS,
3970          * which is zero) to see if the kernel supports the
3971          * expected_attach_type field for the BPF_PROG_LOAD command
3972          */
3973         attr.prog_type = BPF_PROG_TYPE_CGROUP_SOCK;
3974         attr.expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE;
3975         attr.insns = insns;
3976         attr.insns_cnt = ARRAY_SIZE(insns);
3977         attr.license = "GPL";
3978
3979         return probe_fd(bpf_load_program_xattr(&attr, NULL, 0));
3980 }
3981
3982 static int probe_kern_probe_read_kernel(void)
3983 {
3984         struct bpf_load_program_attr attr;
3985         struct bpf_insn insns[] = {
3986                 BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),   /* r1 = r10 (fp) */
3987                 BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),  /* r1 += -8 */
3988                 BPF_MOV64_IMM(BPF_REG_2, 8),            /* r2 = 8 */
3989                 BPF_MOV64_IMM(BPF_REG_3, 0),            /* r3 = 0 */
3990                 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_probe_read_kernel),
3991                 BPF_EXIT_INSN(),
3992         };
3993
3994         memset(&attr, 0, sizeof(attr));
3995         attr.prog_type = BPF_PROG_TYPE_KPROBE;
3996         attr.insns = insns;
3997         attr.insns_cnt = ARRAY_SIZE(insns);
3998         attr.license = "GPL";
3999
4000         return probe_fd(bpf_load_program_xattr(&attr, NULL, 0));
4001 }
4002
4003 static int probe_prog_bind_map(void)
4004 {
4005         struct bpf_load_program_attr prg_attr;
4006         struct bpf_create_map_attr map_attr;
4007         char *cp, errmsg[STRERR_BUFSIZE];
4008         struct bpf_insn insns[] = {
4009                 BPF_MOV64_IMM(BPF_REG_0, 0),
4010                 BPF_EXIT_INSN(),
4011         };
4012         int ret, map, prog;
4013
4014         memset(&map_attr, 0, sizeof(map_attr));
4015         map_attr.map_type = BPF_MAP_TYPE_ARRAY;
4016         map_attr.key_size = sizeof(int);
4017         map_attr.value_size = 32;
4018         map_attr.max_entries = 1;
4019
4020         map = bpf_create_map_xattr(&map_attr);
4021         if (map < 0) {
4022                 ret = -errno;
4023                 cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg));
4024                 pr_warn("Error in %s():%s(%d). Couldn't create simple array map.\n",
4025                         __func__, cp, -ret);
4026                 return ret;
4027         }
4028
4029         memset(&prg_attr, 0, sizeof(prg_attr));
4030         prg_attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
4031         prg_attr.insns = insns;
4032         prg_attr.insns_cnt = ARRAY_SIZE(insns);
4033         prg_attr.license = "GPL";
4034
4035         prog = bpf_load_program_xattr(&prg_attr, NULL, 0);
4036         if (prog < 0) {
4037                 close(map);
4038                 return 0;
4039         }
4040
4041         ret = bpf_prog_bind_map(prog, map, NULL);
4042
4043         close(map);
4044         close(prog);
4045
4046         return ret >= 0;
4047 }
4048
4049 static int probe_module_btf(void)
4050 {
4051         static const char strs[] = "\0int";
4052         __u32 types[] = {
4053                 /* int */
4054                 BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4),
4055         };
4056         struct bpf_btf_info info;
4057         __u32 len = sizeof(info);
4058         char name[16];
4059         int fd, err;
4060
4061         fd = libbpf__load_raw_btf((char *)types, sizeof(types), strs, sizeof(strs));
4062         if (fd < 0)
4063                 return 0; /* BTF not supported at all */
4064
4065         memset(&info, 0, sizeof(info));
4066         info.name = ptr_to_u64(name);
4067         info.name_len = sizeof(name);
4068
4069         /* check that BPF_OBJ_GET_INFO_BY_FD supports specifying name pointer;
4070          * kernel's module BTF support coincides with support for
4071          * name/name_len fields in struct bpf_btf_info.
4072          */
4073         err = bpf_obj_get_info_by_fd(fd, &info, &len);
4074         close(fd);
4075         return !err;
4076 }
4077
4078 enum kern_feature_result {
4079         FEAT_UNKNOWN = 0,
4080         FEAT_SUPPORTED = 1,
4081         FEAT_MISSING = 2,
4082 };
4083
4084 typedef int (*feature_probe_fn)(void);
4085
4086 static struct kern_feature_desc {
4087         const char *desc;
4088         feature_probe_fn probe;
4089         enum kern_feature_result res;
4090 } feature_probes[__FEAT_CNT] = {
4091         [FEAT_PROG_NAME] = {
4092                 "BPF program name", probe_kern_prog_name,
4093         },
4094         [FEAT_GLOBAL_DATA] = {
4095                 "global variables", probe_kern_global_data,
4096         },
4097         [FEAT_BTF] = {
4098                 "minimal BTF", probe_kern_btf,
4099         },
4100         [FEAT_BTF_FUNC] = {
4101                 "BTF functions", probe_kern_btf_func,
4102         },
4103         [FEAT_BTF_GLOBAL_FUNC] = {
4104                 "BTF global function", probe_kern_btf_func_global,
4105         },
4106         [FEAT_BTF_DATASEC] = {
4107                 "BTF data section and variable", probe_kern_btf_datasec,
4108         },
4109         [FEAT_ARRAY_MMAP] = {
4110                 "ARRAY map mmap()", probe_kern_array_mmap,
4111         },
4112         [FEAT_EXP_ATTACH_TYPE] = {
4113                 "BPF_PROG_LOAD expected_attach_type attribute",
4114                 probe_kern_exp_attach_type,
4115         },
4116         [FEAT_PROBE_READ_KERN] = {
4117                 "bpf_probe_read_kernel() helper", probe_kern_probe_read_kernel,
4118         },
4119         [FEAT_PROG_BIND_MAP] = {
4120                 "BPF_PROG_BIND_MAP support", probe_prog_bind_map,
4121         },
4122         [FEAT_MODULE_BTF] = {
4123                 "module BTF support", probe_module_btf,
4124         },
4125         [FEAT_BTF_FLOAT] = {
4126                 "BTF_KIND_FLOAT support", probe_kern_btf_float,
4127         },
4128 };
4129
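/* Probes run lazily and their result is cached in feature_probes[]. Typical
 * use inside this file, e.g.:
 *
 *   if (kernel_supports(FEAT_PROG_NAME))
 *           create_attr.name = map->name;
 */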
4130 static bool kernel_supports(enum kern_feature_id feat_id)
4131 {
4132         struct kern_feature_desc *feat = &feature_probes[feat_id];
4133         int ret;
4134
4135         if (READ_ONCE(feat->res) == FEAT_UNKNOWN) {
4136                 ret = feat->probe();
4137                 if (ret > 0) {
4138                         WRITE_ONCE(feat->res, FEAT_SUPPORTED);
4139                 } else if (ret == 0) {
4140                         WRITE_ONCE(feat->res, FEAT_MISSING);
4141                 } else {
4142                         pr_warn("Detection of kernel %s support failed: %d\n", feat->desc, ret);
4143                         WRITE_ONCE(feat->res, FEAT_MISSING);
4144                 }
4145         }
4146
4147         return READ_ONCE(feat->res) == FEAT_SUPPORTED;
4148 }
4149
4150 static bool map_is_reuse_compat(const struct bpf_map *map, int map_fd)
4151 {
4152         struct bpf_map_info map_info = {};
4153         char msg[STRERR_BUFSIZE];
4154         __u32 map_info_len;
4155
4156         map_info_len = sizeof(map_info);
4157
4158         if (bpf_obj_get_info_by_fd(map_fd, &map_info, &map_info_len)) {
4159                 pr_warn("failed to get map info for map FD %d: %s\n",
4160                         map_fd, libbpf_strerror_r(errno, msg, sizeof(msg)));
4161                 return false;
4162         }
4163
4164         return (map_info.type == map->def.type &&
4165                 map_info.key_size == map->def.key_size &&
4166                 map_info.value_size == map->def.value_size &&
4167                 map_info.max_entries == map->def.max_entries &&
4168                 map_info.map_flags == map->def.map_flags);
4169 }
4170
4171 static int
4172 bpf_object__reuse_map(struct bpf_map *map)
4173 {
4174         char *cp, errmsg[STRERR_BUFSIZE];
4175         int err, pin_fd;
4176
4177         pin_fd = bpf_obj_get(map->pin_path);
4178         if (pin_fd < 0) {
4179                 err = -errno;
4180                 if (err == -ENOENT) {
4181                         pr_debug("found no pinned map to reuse at '%s'\n",
4182                                  map->pin_path);
4183                         return 0;
4184                 }
4185
4186                 cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg));
4187                 pr_warn("couldn't retrieve pinned map '%s': %s\n",
4188                         map->pin_path, cp);
4189                 return err;
4190         }
4191
4192         if (!map_is_reuse_compat(map, pin_fd)) {
4193                 pr_warn("couldn't reuse pinned map at '%s': parameter mismatch\n",
4194                         map->pin_path);
4195                 close(pin_fd);
4196                 return -EINVAL;
4197         }
4198
4199         err = bpf_map__reuse_fd(map, pin_fd);
4200         if (err) {
4201                 close(pin_fd);
4202                 return err;
4203         }
4204         map->pinned = true;
4205         pr_debug("reused pinned map at '%s'\n", map->pin_path);
4206
4207         return 0;
4208 }
4209
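/* Internal maps (.data, .rodata, .bss, .kconfig) are single-entry array maps;
 * seed element 0 from the memory-mapped initial image, and freeze the
 * read-only ones so user space can't modify them behind the program's back.
 */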
4210 static int
4211 bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map)
4212 {
4213         enum libbpf_map_type map_type = map->libbpf_type;
4214         char *cp, errmsg[STRERR_BUFSIZE];
4215         int err, zero = 0;
4216
4217         err = bpf_map_update_elem(map->fd, &zero, map->mmaped, 0);
4218         if (err) {
4219                 err = -errno;
4220                 cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
4221                 pr_warn("Error setting initial map(%s) contents: %s\n",
4222                         map->name, cp);
4223                 return err;
4224         }
4225
4226         /* Freeze .rodata and .kconfig map as read-only from syscall side. */
4227         if (map_type == LIBBPF_MAP_RODATA || map_type == LIBBPF_MAP_KCONFIG) {
4228                 err = bpf_map_freeze(map->fd);
4229                 if (err) {
4230                         err = -errno;
4231                         cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
4232                         pr_warn("Error freezing map(%s) as read-only: %s\n",
4233                                 map->name, cp);
4234                         return err;
4235                 }
4236         }
4237         return 0;
4238 }
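
/* Illustrative example: on the BPF program side, a global like
 *
 *   const volatile int debug_level = 0; // lands in the .rodata map
 *
 * ('debug_level' is a made-up name) becomes part of the single value written
 * above under key 0. Freezing .rodata/.kconfig afterwards guarantees user
 * space can't change the contents, which is what lets the verifier treat
 * such globals as known constants.
 */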
4239
4240 static void bpf_map__destroy(struct bpf_map *map);
4241
4242 static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map)
4243 {
4244         struct bpf_create_map_attr create_attr;
4245         struct bpf_map_def *def = &map->def;
4246
4247         memset(&create_attr, 0, sizeof(create_attr));
4248
4249         if (kernel_supports(FEAT_PROG_NAME))
4250                 create_attr.name = map->name;
4251         create_attr.map_ifindex = map->map_ifindex;
4252         create_attr.map_type = def->type;
4253         create_attr.map_flags = def->map_flags;
4254         create_attr.key_size = def->key_size;
4255         create_attr.value_size = def->value_size;
4256         create_attr.numa_node = map->numa_node;
4257
4258         if (def->type == BPF_MAP_TYPE_PERF_EVENT_ARRAY && !def->max_entries) {
4259                 int nr_cpus;
4260
4261                 nr_cpus = libbpf_num_possible_cpus();
4262                 if (nr_cpus < 0) {
4263                         pr_warn("map '%s': failed to determine number of system CPUs: %d\n",
4264                                 map->name, nr_cpus);
4265                         return nr_cpus;
4266                 }
4267                 pr_debug("map '%s': setting size to %d\n", map->name, nr_cpus);
4268                 create_attr.max_entries = nr_cpus;
4269         } else {
4270                 create_attr.max_entries = def->max_entries;
4271         }
4272
4273         if (bpf_map__is_struct_ops(map))
4274                 create_attr.btf_vmlinux_value_type_id =
4275                         map->btf_vmlinux_value_type_id;
4276
4277         create_attr.btf_fd = 0;
4278         create_attr.btf_key_type_id = 0;
4279         create_attr.btf_value_type_id = 0;
4280         if (obj->btf && btf__fd(obj->btf) >= 0 && !bpf_map_find_btf_info(obj, map)) {
4281                 create_attr.btf_fd = btf__fd(obj->btf);
4282                 create_attr.btf_key_type_id = map->btf_key_type_id;
4283                 create_attr.btf_value_type_id = map->btf_value_type_id;
4284         }
4285
4286         if (bpf_map_type__is_map_in_map(def->type)) {
4287                 if (map->inner_map) {
4288                         int err;
4289
4290                         err = bpf_object__create_map(obj, map->inner_map);
4291                         if (err) {
4292                                 pr_warn("map '%s': failed to create inner map: %d\n",
4293                                         map->name, err);
4294                                 return err;
4295                         }
4296                         map->inner_map_fd = bpf_map__fd(map->inner_map);
4297                 }
4298                 if (map->inner_map_fd >= 0)
4299                         create_attr.inner_map_fd = map->inner_map_fd;
4300         }
4301
4302         map->fd = bpf_create_map_xattr(&create_attr);
4303         if (map->fd < 0 && (create_attr.btf_key_type_id ||
4304                             create_attr.btf_value_type_id)) {
4305                 char *cp, errmsg[STRERR_BUFSIZE];
4306                 int err = -errno;
4307
4308                 cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
4309                 pr_warn("Error in bpf_create_map_xattr(%s):%s(%d). Retrying without BTF.\n",
4310                         map->name, cp, err);
4311                 create_attr.btf_fd = 0;
4312                 create_attr.btf_key_type_id = 0;
4313                 create_attr.btf_value_type_id = 0;
4314                 map->btf_key_type_id = 0;
4315                 map->btf_value_type_id = 0;
4316                 map->fd = bpf_create_map_xattr(&create_attr);
4317         }
4318
4319         if (map->fd < 0)
4320                 return -errno;
4321
4322         if (bpf_map_type__is_map_in_map(def->type) && map->inner_map) {
4323                 bpf_map__destroy(map->inner_map);
4324                 zfree(&map->inner_map);
4325         }
4326
4327         return 0;
4328 }
4329
4330 static int init_map_slots(struct bpf_map *map)
4331 {
4332         const struct bpf_map *targ_map;
4333         unsigned int i;
4334         int fd, err;
4335
4336         for (i = 0; i < map->init_slots_sz; i++) {
4337                 if (!map->init_slots[i])
4338                         continue;
4339
4340                 targ_map = map->init_slots[i];
4341                 fd = bpf_map__fd(targ_map);
4342                 err = bpf_map_update_elem(map->fd, &i, &fd, 0);
4343                 if (err) {
4344                         err = -errno;
4345                         pr_warn("map '%s': failed to initialize slot [%d] to map '%s' fd=%d: %d\n",
4346                                 map->name, i, targ_map->name,
4347                                 fd, err);
4348                         return err;
4349                 }
4350                 pr_debug("map '%s': slot [%d] set to map '%s' fd=%d\n",
4351                          map->name, i, targ_map->name, fd);
4352         }
4353
4354         zfree(&map->init_slots);
4355         map->init_slots_sz = 0;
4356
4357         return 0;
4358 }
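
/* Illustrative sketch: init_slots above is what backs declarative map-in-map
 * initialization in BPF-side C (names made up for this example):
 *
 *   struct inner {
 *           __uint(type, BPF_MAP_TYPE_ARRAY);
 *           __uint(max_entries, 1);
 *           __type(key, int);
 *           __type(value, int);
 *   } inner_map SEC(".maps");
 *
 *   struct {
 *           __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
 *           __uint(max_entries, 4);
 *           __type(key, int);
 *           __array(values, struct inner);
 *   } outer_map SEC(".maps") = {
 *           .values = { [0] = &inner_map }, // becomes init_slots[0]
 *   };
 */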
4359
4360 static int
4361 bpf_object__create_maps(struct bpf_object *obj)
4362 {
4363         struct bpf_map *map;
4364         char *cp, errmsg[STRERR_BUFSIZE];
4365         unsigned int i, j;
4366         int err;
4367
4368         for (i = 0; i < obj->nr_maps; i++) {
4369                 map = &obj->maps[i];
4370
4371                 if (map->pin_path) {
4372                         err = bpf_object__reuse_map(map);
4373                         if (err) {
4374                                 pr_warn("map '%s': error reusing pinned map\n",
4375                                         map->name);
4376                                 goto err_out;
4377                         }
4378                 }
4379
4380                 if (map->fd >= 0) {
4381                         pr_debug("map '%s': skipping creation (preset fd=%d)\n",
4382                                  map->name, map->fd);
4383                 } else {
4384                         err = bpf_object__create_map(obj, map);
4385                         if (err)
4386                                 goto err_out;
4387
4388                         pr_debug("map '%s': created successfully, fd=%d\n",
4389                                  map->name, map->fd);
4390
4391                         if (bpf_map__is_internal(map)) {
4392                                 err = bpf_object__populate_internal_map(obj, map);
4393                                 if (err < 0) {
4394                                         zclose(map->fd);
4395                                         goto err_out;
4396                                 }
4397                         }
4398
4399                         if (map->init_slots_sz) {
4400                                 err = init_map_slots(map);
4401                                 if (err < 0) {
4402                                         zclose(map->fd);
4403                                         goto err_out;
4404                                 }
4405                         }
4406                 }
4407
4408                 if (map->pin_path && !map->pinned) {
4409                         err = bpf_map__pin(map, NULL);
4410                         if (err) {
4411                                 pr_warn("map '%s': failed to auto-pin at '%s': %d\n",
4412                                         map->name, map->pin_path, err);
4413                                 zclose(map->fd);
4414                                 goto err_out;
4415                         }
4416                 }
4417         }
4418
4419         return 0;
4420
4421 err_out:
4422         cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
4423         pr_warn("map '%s': failed to create: %s(%d)\n", map->name, cp, err);
4424         pr_perm_msg(err);
4425         for (j = 0; j < i; j++)
4426                 zclose(obj->maps[j].fd);
4427         return err;
4428 }
4429
4430 #define BPF_CORE_SPEC_MAX_LEN 64
4431
4432 /* represents BPF CO-RE field or array element accessor */
4433 struct bpf_core_accessor {
4434         __u32 type_id;          /* struct/union type or array element type */
4435         __u32 idx;              /* field index or array index */
4436         const char *name;       /* field name or NULL for array accessor */
4437 };
4438
4439 struct bpf_core_spec {
4440         const struct btf *btf;
4441         /* high-level spec: named fields and array indices only */
4442         struct bpf_core_accessor spec[BPF_CORE_SPEC_MAX_LEN];
4443         /* original unresolved (no skip_mods_and_typedefs) root type ID */
4444         __u32 root_type_id;
4445         /* CO-RE relocation kind */
4446         enum bpf_core_relo_kind relo_kind;
4447         /* high-level spec length */
4448         int len;
4449         /* raw, low-level spec: 1-to-1 with accessor spec string */
4450         int raw_spec[BPF_CORE_SPEC_MAX_LEN];
4451         /* raw spec length */
4452         int raw_len;
4453         /* field bit offset represented by spec */
4454         __u32 bit_offset;
4455 };
4456
4457 static bool str_is_empty(const char *s)
4458 {
4459         return !s || !s[0];
4460 }
4461
4462 static bool is_flex_arr(const struct btf *btf,
4463                         const struct bpf_core_accessor *acc,
4464                         const struct btf_array *arr)
4465 {
4466         const struct btf_type *t;
4467
4468         /* not a flexible array if it's not a struct member or has non-zero size */
4469         if (!acc->name || arr->nelems > 0)
4470                 return false;
4471
4472         /* has to be the last member of enclosing struct */
4473         t = btf__type_by_id(btf, acc->type_id);
4474         return acc->idx == btf_vlen(t) - 1;
4475 }
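
/* Illustrative example (made-up type): in
 *
 *   struct msg {
 *           int len;
 *           char data[]; // flex array: nelems == 0, last member
 *   };
 *
 * an access like msg->data[100] must not be rejected by array bounds checks,
 * which is exactly the case is_flex_arr() detects.
 */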
4476
4477 static const char *core_relo_kind_str(enum bpf_core_relo_kind kind)
4478 {
4479         switch (kind) {
4480         case BPF_FIELD_BYTE_OFFSET: return "byte_off";
4481         case BPF_FIELD_BYTE_SIZE: return "byte_sz";
4482         case BPF_FIELD_EXISTS: return "field_exists";
4483         case BPF_FIELD_SIGNED: return "signed";
4484         case BPF_FIELD_LSHIFT_U64: return "lshift_u64";
4485         case BPF_FIELD_RSHIFT_U64: return "rshift_u64";
4486         case BPF_TYPE_ID_LOCAL: return "local_type_id";
4487         case BPF_TYPE_ID_TARGET: return "target_type_id";
4488         case BPF_TYPE_EXISTS: return "type_exists";
4489         case BPF_TYPE_SIZE: return "type_size";
4490         case BPF_ENUMVAL_EXISTS: return "enumval_exists";
4491         case BPF_ENUMVAL_VALUE: return "enumval_value";
4492         default: return "unknown";
4493         }
4494 }
4495
4496 static bool core_relo_is_field_based(enum bpf_core_relo_kind kind)
4497 {
4498         switch (kind) {
4499         case BPF_FIELD_BYTE_OFFSET:
4500         case BPF_FIELD_BYTE_SIZE:
4501         case BPF_FIELD_EXISTS:
4502         case BPF_FIELD_SIGNED:
4503         case BPF_FIELD_LSHIFT_U64:
4504         case BPF_FIELD_RSHIFT_U64:
4505                 return true;
4506         default:
4507                 return false;
4508         }
4509 }
4510
4511 static bool core_relo_is_type_based(enum bpf_core_relo_kind kind)
4512 {
4513         switch (kind) {
4514         case BPF_TYPE_ID_LOCAL:
4515         case BPF_TYPE_ID_TARGET:
4516         case BPF_TYPE_EXISTS:
4517         case BPF_TYPE_SIZE:
4518                 return true;
4519         default:
4520                 return false;
4521         }
4522 }
4523
4524 static bool core_relo_is_enumval_based(enum bpf_core_relo_kind kind)
4525 {
4526         switch (kind) {
4527         case BPF_ENUMVAL_EXISTS:
4528         case BPF_ENUMVAL_VALUE:
4529                 return true;
4530         default:
4531                 return false;
4532         }
4533 }
4534
4535 /*
4536  * Turn bpf_core_relo into a low- and high-level spec representation,
4537  * validating correctness along the way, as well as calculating resulting
4538  * field bit offset, specified by accessor string. Low-level spec captures
4539  * every single level of nestedness, including traversing anonymous
4540  * struct/union members. High-level one only captures semantically meaningful
4541  * "turning points": named fields and array indicies.
4542  * E.g., for this case:
4543  *
4544  *   struct sample {
4545  *       int __unimportant;
4546  *       struct {
4547  *           int __1;
4548  *           int __2;
4549  *           int a[7];
4550  *       };
4551  *   };
4552  *
4553  *   struct sample *s = ...;
4554  *
4555  *   int *x = &s->a[3]; // access string = '0:1:2:3'
4556  *
4557  * Low-level spec has 1:1 mapping with each element of access string (it's
4558  * just a parsed access string representation): [0, 1, 2, 3].
4559  *
4560  * High-level spec will capture only 3 points:
4561  *   - initial zero-index access by pointer (&s->... is the same as &s[0]...);
4562  *   - field 'a' access (corresponds to '2' in low-level spec);
4563  *   - array element #3 access (corresponds to '3' in low-level spec).
4564  *
4565  * Type-based relocations (TYPE_EXISTS/TYPE_SIZE,
4566  * TYPE_ID_LOCAL/TYPE_ID_TARGET) don't capture any field information. Their
4567  * spec and raw_spec are kept empty.
4568  *
4569  * Enum value-based relocations (ENUMVAL_EXISTS/ENUMVAL_VALUE) use the access
4570  * string to specify the index of the enumerator value that needs relocating.
4571  */
4572 static int bpf_core_parse_spec(const struct btf *btf,
4573                                __u32 type_id,
4574                                const char *spec_str,
4575                                enum bpf_core_relo_kind relo_kind,
4576                                struct bpf_core_spec *spec)
4577 {
4578         int access_idx, parsed_len, i;
4579         struct bpf_core_accessor *acc;
4580         const struct btf_type *t;
4581         const char *name;
4582         __u32 id;
4583         __s64 sz;
4584
4585         if (str_is_empty(spec_str) || *spec_str == ':')
4586                 return -EINVAL;
4587
4588         memset(spec, 0, sizeof(*spec));
4589         spec->btf = btf;
4590         spec->root_type_id = type_id;
4591         spec->relo_kind = relo_kind;
4592
4593         /* type-based relocations don't have a field access string */
4594         if (core_relo_is_type_based(relo_kind)) {
4595                 if (strcmp(spec_str, "0"))
4596                         return -EINVAL;
4597                 return 0;
4598         }
4599
4600         /* parse spec_str="0:1:2:3:4" into array raw_spec=[0, 1, 2, 3, 4] */
4601         while (*spec_str) {
4602                 if (*spec_str == ':')
4603                         ++spec_str;
4604                 if (sscanf(spec_str, "%d%n", &access_idx, &parsed_len) != 1)
4605                         return -EINVAL;
4606                 if (spec->raw_len == BPF_CORE_SPEC_MAX_LEN)
4607                         return -E2BIG;
4608                 spec_str += parsed_len;
4609                 spec->raw_spec[spec->raw_len++] = access_idx;
4610         }
4611
4612         if (spec->raw_len == 0)
4613                 return -EINVAL;
4614
4615         t = skip_mods_and_typedefs(btf, type_id, &id);
4616         if (!t)
4617                 return -EINVAL;
4618
4619         access_idx = spec->raw_spec[0];
4620         acc = &spec->spec[0];
4621         acc->type_id = id;
4622         acc->idx = access_idx;
4623         spec->len++;
4624
4625         if (core_relo_is_enumval_based(relo_kind)) {
4626                 if (!btf_is_enum(t) || spec->raw_len > 1 || access_idx >= btf_vlen(t))
4627                         return -EINVAL;
4628
4629                 /* record enumerator name in the first accessor */
4630                 acc->name = btf__name_by_offset(btf, btf_enum(t)[access_idx].name_off);
4631                 return 0;
4632         }
4633
4634         if (!core_relo_is_field_based(relo_kind))
4635                 return -EINVAL;
4636
4637         sz = btf__resolve_size(btf, id);
4638         if (sz < 0)
4639                 return sz;
4640         spec->bit_offset = access_idx * sz * 8;
4641
4642         for (i = 1; i < spec->raw_len; i++) {
4643                 t = skip_mods_and_typedefs(btf, id, &id);
4644                 if (!t)
4645                         return -EINVAL;
4646
4647                 access_idx = spec->raw_spec[i];
4648                 acc = &spec->spec[spec->len];
4649
4650                 if (btf_is_composite(t)) {
4651                         const struct btf_member *m;
4652                         __u32 bit_offset;
4653
4654                         if (access_idx >= btf_vlen(t))
4655                                 return -EINVAL;
4656
4657                         bit_offset = btf_member_bit_offset(t, access_idx);
4658                         spec->bit_offset += bit_offset;
4659
4660                         m = btf_members(t) + access_idx;
4661                         if (m->name_off) {
4662                                 name = btf__name_by_offset(btf, m->name_off);
4663                                 if (str_is_empty(name))
4664                                         return -EINVAL;
4665
4666                                 acc->type_id = id;
4667                                 acc->idx = access_idx;
4668                                 acc->name = name;
4669                                 spec->len++;
4670                         }
4671
4672                         id = m->type;
4673                 } else if (btf_is_array(t)) {
4674                         const struct btf_array *a = btf_array(t);
4675                         bool flex;
4676
4677                         t = skip_mods_and_typedefs(btf, a->type, &id);
4678                         if (!t)
4679                                 return -EINVAL;
4680
4681                         flex = is_flex_arr(btf, acc - 1, a);
4682                         if (!flex && access_idx >= a->nelems)
4683                                 return -EINVAL;
4684
4685                         spec->spec[spec->len].type_id = id;
4686                         spec->spec[spec->len].idx = access_idx;
4687                         spec->len++;
4688
4689                         sz = btf__resolve_size(btf, id);
4690                         if (sz < 0)
4691                                 return sz;
4692                         spec->bit_offset += access_idx * sz * 8;
4693                 } else {
4694                         pr_warn("relo for [%u] %s (at idx %d) captures type [%d] of unexpected kind %s\n",
4695                                 type_id, spec_str, i, id, btf_kind_str(t));
4696                         return -EINVAL;
4697                 }
4698         }
4699
4700         return 0;
4701 }
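
/* Illustrative example of the enumval case above (made-up enum): for
 *
 *   enum state { RUNNING = 0, SLEEPING = 3, DEAD = 7 };
 *
 * a relocation against SLEEPING uses access string "1" -- the enumerator's
 * index, not its value -- so spec->spec[0].name ends up being "SLEEPING".
 */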
4702
4703 static bool bpf_core_is_flavor_sep(const char *s)
4704 {
4705         /* check X___Y name pattern, where X and Y are not underscores */
4706         return s[0] != '_' &&                                 /* X */
4707                s[1] == '_' && s[2] == '_' && s[3] == '_' &&   /* ___ */
4708                s[4] != '_';                                   /* Y */
4709 }
4710
4711 /* Given 'some_struct_name___with_flavor' return the length of a name prefix
4712  * before last triple underscore. Struct name part after last triple
4713  * underscore is ignored by BPF CO-RE relocation during relocation matching.
4714  */
4715 static size_t bpf_core_essential_name_len(const char *name)
4716 {
4717         size_t n = strlen(name);
4718         int i;
4719
4720         for (i = n - 5; i >= 0; i--) {
4721                 if (bpf_core_is_flavor_sep(name + i))
4722                         return i + 1;
4723         }
4724         return n;
4725 }
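
/* Illustrative example: for 'struct task_struct___flavored' the essential
 * name length covers just "task_struct", so it will match plain
 * 'struct task_struct' in target BTF; the "___flavored" suffix only keeps
 * multiple local definitions of the same type distinct.
 */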
4726
4727 struct core_cand
4728 {
4729         const struct btf *btf;
4730         const struct btf_type *t;
4731         const char *name;
4732         __u32 id;
4733 };
4734
4735 /* dynamically sized list of type IDs and its associated struct btf */
4736 struct core_cand_list {
4737         struct core_cand *cands;
4738         int len;
4739 };
4740
4741 static void bpf_core_free_cands(struct core_cand_list *cands)
4742 {
4743         free(cands->cands);
4744         free(cands);
4745 }
4746
4747 static int bpf_core_add_cands(struct core_cand *local_cand,
4748                               size_t local_essent_len,
4749                               const struct btf *targ_btf,
4750                               const char *targ_btf_name,
4751                               int targ_start_id,
4752                               struct core_cand_list *cands)
4753 {
4754         struct core_cand *new_cands, *cand;
4755         const struct btf_type *t;
4756         const char *targ_name;
4757         size_t targ_essent_len;
4758         int n, i;
4759
4760         n = btf__get_nr_types(targ_btf);
4761         for (i = targ_start_id; i <= n; i++) {
4762                 t = btf__type_by_id(targ_btf, i);
4763                 if (btf_kind(t) != btf_kind(local_cand->t))
4764                         continue;
4765
4766                 targ_name = btf__name_by_offset(targ_btf, t->name_off);
4767                 if (str_is_empty(targ_name))
4768                         continue;
4769
4770                 targ_essent_len = bpf_core_essential_name_len(targ_name);
4771                 if (targ_essent_len != local_essent_len)
4772                         continue;
4773
4774                 if (strncmp(local_cand->name, targ_name, local_essent_len) != 0)
4775                         continue;
4776
4777                 pr_debug("CO-RE relocating [%d] %s %s: found target candidate [%d] %s %s in [%s]\n",
4778                          local_cand->id, btf_kind_str(local_cand->t),
4779                          local_cand->name, i, btf_kind_str(t), targ_name,
4780                          targ_btf_name);
4781                 new_cands = libbpf_reallocarray(cands->cands, cands->len + 1,
4782                                               sizeof(*cands->cands));
4783                 if (!new_cands)
4784                         return -ENOMEM;
4785
4786                 cand = &new_cands[cands->len];
4787                 cand->btf = targ_btf;
4788                 cand->t = t;
4789                 cand->name = targ_name;
4790                 cand->id = i;
4791
4792                 cands->cands = new_cands;
4793                 cands->len++;
4794         }
4795         return 0;
4796 }
4797
4798 static int load_module_btfs(struct bpf_object *obj)
4799 {
4800         struct bpf_btf_info info;
4801         struct module_btf *mod_btf;
4802         struct btf *btf;
4803         char name[64];
4804         __u32 id = 0, len;
4805         int err, fd;
4806
4807         if (obj->btf_modules_loaded)
4808                 return 0;
4809
4810         /* don't do this again, even if we find no module BTFs */
4811         obj->btf_modules_loaded = true;
4812
4813         /* kernel too old to support module BTFs */
4814         if (!kernel_supports(FEAT_MODULE_BTF))
4815                 return 0;
4816
4817         while (true) {
4818                 err = bpf_btf_get_next_id(id, &id);
4819                 if (err && errno == ENOENT)
4820                         return 0;
4821                 if (err) {
4822                         err = -errno;
4823                         pr_warn("failed to iterate BTF objects: %d\n", err);
4824                         return err;
4825                 }
4826
4827                 fd = bpf_btf_get_fd_by_id(id);
4828                 if (fd < 0) {
4829                         if (errno == ENOENT)
4830                                 continue; /* expected race: BTF was unloaded */
4831                         err = -errno;
4832                         pr_warn("failed to get BTF object #%d FD: %d\n", id, err);
4833                         return err;
4834                 }
4835
4836                 len = sizeof(info);
4837                 memset(&info, 0, sizeof(info));
4838                 info.name = ptr_to_u64(name);
4839                 info.name_len = sizeof(name);
4840
4841                 err = bpf_obj_get_info_by_fd(fd, &info, &len);
4842                 if (err) {
4843                         err = -errno;
4844                         pr_warn("failed to get BTF object #%d info: %d\n", id, err);
4845                         goto err_out;
4846                 }
4847
4848                 /* ignore non-module BTFs */
4849                 if (!info.kernel_btf || strcmp(name, "vmlinux") == 0) {
4850                         close(fd);
4851                         continue;
4852                 }
4853
4854                 btf = btf_get_from_fd(fd, obj->btf_vmlinux);
4855                 if (IS_ERR(btf)) {
4856                         pr_warn("failed to load module [%s]'s BTF object #%d: %ld\n",
4857                                 name, id, PTR_ERR(btf));
4858                         err = PTR_ERR(btf);
4859                         goto err_out;
4860                 }
4861
4862                 err = libbpf_ensure_mem((void **)&obj->btf_modules, &obj->btf_module_cap,
4863                                         sizeof(*obj->btf_modules), obj->btf_module_cnt + 1);
4864                 if (err)
4865                         goto err_out;
4866
4867                 mod_btf = &obj->btf_modules[obj->btf_module_cnt++];
4868
4869                 mod_btf->btf = btf;
4870                 mod_btf->id = id;
4871                 mod_btf->fd = fd;
4872                 mod_btf->name = strdup(name);
4873                 if (!mod_btf->name) {
4874                         err = -ENOMEM;
4875                         goto err_out;
4876                 }
4877                 continue;
4878
4879 err_out:
4880                 close(fd);
4881                 return err;
4882         }
4883
4884         return 0;
4885 }
4886
4887 static struct core_cand_list *
4888 bpf_core_find_cands(struct bpf_object *obj, const struct btf *local_btf, __u32 local_type_id)
4889 {
4890         struct core_cand local_cand = {};
4891         struct core_cand_list *cands;
4892         const struct btf *main_btf;
4893         size_t local_essent_len;
4894         int err, i;
4895
4896         local_cand.btf = local_btf;
4897         local_cand.t = btf__type_by_id(local_btf, local_type_id);
4898         if (!local_cand.t)
4899                 return ERR_PTR(-EINVAL);
4900
4901         local_cand.name = btf__name_by_offset(local_btf, local_cand.t->name_off);
4902         if (str_is_empty(local_cand.name))
4903                 return ERR_PTR(-EINVAL);
4904         local_essent_len = bpf_core_essential_name_len(local_cand.name);
4905
4906         cands = calloc(1, sizeof(*cands));
4907         if (!cands)
4908                 return ERR_PTR(-ENOMEM);
4909
4910         /* Attempt to find target candidates in vmlinux BTF first */
4911         main_btf = obj->btf_vmlinux_override ?: obj->btf_vmlinux;
4912         err = bpf_core_add_cands(&local_cand, local_essent_len, main_btf, "vmlinux", 1, cands);
4913         if (err)
4914                 goto err_out;
4915
4916         /* if vmlinux BTF has any candidate, don't go for module BTFs */
4917         if (cands->len)
4918                 return cands;
4919
4920         /* if vmlinux BTF was overridden, don't attempt to load module BTFs */
4921         if (obj->btf_vmlinux_override)
4922                 return cands;
4923
4924         /* now look through module BTFs, still trying to find candidates */
4925         err = load_module_btfs(obj);
4926         if (err)
4927                 goto err_out;
4928
4929         for (i = 0; i < obj->btf_module_cnt; i++) {
4930                 err = bpf_core_add_cands(&local_cand, local_essent_len,
4931                                          obj->btf_modules[i].btf,
4932                                          obj->btf_modules[i].name,
4933                                          btf__get_nr_types(obj->btf_vmlinux) + 1,
4934                                          cands);
4935                 if (err)
4936                         goto err_out;
4937         }
4938
4939         return cands;
4940 err_out:
4941         bpf_core_free_cands(cands);
4942         return ERR_PTR(err);
4943 }
4944
4945 /* Check two types for compatibility for the purpose of field access
4946  * relocation. const/volatile/restrict and typedefs are skipped to ensure we
4947  * are relocating semantically compatible entities:
4948  *   - any two STRUCTs/UNIONs are compatible and can be mixed;
4949  *   - any two FWDs are compatible, if their names match (modulo flavor suffix);
4950  *   - any two PTRs are always compatible;
4951  *   - for ENUMs, names should be the same (ignoring flavor suffix) or at
4952  *     least one of the enums should be anonymous; enum sizes are ignored;
4954  *   - for INT, size and signedness are ignored;
4955  *   - for ARRAY, dimensionality is ignored, element types are checked for
4956  *     compatibility recursively;
4957  *   - everything else shouldn't be ever a target of relocation.
4958  * These rules are not set in stone and probably will be adjusted as we get
4959  * more experience with using BPF CO-RE relocations.
4960  */
4961 static int bpf_core_fields_are_compat(const struct btf *local_btf,
4962                                       __u32 local_id,
4963                                       const struct btf *targ_btf,
4964                                       __u32 targ_id)
4965 {
4966         const struct btf_type *local_type, *targ_type;
4967
4968 recur:
4969         local_type = skip_mods_and_typedefs(local_btf, local_id, &local_id);
4970         targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id);
4971         if (!local_type || !targ_type)
4972                 return -EINVAL;
4973
4974         if (btf_is_composite(local_type) && btf_is_composite(targ_type))
4975                 return 1;
4976         if (btf_kind(local_type) != btf_kind(targ_type))
4977                 return 0;
4978
4979         switch (btf_kind(local_type)) {
4980         case BTF_KIND_PTR:
4981                 return 1;
4982         case BTF_KIND_FWD:
4983         case BTF_KIND_ENUM: {
4984                 const char *local_name, *targ_name;
4985                 size_t local_len, targ_len;
4986
4987                 local_name = btf__name_by_offset(local_btf,
4988                                                  local_type->name_off);
4989                 targ_name = btf__name_by_offset(targ_btf, targ_type->name_off);
4990                 local_len = bpf_core_essential_name_len(local_name);
4991                 targ_len = bpf_core_essential_name_len(targ_name);
4992                 /* one of them is anonymous or both w/ same flavor-less names */
4993                 return local_len == 0 || targ_len == 0 ||
4994                        (local_len == targ_len &&
4995                         strncmp(local_name, targ_name, local_len) == 0);
4996         }
4997         case BTF_KIND_INT:
4998                 /* just reject deprecated bitfield-like integers; all other
4999                  * integers are by default compatible between each other
5000                  */
5001                 return btf_int_offset(local_type) == 0 &&
5002                        btf_int_offset(targ_type) == 0;
5003         case BTF_KIND_ARRAY:
5004                 local_id = btf_array(local_type)->type;
5005                 targ_id = btf_array(targ_type)->type;
5006                 goto recur;
5007         default:
5008                 pr_warn("unexpected kind %d relocated, local [%d], target [%d]\n",
5009                         btf_kind(local_type), local_id, targ_id);
5010                 return 0;
5011         }
5012 }
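
/* Illustrative examples of the rules above: 'int' vs 'long' are compatible
 * (size and signedness ignored), 'int[4]' vs 'long[8]' are compatible
 * (dimensionality ignored, element types checked recursively), but
 * 'int' vs 'int *' are not (kind mismatch).
 */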
5013
5014 /*
5015  * Given single high-level named field accessor in local type, find
5016  * corresponding high-level accessor for a target type. Along the way,
5017  * maintain low-level spec for target as well. Also keep updating target
5018  * bit offset.
5019  *
5020  * Searching is performed through recursive exhaustive enumeration of all
5021  * fields of a struct/union. If there are any anonymous (embedded)
5022  * structs/unions, they are recursively searched as well. If field with
5023  * desired name is found, check compatibility between local and target types,
5024  * before returning result.
5025  *
5026  * 1 is returned, if field is found.
5027  * 0 is returned if no compatible field is found.
5028  * <0 is returned on error.
5029  */
5030 static int bpf_core_match_member(const struct btf *local_btf,
5031                                  const struct bpf_core_accessor *local_acc,
5032                                  const struct btf *targ_btf,
5033                                  __u32 targ_id,
5034                                  struct bpf_core_spec *spec,
5035                                  __u32 *next_targ_id)
5036 {
5037         const struct btf_type *local_type, *targ_type;
5038         const struct btf_member *local_member, *m;
5039         const char *local_name, *targ_name;
5040         __u32 local_id;
5041         int i, n, found;
5042
5043         targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id);
5044         if (!targ_type)
5045                 return -EINVAL;
5046         if (!btf_is_composite(targ_type))
5047                 return 0;
5048
5049         local_id = local_acc->type_id;
5050         local_type = btf__type_by_id(local_btf, local_id);
5051         local_member = btf_members(local_type) + local_acc->idx;
5052         local_name = btf__name_by_offset(local_btf, local_member->name_off);
5053
5054         n = btf_vlen(targ_type);
5055         m = btf_members(targ_type);
5056         for (i = 0; i < n; i++, m++) {
5057                 __u32 bit_offset;
5058
5059                 bit_offset = btf_member_bit_offset(targ_type, i);
5060
5061                 /* too deep struct/union/array nesting */
5062                 if (spec->raw_len == BPF_CORE_SPEC_MAX_LEN)
5063                         return -E2BIG;
5064
5065                 /* speculate this member will be the good one */
5066                 spec->bit_offset += bit_offset;
5067                 spec->raw_spec[spec->raw_len++] = i;
5068
5069                 targ_name = btf__name_by_offset(targ_btf, m->name_off);
5070                 if (str_is_empty(targ_name)) {
5071                         /* embedded struct/union, we need to go deeper */
5072                         found = bpf_core_match_member(local_btf, local_acc,
5073                                                       targ_btf, m->type,
5074                                                       spec, next_targ_id);
5075                         if (found) /* either found or error */
5076                                 return found;
5077                 } else if (strcmp(local_name, targ_name) == 0) {
5078                         /* matching named field */
5079                         struct bpf_core_accessor *targ_acc;
5080
5081                         targ_acc = &spec->spec[spec->len++];
5082                         targ_acc->type_id = targ_id;
5083                         targ_acc->idx = i;
5084                         targ_acc->name = targ_name;
5085
5086                         *next_targ_id = m->type;
5087                         found = bpf_core_fields_are_compat(local_btf,
5088                                                            local_member->type,
5089                                                            targ_btf, m->type);
5090                         if (!found)
5091                                 spec->len--; /* pop accessor */
5092                         return found;
5093                 }
5094                 /* member turned out not to be what we looked for */
5095                 spec->bit_offset -= bit_offset;
5096                 spec->raw_len--;
5097         }
5098
5099         return 0;
5100 }
5101
5102 /* Check local and target types for compatibility. This check is used for
5103  * type-based CO-RE relocations and follows slightly different rules than
5104  * field-based relocations. This function assumes that root types were already
5105  * checked for name match. Beyond that initial root-level name check, names
5106  * are completely ignored. Compatibility rules are as follows:
5107  *   - any two STRUCTs/UNIONs/FWDs/ENUMs/INTs are considered compatible, but
5108  *     kind should match for local and target types (i.e., STRUCT is not
5109  *     compatible with UNION);
5110  *   - for ENUMs, the size is ignored;
5111  *   - for INT, size and signedness are ignored;
5112  *   - for ARRAY, dimensionality is ignored, element types are checked for
5113  *     compatibility recursively;
5114  *   - CONST/VOLATILE/RESTRICT modifiers are ignored;
5115  *   - TYPEDEFs/PTRs are compatible if the types they point to are compatible;
5116  *   - FUNC_PROTOs are compatible if they have compatible signature: same
5117  *     number of input args and compatible return and argument types.
5118  * These rules are not set in stone and probably will be adjusted as we get
5119  * more experience with using BPF CO-RE relocations.
5120  */
5121 static int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id,
5122                                      const struct btf *targ_btf, __u32 targ_id)
5123 {
5124         const struct btf_type *local_type, *targ_type;
5125         int depth = 32; /* max recursion depth */
5126
5127         /* caller made sure that names match (ignoring flavor suffix) */
5128         local_type = btf__type_by_id(local_btf, local_id);
5129         targ_type = btf__type_by_id(targ_btf, targ_id);
5130         if (btf_kind(local_type) != btf_kind(targ_type))
5131                 return 0;
5132
5133 recur:
5134         depth--;
5135         if (depth < 0)
5136                 return -EINVAL;
5137
5138         local_type = skip_mods_and_typedefs(local_btf, local_id, &local_id);
5139         targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id);
5140         if (!local_type || !targ_type)
5141                 return -EINVAL;
5142
5143         if (btf_kind(local_type) != btf_kind(targ_type))
5144                 return 0;
5145
5146         switch (btf_kind(local_type)) {
5147         case BTF_KIND_UNKN:
5148         case BTF_KIND_STRUCT:
5149         case BTF_KIND_UNION:
5150         case BTF_KIND_ENUM:
5151         case BTF_KIND_FWD:
5152                 return 1;
5153         case BTF_KIND_INT:
5154                 /* just reject deprecated bitfield-like integers; all other
5155                  * integers are by default compatible between each other
5156                  */
5157                 return btf_int_offset(local_type) == 0 && btf_int_offset(targ_type) == 0;
5158         case BTF_KIND_PTR:
5159                 local_id = local_type->type;
5160                 targ_id = targ_type->type;
5161                 goto recur;
5162         case BTF_KIND_ARRAY:
5163                 local_id = btf_array(local_type)->type;
5164                 targ_id = btf_array(targ_type)->type;
5165                 goto recur;
5166         case BTF_KIND_FUNC_PROTO: {
5167                 struct btf_param *local_p = btf_params(local_type);
5168                 struct btf_param *targ_p = btf_params(targ_type);
5169                 __u16 local_vlen = btf_vlen(local_type);
5170                 __u16 targ_vlen = btf_vlen(targ_type);
5171                 int i, err;
5172
5173                 if (local_vlen != targ_vlen)
5174                         return 0;
5175
5176                 for (i = 0; i < local_vlen; i++, local_p++, targ_p++) {
5177                         skip_mods_and_typedefs(local_btf, local_p->type, &local_id);
5178                         skip_mods_and_typedefs(targ_btf, targ_p->type, &targ_id);
5179                         err = bpf_core_types_are_compat(local_btf, local_id, targ_btf, targ_id);
5180                         if (err <= 0)
5181                                 return err;
5182                 }
5183
5184                 /* tail recurse for return type check */
5185                 skip_mods_and_typedefs(local_btf, local_type->type, &local_id);
5186                 skip_mods_and_typedefs(targ_btf, targ_type->type, &targ_id);
5187                 goto recur;
5188         }
5189         default:
5190                 pr_warn("unexpected kind %s relocated, local [%d], target [%d]\n",
5191                         btf_kind_str(local_type), local_id, targ_id);
5192                 return 0;
5193         }
5194 }
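
/* Illustrative example of the FUNC_PROTO rule above: 'int (*)(void *, long)'
 * and 'int (*)(void *, int)' are compatible (same arg count, 'long' vs 'int'
 * are compatible INTs), while 'int (*)(void *)' is not (arg count differs).
 */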
5195
5196 /*
5197  * Try to match local spec to a target type and, if successful, produce full
5198  * target spec (high-level, low-level + bit offset).
5199  */
5200 static int bpf_core_spec_match(struct bpf_core_spec *local_spec,
5201                                const struct btf *targ_btf, __u32 targ_id,
5202                                struct bpf_core_spec *targ_spec)
5203 {
5204         const struct btf_type *targ_type;
5205         const struct bpf_core_accessor *local_acc;
5206         struct bpf_core_accessor *targ_acc;
5207         int i, sz, matched;
5208
5209         memset(targ_spec, 0, sizeof(*targ_spec));
5210         targ_spec->btf = targ_btf;
5211         targ_spec->root_type_id = targ_id;
5212         targ_spec->relo_kind = local_spec->relo_kind;
5213
5214         if (core_relo_is_type_based(local_spec->relo_kind)) {
5215                 return bpf_core_types_are_compat(local_spec->btf,
5216                                                  local_spec->root_type_id,
5217                                                  targ_btf, targ_id);
5218         }
5219
5220         local_acc = &local_spec->spec[0];
5221         targ_acc = &targ_spec->spec[0];
5222
5223         if (core_relo_is_enumval_based(local_spec->relo_kind)) {
5224                 size_t local_essent_len, targ_essent_len;
5225                 const struct btf_enum *e;
5226                 const char *targ_name;
5227
5228                 /* has to resolve to an enum */
5229                 targ_type = skip_mods_and_typedefs(targ_spec->btf, targ_id, &targ_id);
5230                 if (!btf_is_enum(targ_type))
5231                         return 0;
5232
5233                 local_essent_len = bpf_core_essential_name_len(local_acc->name);
5234
5235                 for (i = 0, e = btf_enum(targ_type); i < btf_vlen(targ_type); i++, e++) {
5236                         targ_name = btf__name_by_offset(targ_spec->btf, e->name_off);
5237                         targ_essent_len = bpf_core_essential_name_len(targ_name);
5238                         if (targ_essent_len != local_essent_len)
5239                                 continue;
5240                         if (strncmp(local_acc->name, targ_name, local_essent_len) == 0) {
5241                                 targ_acc->type_id = targ_id;
5242                                 targ_acc->idx = i;
5243                                 targ_acc->name = targ_name;
5244                                 targ_spec->len++;
5245                                 targ_spec->raw_spec[targ_spec->raw_len] = targ_acc->idx;
5246                                 targ_spec->raw_len++;
5247                                 return 1;
5248                         }
5249                 }
5250                 return 0;
5251         }
5252
5253         if (!core_relo_is_field_based(local_spec->relo_kind))
5254                 return -EINVAL;
5255
5256         for (i = 0; i < local_spec->len; i++, local_acc++, targ_acc++) {
5257                 targ_type = skip_mods_and_typedefs(targ_spec->btf, targ_id,
5258                                                    &targ_id);
5259                 if (!targ_type)
5260                         return -EINVAL;
5261
5262                 if (local_acc->name) {
5263                         matched = bpf_core_match_member(local_spec->btf,
5264                                                         local_acc,
5265                                                         targ_btf, targ_id,
5266                                                         targ_spec, &targ_id);
5267                         if (matched <= 0)
5268                                 return matched;
5269                 } else {
5270                         /* for i=0, targ_id is already treated as array element
5271                          * type (because it's the original struct), for others
5272                          * we should find array element type first
5273                          */
5274                         if (i > 0) {
5275                                 const struct btf_array *a;
5276                                 bool flex;
5277
5278                                 if (!btf_is_array(targ_type))
5279                                         return 0;
5280
5281                                 a = btf_array(targ_type);
5282                                 flex = is_flex_arr(targ_btf, targ_acc - 1, a);
5283                                 if (!flex && local_acc->idx >= a->nelems)
5284                                         return 0;
5285                                 if (!skip_mods_and_typedefs(targ_btf, a->type,
5286                                                             &targ_id))
5287                                         return -EINVAL;
5288                         }
5289
5290                         /* too deep struct/union/array nesting */
5291                         if (targ_spec->raw_len == BPF_CORE_SPEC_MAX_LEN)
5292                                 return -E2BIG;
5293
5294                         targ_acc->type_id = targ_id;
5295                         targ_acc->idx = local_acc->idx;
5296                         targ_acc->name = NULL;
5297                         targ_spec->len++;
5298                         targ_spec->raw_spec[targ_spec->raw_len] = targ_acc->idx;
5299                         targ_spec->raw_len++;
5300
5301                         sz = btf__resolve_size(targ_btf, targ_id);
5302                         if (sz < 0)
5303                                 return sz;
5304                         targ_spec->bit_offset += local_acc->idx * sz * 8;
5305                 }
5306         }
5307
5308         return 1;
5309 }
5310
5311 static int bpf_core_calc_field_relo(const struct bpf_program *prog,
5312                                     const struct bpf_core_relo *relo,
5313                                     const struct bpf_core_spec *spec,
5314                                     __u32 *val, __u32 *field_sz, __u32 *type_id,
5315                                     bool *validate)
5316 {
5317         const struct bpf_core_accessor *acc;
5318         const struct btf_type *t;
5319         __u32 byte_off, byte_sz, bit_off, bit_sz, field_type_id;
5320         const struct btf_member *m;
5321         const struct btf_type *mt;
5322         bool bitfield;
5323         __s64 sz;
5324
5325         *field_sz = 0;
5326
5327         if (relo->kind == BPF_FIELD_EXISTS) {
5328                 *val = spec ? 1 : 0;
5329                 return 0;
5330         }
5331
5332         if (!spec)
5333                 return -EUCLEAN; /* request instruction poisoning */
5334
5335         acc = &spec->spec[spec->len - 1];
5336         t = btf__type_by_id(spec->btf, acc->type_id);
5337
5338         /* a[n] accessor needs special handling */
5339         if (!acc->name) {
5340                 if (relo->kind == BPF_FIELD_BYTE_OFFSET) {
5341                         *val = spec->bit_offset / 8;
5342                         /* remember field size for load/store mem size */
5343                         sz = btf__resolve_size(spec->btf, acc->type_id);
5344                         if (sz < 0)
5345                                 return -EINVAL;
5346                         *field_sz = sz;
5347                         *type_id = acc->type_id;
5348                 } else if (relo->kind == BPF_FIELD_BYTE_SIZE) {
5349                         sz = btf__resolve_size(spec->btf, acc->type_id);
5350                         if (sz < 0)
5351                                 return -EINVAL;
5352                         *val = sz;
5353                 } else {
5354                         pr_warn("prog '%s': relo %d at insn #%d can't be applied to array access\n",
5355                                 prog->name, relo->kind, relo->insn_off / 8);
5356                         return -EINVAL;
5357                 }
5358                 if (validate)
5359                         *validate = true;
5360                 return 0;
5361         }
5362
5363         m = btf_members(t) + acc->idx;
5364         mt = skip_mods_and_typedefs(spec->btf, m->type, &field_type_id);
5365         bit_off = spec->bit_offset;
5366         bit_sz = btf_member_bitfield_size(t, acc->idx);
5367
5368         bitfield = bit_sz > 0;
5369         if (bitfield) {
5370                 byte_sz = mt->size;
5371                 byte_off = bit_off / 8 / byte_sz * byte_sz;
5372                 /* figure out smallest int size necessary for bitfield load */
5373                 while (bit_off + bit_sz - byte_off * 8 > byte_sz * 8) {
5374                         if (byte_sz >= 8) {
5375                                 /* bitfield can't be read with 64-bit read */
5376                                 pr_warn("prog '%s': relo %d at insn #%d can't be satisfied for bitfield\n",
5377                                         prog->name, relo->kind, relo->insn_off / 8);
5378                                 return -E2BIG;
5379                         }
5380                         byte_sz *= 2;
5381                         byte_off = bit_off / 8 / byte_sz * byte_sz;
5382                 }
5383         } else {
5384                 sz = btf__resolve_size(spec->btf, field_type_id);
5385                 if (sz < 0)
5386                         return -EINVAL;
5387                 byte_sz = sz;
5388                 byte_off = spec->bit_offset / 8;
5389                 bit_sz = byte_sz * 8;
5390         }
5391
5392         /* for bitfields, all the relocatable aspects are ambiguous and we
5393          * might disagree with compiler, so turn off validation of expected
5394          * value, except for signedness
5395          */
5396         if (validate)
5397                 *validate = !bitfield;
5398
5399         switch (relo->kind) {
5400         case BPF_FIELD_BYTE_OFFSET:
5401                 *val = byte_off;
5402                 if (!bitfield) {
5403                         *field_sz = byte_sz;
5404                         *type_id = field_type_id;
5405                 }
5406                 break;
5407         case BPF_FIELD_BYTE_SIZE:
5408                 *val = byte_sz;
5409                 break;
5410         case BPF_FIELD_SIGNED:
5411                 /* enums will be assumed unsigned */
5412                 *val = btf_is_enum(mt) ||
5413                        (btf_int_encoding(mt) & BTF_INT_SIGNED);
5414                 if (validate)
5415                         *validate = true; /* signedness is never ambiguous */
5416                 break;
5417         case BPF_FIELD_LSHIFT_U64:
5418 #if __BYTE_ORDER == __LITTLE_ENDIAN
5419                 *val = 64 - (bit_off + bit_sz - byte_off * 8);
5420 #else
5421                 *val = (8 - byte_sz) * 8 + (bit_off - byte_off * 8);
5422 #endif
5423                 break;
5424         case BPF_FIELD_RSHIFT_U64:
5425                 *val = 64 - bit_sz;
5426                 if (validate)
5427                         *validate = true; /* right shift is never ambiguous */
5428                 break;
5429         case BPF_FIELD_EXISTS:
5430         default:
5431                 return -EOPNOTSUPP;
5432         }
5433
5434         return 0;
5435 }
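
/* Illustrative worked example for the bitfield math above (made-up, packed
 * type so the field may straddle a 4-byte boundary):
 *
 *   struct s { unsigned int pad:30, f:5; } __attribute__((packed));
 *
 * For 'f': bit_off = 30, bit_sz = 5, mt->size = 4, so byte_off starts at 0;
 * 30 + 5 - 0 = 35 > 32 bits, so byte_sz doubles to 8 (byte_off stays 0).
 * On little-endian: LSHIFT_U64 = 64 - 35 = 29, RSHIFT_U64 = 64 - 5 = 59,
 * i.e. load 8 bytes, shift left by 29, then right by 59 to extract the field.
 */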
5436
5437 static int bpf_core_calc_type_relo(const struct bpf_core_relo *relo,
5438                                    const struct bpf_core_spec *spec,
5439                                    __u32 *val)
5440 {
5441         __s64 sz;
5442
5443         /* type-based relos return zero when target type is not found */
5444         if (!spec) {
5445                 *val = 0;
5446                 return 0;
5447         }
5448
5449         switch (relo->kind) {
5450         case BPF_TYPE_ID_TARGET:
5451                 *val = spec->root_type_id;
5452                 break;
5453         case BPF_TYPE_EXISTS:
5454                 *val = 1;
5455                 break;
5456         case BPF_TYPE_SIZE:
5457                 sz = btf__resolve_size(spec->btf, spec->root_type_id);
5458                 if (sz < 0)
5459                         return -EINVAL;
5460                 *val = sz;
5461                 break;
5462         case BPF_TYPE_ID_LOCAL:
5463         /* BPF_TYPE_ID_LOCAL is handled specially and shouldn't get here */
5464         default:
5465                 return -EOPNOTSUPP;
5466         }
5467
5468         return 0;
5469 }
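
/* Illustrative sketch: on the BPF side, these type-based relocations are
 * typically emitted through the bpf_core_read.h convenience macros, e.g.:
 *
 *   if (bpf_core_type_exists(struct task_struct))         // TYPE_EXISTS
 *           sz = bpf_core_type_size(struct task_struct);  // TYPE_SIZE
 *   id = bpf_core_type_id_kernel(struct task_struct);     // TYPE_ID_TARGET
 */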
5470
5471 static int bpf_core_calc_enumval_relo(const struct bpf_core_relo *relo,
5472                                       const struct bpf_core_spec *spec,
5473                                       __u32 *val)
5474 {
5475         const struct btf_type *t;
5476         const struct btf_enum *e;
5477
5478         switch (relo->kind) {
5479         case BPF_ENUMVAL_EXISTS:
5480                 *val = spec ? 1 : 0;
5481                 break;
5482         case BPF_ENUMVAL_VALUE:
5483                 if (!spec)
5484                         return -EUCLEAN; /* request instruction poisoning */
5485                 t = btf__type_by_id(spec->btf, spec->spec[0].type_id);
5486                 e = btf_enum(t) + spec->spec[0].idx;
5487                 *val = e->val;
5488                 break;
5489         default:
5490                 return -EOPNOTSUPP;
5491         }
5492
5493         return 0;
5494 }
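
/* Illustrative sketch: the matching BPF-side helpers from bpf_core_read.h:
 *
 *   if (bpf_core_enum_value_exists(enum pid_type, PIDTYPE_MAX))  // EXISTS
 *           x = bpf_core_enum_value(enum pid_type, PIDTYPE_MAX); // VALUE
 */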
5495
5496 struct bpf_core_relo_res
5497 {
5498         /* expected value in the instruction, unless validate == false */
5499         __u32 orig_val;
5500         /* new value that needs to be patched up to */
5501         __u32 new_val;
5502         /* relocation unsuccessful, poison instruction, but don't fail load */
5503         bool poison;
5504         /* some relocations can't be validated against orig_val */
5505         bool validate;
5506          * for field byte offset relocations of the forms:
5507          *     *(T *)(rX + <off>) = rY
5508          *     rX = *(T *)(rY + <off>),
5509          * we remember original and resolved field size to adjust direct
5510          * memory loads of pointers and integers; this is necessary for 32-bit
5511          * host kernel architectures, but also allows us to automatically
5512          * relocate fields that were resized from, e.g., u32 to u64, etc.
5513          */
5514         bool fail_memsz_adjust;
5515         __u32 orig_sz;
5516         __u32 orig_type_id;
5517         __u32 new_sz;
5518         __u32 new_type_id;
5519 };
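
/* Illustrative example of the mem size adjustment above: if a field is u32
 * in the program's local BTF but u64 in the kernel's, a BPF_LDX | BPF_W load
 * gets patched to BPF_DW during instruction patching (and vice versa); the
 * same mechanism safely narrows loads of pointer fields on 32-bit host
 * kernels into BPF's 64-bit registers.
 */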
5520
5521 /* Calculate original and target relocation values, given local and target
5522  * specs and relocation kind. These values are calculated for each candidate.
5523  * If there are multiple candidates, resulting values should all be consistent
5524  * with each other. Otherwise, libbpf will refuse to proceed due to ambiguity.
5525  * If instruction has to be poisoned, *poison will be set to true.
5526  */
5527 static int bpf_core_calc_relo(const struct bpf_program *prog,
5528                               const struct bpf_core_relo *relo,
5529                               int relo_idx,
5530                               const struct bpf_core_spec *local_spec,
5531                               const struct bpf_core_spec *targ_spec,
5532                               struct bpf_core_relo_res *res)
5533 {
5534         int err = -EOPNOTSUPP;
5535
5536         res->orig_val = 0;
5537         res->new_val = 0;
5538         res->poison = false;
5539         res->validate = true;
5540         res->fail_memsz_adjust = false;
5541         res->orig_sz = res->new_sz = 0;
5542         res->orig_type_id = res->new_type_id = 0;
5543
5544         if (core_relo_is_field_based(relo->kind)) {
5545                 err = bpf_core_calc_field_relo(prog, relo, local_spec,
5546                                                &res->orig_val, &res->orig_sz,
5547                                                &res->orig_type_id, &res->validate);
5548                 err = err ?: bpf_core_calc_field_relo(prog, relo, targ_spec,
5549                                                       &res->new_val, &res->new_sz,
5550                                                       &res->new_type_id, NULL);
5551                 if (err)
5552                         goto done;
5553                 /* Validate if it's safe to adjust load/store memory size.
5554                  * Adjustments are performed only if original and new memory
5555                  * sizes differ.
5556                  */
5557                 res->fail_memsz_adjust = false;
5558                 if (res->orig_sz != res->new_sz) {
5559                         const struct btf_type *orig_t, *new_t;
5560
5561                         orig_t = btf__type_by_id(local_spec->btf, res->orig_type_id);
5562                         new_t = btf__type_by_id(targ_spec->btf, res->new_type_id);
5563
5564                         /* There are two use cases in which it's safe to
5565                          * adjust a load/store's mem size:
5566                          *   - reading a 32-bit kernel pointer, while on the
5567                          *   BPF side pointers are always 64-bit; here it's
5568                          *   safe to "downsize" the instruction, because the
5569                          *   pointer is treated as an unsigned integer with
5570                          *   zero-extended upper 32 bits;
5571                          *   - reading unsigned integers, again because
5572                          *   zero-extension preserves the value correctly.
5573                          *
5574                          * In all other cases attempting the load/store is
5575                          * incorrect, as the read value will be wrong, so we
5576                          * poison the relocated instruction.
5577                          */
5578                         if (btf_is_ptr(orig_t) && btf_is_ptr(new_t))
5579                                 goto done;
5580                         if (btf_is_int(orig_t) && btf_is_int(new_t) &&
5581                             btf_int_encoding(orig_t) != BTF_INT_SIGNED &&
5582                             btf_int_encoding(new_t) != BTF_INT_SIGNED)
5583                                 goto done;
5584
5585                         /* mark as invalid mem size adjustment, but this will
5586                          * only be checked for LDX/STX/ST insns
5587                          */
5588                         res->fail_memsz_adjust = true;
5589                 }
5590         } else if (core_relo_is_type_based(relo->kind)) {
5591                 err = bpf_core_calc_type_relo(relo, local_spec, &res->orig_val);
5592                 err = err ?: bpf_core_calc_type_relo(relo, targ_spec, &res->new_val);
5593         } else if (core_relo_is_enumval_based(relo->kind)) {
5594                 err = bpf_core_calc_enumval_relo(relo, local_spec, &res->orig_val);
5595                 err = err ?: bpf_core_calc_enumval_relo(relo, targ_spec, &res->new_val);
5596         }
5597
5598 done:
5599         if (err == -EUCLEAN) {
5600                 /* EUCLEAN is used to signal instruction poisoning request */
5601                 res->poison = true;
5602                 err = 0;
5603         } else if (err == -EOPNOTSUPP) {
5604                 /* EOPNOTSUPP means unknown/unsupported relocation */
5605                 pr_warn("prog '%s': relo #%d: unrecognized CO-RE relocation %s (%d) at insn #%d\n",
5606                         prog->name, relo_idx, core_relo_kind_str(relo->kind),
5607                         relo->kind, relo->insn_off / 8);
5608         }
5609
5610         return err;
5611 }
5612
5613 /*
5614  * Turn an instruction for which CO-RE relocation failed into an invalid one
5615  * with a distinct signature.
5616  */
5617 static void bpf_core_poison_insn(struct bpf_program *prog, int relo_idx,
5618                                  int insn_idx, struct bpf_insn *insn)
5619 {
5620         pr_debug("prog '%s': relo #%d: substituting insn #%d w/ invalid insn\n",
5621                  prog->name, relo_idx, insn_idx);
5622         insn->code = BPF_JMP | BPF_CALL;
5623         insn->dst_reg = 0;
5624         insn->src_reg = 0;
5625         insn->off = 0;
5626         /* if this instruction is reachable (not dead code), the
5627          * verifier will complain with the following message:
5628          * invalid func unknown#195896080
5629          */
5630         insn->imm = 195896080; /* => 0xbad2310 => "bad relo" */
5631 }
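
/* In practice, a poisoned instruction is expected to be guarded on the BPF
 * side so the verifier never sees it on a reachable path. A minimal sketch
 * using libbpf's CO-RE helpers (illustrative type/field names):
 *
 *	if (bpf_core_field_exists(t->fancy_field))
 *		val = BPF_CORE_READ(t, fancy_field);
 *
 * If fancy_field is missing on the target kernel, the guarded read becomes
 * the invalid call above, but as dead code it is eliminated by the verifier.
 */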
5632
5633 static int insn_bpf_size_to_bytes(struct bpf_insn *insn)
5634 {
5635         switch (BPF_SIZE(insn->code)) {
5636         case BPF_DW: return 8;
5637         case BPF_W: return 4;
5638         case BPF_H: return 2;
5639         case BPF_B: return 1;
5640         default: return -1;
5641         }
5642 }
5643
5644 static int insn_bytes_to_bpf_size(__u32 sz)
5645 {
5646         switch (sz) {
5647         case 8: return BPF_DW;
5648         case 4: return BPF_W;
5649         case 2: return BPF_H;
5650         case 1: return BPF_B;
5651         default: return -1;
5652         }
5653 }
5654
5655 /*
5656  * Patch relocatable BPF instruction.
5657  *
5658  * The patched value is determined by the relocation kind and target
5659  * specification. For existence relocations, target spec will be NULL if the
5660  * field/type is not found. The expected insn->imm value is determined using
5661  * the relocation kind and local spec, and is checked before patching the
5662  * instruction. If the actual insn->imm value is wrong, bail out with an error.
5663  *
5664  * Currently supported classes of BPF instruction are:
5665  * 1. rX = <imm> (assignment with immediate operand);
5666  * 2. rX += <imm> (arithmetic operations with immediate operand);
5667  * 3. rX = <imm64> (load with 64-bit immediate value);
5668  * 4. rX = *(T *)(rY + <off>), where T is one of {u8, u16, u32, u64};
5669  * 5. *(T *)(rX + <off>) = rY, where T is one of {u8, u16, u32, u64};
5670  * 6. *(T *)(rX + <off>) = <imm>, where T is one of {u8, u16, u32, u64}.
5671  */
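
/* As a concrete (illustrative) example of class 4 above: a load compiled
 * against local BTF as
 *
 *	r1 = *(u32 *)(r2 + 104)
 *
 * whose field sits at byte offset 112 in the target kernel gets insn->off
 * patched from 104 to 112; if the field also grew to u64 there, the mem
 * size is rewritten from BPF_W to BPF_DW as well.
 */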
5672 static int bpf_core_patch_insn(struct bpf_program *prog,
5673                                const struct bpf_core_relo *relo,
5674                                int relo_idx,
5675                                const struct bpf_core_relo_res *res)
5676 {
5677         __u32 orig_val, new_val;
5678         struct bpf_insn *insn;
5679         int insn_idx;
5680         __u8 class;
5681
5682         if (relo->insn_off % BPF_INSN_SZ)
5683                 return -EINVAL;
5684         insn_idx = relo->insn_off / BPF_INSN_SZ;
5685         /* adjust insn_idx from section frame of reference to the local
5686          * program's frame of reference; (sub-)program code is not yet
5687          * relocated, so it's enough to just subtract in-section offset
5688          */
5689         insn_idx = insn_idx - prog->sec_insn_off;
5690         insn = &prog->insns[insn_idx];
5691         class = BPF_CLASS(insn->code);
5692
5693         if (res->poison) {
5694 poison:
5695                 /* poison second part of ldimm64 to avoid a confusing error
5696                  * from the verifier about "unknown opcode 00"
5697                  */
5698                 if (is_ldimm64(insn))
5699                         bpf_core_poison_insn(prog, relo_idx, insn_idx + 1, insn + 1);
5700                 bpf_core_poison_insn(prog, relo_idx, insn_idx, insn);
5701                 return 0;
5702         }
5703
5704         orig_val = res->orig_val;
5705         new_val = res->new_val;
5706
5707         switch (class) {
5708         case BPF_ALU:
5709         case BPF_ALU64:
5710                 if (BPF_SRC(insn->code) != BPF_K)
5711                         return -EINVAL;
5712                 if (res->validate && insn->imm != orig_val) {
5713                         pr_warn("prog '%s': relo #%d: unexpected insn #%d (ALU/ALU64) value: got %u, exp %u -> %u\n",
5714                                 prog->name, relo_idx,
5715                                 insn_idx, insn->imm, orig_val, new_val);
5716                         return -EINVAL;
5717                 }
5718                 orig_val = insn->imm;
5719                 insn->imm = new_val;
5720                 pr_debug("prog '%s': relo #%d: patched insn #%d (ALU/ALU64) imm %u -> %u\n",
5721                          prog->name, relo_idx, insn_idx,
5722                          orig_val, new_val);
5723                 break;
5724         case BPF_LDX:
5725         case BPF_ST:
5726         case BPF_STX:
5727                 if (res->validate && insn->off != orig_val) {
5728                         pr_warn("prog '%s': relo #%d: unexpected insn #%d (LDX/ST/STX) value: got %u, exp %u -> %u\n",
5729                                 prog->name, relo_idx, insn_idx, insn->off, orig_val, new_val);
5730                         return -EINVAL;
5731                 }
5732                 if (new_val > SHRT_MAX) {
5733                         pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) value too big: %u\n",
5734                                 prog->name, relo_idx, insn_idx, new_val);
5735                         return -ERANGE;
5736                 }
5737                 if (res->fail_memsz_adjust) {
5738                         pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) accesses field incorrectly. "
5739                                 "Make sure you are accessing pointers, unsigned integers, or fields of matching type and size.\n",
5740                                 prog->name, relo_idx, insn_idx);
5741                         goto poison;
5742                 }
5743
5744                 orig_val = insn->off;
5745                 insn->off = new_val;
5746                 pr_debug("prog '%s': relo #%d: patched insn #%d (LDX/ST/STX) off %u -> %u\n",
5747                          prog->name, relo_idx, insn_idx, orig_val, new_val);
5748
5749                 if (res->new_sz != res->orig_sz) {
5750                         int insn_bytes_sz, insn_bpf_sz;
5751
5752                         insn_bytes_sz = insn_bpf_size_to_bytes(insn);
5753                         if (insn_bytes_sz != res->orig_sz) {
5754                                 pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) unexpected mem size: got %d, exp %u\n",
5755                                         prog->name, relo_idx, insn_idx, insn_bytes_sz, res->orig_sz);
5756                                 return -EINVAL;
5757                         }
5758
5759                         insn_bpf_sz = insn_bytes_to_bpf_size(res->new_sz);
5760                         if (insn_bpf_sz < 0) {
5761                                 pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) invalid new mem size: %u\n",
5762                                         prog->name, relo_idx, insn_idx, res->new_sz);
5763                                 return -EINVAL;
5764                         }
5765
5766                         insn->code = BPF_MODE(insn->code) | insn_bpf_sz | BPF_CLASS(insn->code);
5767                         pr_debug("prog '%s': relo #%d: patched insn #%d (LDX/ST/STX) mem_sz %u -> %u\n",
5768                                  prog->name, relo_idx, insn_idx, res->orig_sz, res->new_sz);
5769                 }
5770                 break;
5771         case BPF_LD: {
5772                 __u64 imm;
5773
5774                 if (!is_ldimm64(insn) ||
5775                     insn[0].src_reg != 0 || insn[0].off != 0 ||
5776                     insn_idx + 1 >= prog->insns_cnt ||
5777                     insn[1].code != 0 || insn[1].dst_reg != 0 ||
5778                     insn[1].src_reg != 0 || insn[1].off != 0) {
5779                         pr_warn("prog '%s': relo #%d: insn #%d (LDIMM64) has unexpected form\n",
5780                                 prog->name, relo_idx, insn_idx);
5781                         return -EINVAL;
5782                 }
5783
5784                 imm = insn[0].imm + ((__u64)insn[1].imm << 32);
5785                 if (res->validate && imm != orig_val) {
5786                         pr_warn("prog '%s': relo #%d: unexpected insn #%d (LDIMM64) value: got %llu, exp %u -> %u\n",
5787                                 prog->name, relo_idx,
5788                                 insn_idx, (unsigned long long)imm,
5789                                 orig_val, new_val);
5790                         return -EINVAL;
5791                 }
5792
5793                 insn[0].imm = new_val;
5794                 insn[1].imm = 0; /* currently only 32-bit values are supported */
5795                 pr_debug("prog '%s': relo #%d: patched insn #%d (LDIMM64) imm64 %llu -> %u\n",
5796                          prog->name, relo_idx, insn_idx,
5797                          (unsigned long long)imm, new_val);
5798                 break;
5799         }
5800         default:
5801                 pr_warn("prog '%s': relo #%d: trying to relocate unrecognized insn #%d, code:0x%x, src:0x%x, dst:0x%x, off:0x%x, imm:0x%x\n",
5802                         prog->name, relo_idx, insn_idx, insn->code,
5803                         insn->src_reg, insn->dst_reg, insn->off, insn->imm);
5804                 return -EINVAL;
5805         }
5806
5807         return 0;
5808 }
5809
5810 /* Output spec definition in the format:
5811  * [<type-id>] <kind> <type-name><spec> (<raw-spec> @ offset <byte-offset>),
5812  * where <spec> is a C-syntax view of the recorded field access, e.g.: .a[3].b
5813  */
5814 static void bpf_core_dump_spec(int level, const struct bpf_core_spec *spec)
5815 {
5816         const struct btf_type *t;
5817         const struct btf_enum *e;
5818         const char *s;
5819         __u32 type_id;
5820         int i;
5821
5822         type_id = spec->root_type_id;
5823         t = btf__type_by_id(spec->btf, type_id);
5824         s = btf__name_by_offset(spec->btf, t->name_off);
5825
5826         libbpf_print(level, "[%u] %s %s", type_id, btf_kind_str(t), str_is_empty(s) ? "<anon>" : s);
5827
5828         if (core_relo_is_type_based(spec->relo_kind))
5829                 return;
5830
5831         if (core_relo_is_enumval_based(spec->relo_kind)) {
5832                 t = skip_mods_and_typedefs(spec->btf, type_id, NULL);
5833                 e = btf_enum(t) + spec->raw_spec[0];
5834                 s = btf__name_by_offset(spec->btf, e->name_off);
5835
5836                 libbpf_print(level, "::%s = %u", s, e->val);
5837                 return;
5838         }
5839
5840         if (core_relo_is_field_based(spec->relo_kind)) {
5841                 for (i = 0; i < spec->len; i++) {
5842                         if (spec->spec[i].name)
5843                                 libbpf_print(level, ".%s", spec->spec[i].name);
5844                         else if (i > 0 || spec->spec[i].idx > 0)
5845                                 libbpf_print(level, "[%u]", spec->spec[i].idx);
5846                 }
5847
5848                 libbpf_print(level, " (");
5849                 for (i = 0; i < spec->raw_len; i++)
5850                         libbpf_print(level, "%s%d", i == 0 ? "" : ":", spec->raw_spec[i]);
5851
5852                 if (spec->bit_offset % 8)
5853                         libbpf_print(level, " @ offset %u.%u)",
5854                                      spec->bit_offset / 8, spec->bit_offset % 8);
5855                 else
5856                         libbpf_print(level, " @ offset %u)", spec->bit_offset / 8);
5857                 return;
5858         }
5859 }
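
/* E.g., a field-based spec dumped by the function above might look like
 * (illustrative type ID, spec, and offset):
 *
 *	[78] struct task_struct.pid (0:24 @ offset 1256)
 */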
5860
5861 static size_t bpf_core_hash_fn(const void *key, void *ctx)
5862 {
5863         return (size_t)key;
5864 }
5865
5866 static bool bpf_core_equal_fn(const void *k1, const void *k2, void *ctx)
5867 {
5868         return k1 == k2;
5869 }
5870
5871 static void *u32_as_hash_key(__u32 x)
5872 {
5873         return (void *)(uintptr_t)x;
5874 }
5875
5876 /*
5877  * CO-RE relocate single instruction.
5878  *
5879  * The outline and important points of the algorithm:
5880  * 1. For given local type, find corresponding candidate target types.
5881  *    Candidate type is a type with the same "essential" name, ignoring
5882  *    everything after last triple underscore (___). E.g., `sample`,
5883  *    `sample___flavor_one`, `sample___flavor_another_one`, are all candidates
5884  *    for each other. Names with triple underscore are referred to as
5885  * "flavors" and are useful, among other things, for specifying/supporting
5886  * incompatible variations of the same kernel struct, which
5887  *    might differ between different kernel versions and/or build
5888  *    configurations.
5889  *
5890  *    N.B. Struct "flavors" could be generated by bpftool's BTF-to-C
5891  *    converter, when the deduplicated BTF of a kernel still contains more
5892  *    than one distinct type with the same name. In that case, ___2, ___3,
5893  *    etc. are appended starting from the second name conflict. But struct
5894  *    flavors are also useful when defined "locally", in the BPF program, to
5895  *    extract the same data despite incompatible changes between kernel
5896  *    versions/configurations. For instance, to handle field renames between
5897  *    kernel versions, one can use two flavors of the struct name with the
5898  *    same common name and use conditional relocations to extract that field,
5899  *    depending on target kernel version.
5900  * 2. For each candidate type, try to match local specification to this
5901  *    candidate target type. Matching involves finding corresponding
5902  *    high-level spec accessors, meaning that all named fields should match,
5903  *    and that all array accesses should be within the actual bounds. Also,
5904  *    types should be compatible (see bpf_core_fields_are_compat for details).
5905  * 3. It is supported and expected that there might be multiple flavors
5906  *    matching the spec. As long as all the specs resolve to the same set of
5907  *    offsets across all candidates, there is no error. If there is any
5908  *    ambiguity, CO-RE relocation will fail. This is necessary to accommodate
5909  *    imperfections of BTF deduplication, which can cause slight duplication of
5910  *    the same BTF type, if some directly or indirectly referenced (by
5911  *    pointer) type gets resolved to different actual types in different
5912  *    object files. If such situation occurs, deduplicated BTF will end up
5913  *    with two (or more) structurally identical types, which differ only in
5914  *    types they refer to through pointer. This should be OK in most cases and
5915  *    is not an error.
5916  * 4. Candidate types search is performed by linearly scanning through all
5917  *    types in target BTF. It is anticipated that this is overall more
5918  *    efficient memory-wise and not significantly worse (if not better)
5919  *    CPU-wise compared to prebuilding a map from all local type names to
5920  *    a list of candidate type names. It's also sped up by caching resolved
5921  *    list of matching candidates for each local "root" type ID that has at
5922  *    least one bpf_core_relo associated with it. This list is shared
5923  *    between multiple relocations for the same type ID and is updated as some
5924  *    of the candidates are pruned due to structural incompatibility.
5925  */
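/* For example (an illustrative sketch, assuming a field was renamed between
 * kernel versions), two local flavors can be defined in the BPF program:
 *
 *	struct sample___old { int value; } __attribute__((preserve_access_index));
 *	struct sample___new { int new_value; } __attribute__((preserve_access_index));
 *
 * Both match candidate type "sample" by essential name, and the program can
 * probe each with bpf_core_field_exists() to read whichever field the target
 * kernel actually has.
 */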
5926 static int bpf_core_apply_relo(struct bpf_program *prog,
5927                                const struct bpf_core_relo *relo,
5928                                int relo_idx,
5929                                const struct btf *local_btf,
5930                                struct hashmap *cand_cache)
5931 {
5932         struct bpf_core_spec local_spec, cand_spec, targ_spec = {};
5933         const void *type_key = u32_as_hash_key(relo->type_id);
5934         struct bpf_core_relo_res cand_res, targ_res;
5935         const struct btf_type *local_type;
5936         const char *local_name;
5937         struct core_cand_list *cands = NULL;
5938         __u32 local_id;
5939         const char *spec_str;
5940         int i, j, err;
5941
5942         local_id = relo->type_id;
5943         local_type = btf__type_by_id(local_btf, local_id);
5944         if (!local_type)
5945                 return -EINVAL;
5946
5947         local_name = btf__name_by_offset(local_btf, local_type->name_off);
5948         if (!local_name)
5949                 return -EINVAL;
5950
5951         spec_str = btf__name_by_offset(local_btf, relo->access_str_off);
5952         if (str_is_empty(spec_str))
5953                 return -EINVAL;
5954
5955         err = bpf_core_parse_spec(local_btf, local_id, spec_str, relo->kind, &local_spec);
5956         if (err) {
5957                 pr_warn("prog '%s': relo #%d: parsing [%d] %s %s + %s failed: %d\n",
5958                         prog->name, relo_idx, local_id, btf_kind_str(local_type),
5959                         str_is_empty(local_name) ? "<anon>" : local_name,
5960                         spec_str, err);
5961                 return -EINVAL;
5962         }
5963
5964         pr_debug("prog '%s': relo #%d: kind <%s> (%d), spec is ", prog->name,
5965                  relo_idx, core_relo_kind_str(relo->kind), relo->kind);
5966         bpf_core_dump_spec(LIBBPF_DEBUG, &local_spec);
5967         libbpf_print(LIBBPF_DEBUG, "\n");
5968
5969         /* TYPE_ID_LOCAL relo is special and doesn't need candidate search */
5970         if (relo->kind == BPF_TYPE_ID_LOCAL) {
5971                 targ_res.validate = true;
5972                 targ_res.poison = false;
5973                 targ_res.orig_val = local_spec.root_type_id;
5974                 targ_res.new_val = local_spec.root_type_id;
5975                 goto patch_insn;
5976         }
5977
5978         /* libbpf doesn't support candidate search for anonymous types */
5979         if (str_is_empty(local_name)) {
5980                 pr_warn("prog '%s': relo #%d: <%s> (%d) relocation doesn't support anonymous types\n",
5981                         prog->name, relo_idx, core_relo_kind_str(relo->kind), relo->kind);
5982                 return -EOPNOTSUPP;
5983         }
5984
5985         if (!hashmap__find(cand_cache, type_key, (void **)&cands)) {
5986                 cands = bpf_core_find_cands(prog->obj, local_btf, local_id);
5987                 if (IS_ERR(cands)) {
5988                         pr_warn("prog '%s': relo #%d: target candidate search failed for [%d] %s %s: %ld\n",
5989                                 prog->name, relo_idx, local_id, btf_kind_str(local_type),
5990                                 local_name, PTR_ERR(cands));
5991                         return PTR_ERR(cands);
5992                 }
5993                 err = hashmap__set(cand_cache, type_key, cands, NULL, NULL);
5994                 if (err) {
5995                         bpf_core_free_cands(cands);
5996                         return err;
5997                 }
5998         }
5999
6000         for (i = 0, j = 0; i < cands->len; i++) {
6001                 err = bpf_core_spec_match(&local_spec, cands->cands[i].btf,
6002                                           cands->cands[i].id, &cand_spec);
6003                 if (err < 0) {
6004                         pr_warn("prog '%s': relo #%d: error matching candidate #%d ",
6005                                 prog->name, relo_idx, i);
6006                         bpf_core_dump_spec(LIBBPF_WARN, &cand_spec);
6007                         libbpf_print(LIBBPF_WARN, ": %d\n", err);
6008                         return err;
6009                 }
6010
6011                 pr_debug("prog '%s': relo #%d: %s candidate #%d ", prog->name,
6012                          relo_idx, err == 0 ? "non-matching" : "matching", i);
6013                 bpf_core_dump_spec(LIBBPF_DEBUG, &cand_spec);
6014                 libbpf_print(LIBBPF_DEBUG, "\n");
6015
6016                 if (err == 0)
6017                         continue;
6018
6019                 err = bpf_core_calc_relo(prog, relo, relo_idx, &local_spec, &cand_spec, &cand_res);
6020                 if (err)
6021                         return err;
6022
6023                 if (j == 0) {
6024                         targ_res = cand_res;
6025                         targ_spec = cand_spec;
6026                 } else if (cand_spec.bit_offset != targ_spec.bit_offset) {
6027                         /* if there are many field relo candidates, they
6028                          * should all resolve to the same bit offset
6029                          */
6030                         pr_warn("prog '%s': relo #%d: field offset ambiguity: %u != %u\n",
6031                                 prog->name, relo_idx, cand_spec.bit_offset,
6032                                 targ_spec.bit_offset);
6033                         return -EINVAL;
6034                 } else if (cand_res.poison != targ_res.poison || cand_res.new_val != targ_res.new_val) {
6035                         /* all candidates should result in the same relocation
6036                          * decision and value, otherwise it's dangerous to
6037                          * proceed due to ambiguity
6038                          */
6039                         pr_warn("prog '%s': relo #%d: relocation decision ambiguity: %s %u != %s %u\n",
6040                                 prog->name, relo_idx,
6041                                 cand_res.poison ? "failure" : "success", cand_res.new_val,
6042                                 targ_res.poison ? "failure" : "success", targ_res.new_val);
6043                         return -EINVAL;
6044                 }
6045
6046                 cands->cands[j++] = cands->cands[i];
6047         }
6048
6049         /*
6050          * For a BPF_FIELD_EXISTS relo, or when the BPF program has field
6051          * existence or kernel version/config checks, it's expected that we
6052          * might not find any candidates. In this case, if the field wasn't
6053          * found in any candidate, the list of candidates shouldn't change
6054          * at all; we'll just handle the relocation appropriately, depending
6055          * on the relo's kind.
6056          */
6057         if (j > 0)
6058                 cands->len = j;
6059
6060         /*
6061          * If no candidates were found, it might be either a programmer error
6062          * or an expected case, depending on whether the instruction with the
6063          * relocation is guarded in some way that makes it unreachable (dead
6064          * code) if the relocation can't be resolved. This is handled in
6065          * bpf_core_patch_insn() uniformly by replacing that instruction with
6066          * a BPF helper call insn (using an invalid helper ID). If that
6067          * instruction is indeed unreachable, it will be ignored and eliminated
6068          * by the verifier. If it was an error, the verifier will complain and
6069          * point to a specific instruction number in its log.
6070          */
6071         if (j == 0) {
6072                 pr_debug("prog '%s': relo #%d: no matching targets found\n",
6073                          prog->name, relo_idx);
6074
6075                 /* calculate single target relo result explicitly */
6076                 err = bpf_core_calc_relo(prog, relo, relo_idx, &local_spec, NULL, &targ_res);
6077                 if (err)
6078                         return err;
6079         }
6080
6081 patch_insn:
6082         /* bpf_core_patch_insn() should know how to handle missing targ_spec */
6083         err = bpf_core_patch_insn(prog, relo, relo_idx, &targ_res);
6084         if (err) {
6085                 pr_warn("prog '%s': relo #%d: failed to patch insn at offset %d: %d\n",
6086                         prog->name, relo_idx, relo->insn_off, err);
6087                 return -EINVAL;
6088         }
6089
6090         return 0;
6091 }
6092
6093 static int
6094 bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path)
6095 {
6096         const struct btf_ext_info_sec *sec;
6097         const struct bpf_core_relo *rec;
6098         const struct btf_ext_info *seg;
6099         struct hashmap_entry *entry;
6100         struct hashmap *cand_cache = NULL;
6101         struct bpf_program *prog;
6102         const char *sec_name;
6103         int i, err = 0, insn_idx, sec_idx;
6104
6105         if (obj->btf_ext->core_relo_info.len == 0)
6106                 return 0;
6107
6108         if (targ_btf_path) {
6109                 obj->btf_vmlinux_override = btf__parse(targ_btf_path, NULL);
6110                 if (IS_ERR_OR_NULL(obj->btf_vmlinux_override)) {
6111                         err = PTR_ERR(obj->btf_vmlinux_override);
6112                         pr_warn("failed to parse target BTF: %d\n", err);
6113                         return err;
6114                 }
6115         }
6116
6117         cand_cache = hashmap__new(bpf_core_hash_fn, bpf_core_equal_fn, NULL);
6118         if (IS_ERR(cand_cache)) {
6119                 err = PTR_ERR(cand_cache);
6120                 goto out;
6121         }
6122
6123         seg = &obj->btf_ext->core_relo_info;
6124         for_each_btf_ext_sec(seg, sec) {
6125                 sec_name = btf__name_by_offset(obj->btf, sec->sec_name_off);
6126                 if (str_is_empty(sec_name)) {
6127                         err = -EINVAL;
6128                         goto out;
6129                 }
6130                 /* bpf_object's ELF is gone by now so it's not easy to find
6131                  * section index by section name, but we can find *any*
6132                  * bpf_program within the desired section and use its
6133                  * prog->sec_idx to do a proper search by section index and
6134                  * instruction offset
6135                  */
6136                 prog = NULL;
6137                 for (i = 0; i < obj->nr_programs; i++) {
6138                         prog = &obj->programs[i];
6139                         if (strcmp(prog->sec_name, sec_name) == 0)
6140                                 break;
6141                 }
6142                 if (!prog) {
6143                         pr_warn("sec '%s': failed to find a BPF program\n", sec_name);
6144                         return -ENOENT;
6145                 }
6146                 sec_idx = prog->sec_idx;
6147
6148                 pr_debug("sec '%s': found %d CO-RE relocations\n",
6149                          sec_name, sec->num_info);
6150
6151                 for_each_btf_ext_rec(seg, sec, i, rec) {
6152                         insn_idx = rec->insn_off / BPF_INSN_SZ;
6153                         prog = find_prog_by_sec_insn(obj, sec_idx, insn_idx);
6154                         if (!prog) {
6155                                 pr_warn("sec '%s': failed to find program at insn #%d for CO-RE offset relocation #%d\n",
6156                                         sec_name, insn_idx, i);
6157                                 err = -EINVAL;
6158                                 goto out;
6159                         }
6160                         /* no need to apply CO-RE relocation if the program is
6161                          * not going to be loaded
6162                          */
6163                         if (!prog->load)
6164                                 continue;
6165
6166                         err = bpf_core_apply_relo(prog, rec, i, obj->btf, cand_cache);
6167                         if (err) {
6168                                 pr_warn("prog '%s': relo #%d: failed to relocate: %d\n",
6169                                         prog->name, i, err);
6170                                 goto out;
6171                         }
6172                 }
6173         }
6174
6175 out:
6176         /* obj->btf_vmlinux and module BTFs are freed after object load */
6177         btf__free(obj->btf_vmlinux_override);
6178         obj->btf_vmlinux_override = NULL;
6179
6180         if (!IS_ERR_OR_NULL(cand_cache)) {
6181                 hashmap__for_each_entry(cand_cache, entry, i) {
6182                         bpf_core_free_cands(entry->value);
6183                 }
6184                 hashmap__free(cand_cache);
6185         }
6186         return err;
6187 }
6188
6189 /* Relocate data references within program code:
6190  *  - map references;
6191  *  - global variable references;
6192  *  - extern references.
6193  */
6194 static int
6195 bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog)
6196 {
6197         int i;
6198
6199         for (i = 0; i < prog->nr_reloc; i++) {
6200                 struct reloc_desc *relo = &prog->reloc_desc[i];
6201                 struct bpf_insn *insn = &prog->insns[relo->insn_idx];
6202                 struct extern_desc *ext;
6203
6204                 switch (relo->type) {
6205                 case RELO_LD64:
6206                         insn[0].src_reg = BPF_PSEUDO_MAP_FD;
6207                         insn[0].imm = obj->maps[relo->map_idx].fd;
6208                         relo->processed = true;
6209                         break;
6210                 case RELO_DATA:
6211                         insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
6212                         insn[1].imm = insn[0].imm + relo->sym_off;
6213                         insn[0].imm = obj->maps[relo->map_idx].fd;
6214                         relo->processed = true;
6215                         break;
6216                 case RELO_EXTERN:
6217                         ext = &obj->externs[relo->sym_off];
6218                         if (ext->type == EXT_KCFG) {
6219                                 insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
6220                                 insn[0].imm = obj->maps[obj->kconfig_map_idx].fd;
6221                                 insn[1].imm = ext->kcfg.data_off;
6222                         } else /* EXT_KSYM */ {
6223                                 if (ext->ksym.type_id) { /* typed ksyms */
6224                                         insn[0].src_reg = BPF_PSEUDO_BTF_ID;
6225                                         insn[0].imm = ext->ksym.kernel_btf_id;
6226                                         insn[1].imm = ext->ksym.kernel_btf_obj_fd;
6227                                 } else { /* typeless ksyms */
6228                                         insn[0].imm = (__u32)ext->ksym.addr;
6229                                         insn[1].imm = ext->ksym.addr >> 32;
6230                                 }
6231                         }
6232                         relo->processed = true;
6233                         break;
6234                 case RELO_SUBPROG_ADDR:
6235                         insn[0].src_reg = BPF_PSEUDO_FUNC;
6236                         /* will be handled as a follow-up pass */
6237                         break;
6238                 case RELO_CALL:
6239                         /* will be handled as a follow-up pass */
6240                         break;
6241                 default:
6242                         pr_warn("prog '%s': relo #%d: bad relo type %d\n",
6243                                 prog->name, i, relo->type);
6244                         return -EINVAL;
6245                 }
6246         }
6247
6248         return 0;
6249 }
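
/* To make the ldimm64 patching above concrete (illustrative variable name):
 * a global variable access like x = my_var; compiles to a two-instruction
 * ldimm64 against the .data map, which libbpf rewrites roughly as:
 *
 *	insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
 *	insn[0].imm = <.data map fd>;
 *	insn[1].imm = <byte offset of my_var within .data>;
 *
 * The kernel then converts this pair into a direct map value address.
 */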
6250
6251 static int adjust_prog_btf_ext_info(const struct bpf_object *obj,
6252                                     const struct bpf_program *prog,
6253                                     const struct btf_ext_info *ext_info,
6254                                     void **prog_info, __u32 *prog_rec_cnt,
6255                                     __u32 *prog_rec_sz)
6256 {
6257         void *copy_start = NULL, *copy_end = NULL;
6258         void *rec, *rec_end, *new_prog_info;
6259         const struct btf_ext_info_sec *sec;
6260         size_t old_sz, new_sz;
6261         const char *sec_name;
6262         int i, off_adj;
6263
6264         for_each_btf_ext_sec(ext_info, sec) {
6265                 sec_name = btf__name_by_offset(obj->btf, sec->sec_name_off);
6266                 if (!sec_name)
6267                         return -EINVAL;
6268                 if (strcmp(sec_name, prog->sec_name) != 0)
6269                         continue;
6270
6271                 for_each_btf_ext_rec(ext_info, sec, i, rec) {
6272                         __u32 insn_off = *(__u32 *)rec / BPF_INSN_SZ;
6273
6274                         if (insn_off < prog->sec_insn_off)
6275                                 continue;
6276                         if (insn_off >= prog->sec_insn_off + prog->sec_insn_cnt)
6277                                 break;
6278
6279                         if (!copy_start)
6280                                 copy_start = rec;
6281                         copy_end = rec + ext_info->rec_size;
6282                 }
6283
6284                 if (!copy_start)
6285                         return -ENOENT;
6286
6287                 /* append func/line info of a given (sub-)program to the main
6288                  * program func/line info
6289                  */
6290                 old_sz = (size_t)(*prog_rec_cnt) * ext_info->rec_size;
6291                 new_sz = old_sz + (copy_end - copy_start);
6292                 new_prog_info = realloc(*prog_info, new_sz);
6293                 if (!new_prog_info)
6294                         return -ENOMEM;
6295                 *prog_info = new_prog_info;
6296                 *prog_rec_cnt = new_sz / ext_info->rec_size;
6297                 memcpy(new_prog_info + old_sz, copy_start, copy_end - copy_start);
6298
6299                 /* Kernel instruction offsets are in units of 8-byte
6300                  * instructions, while .BTF.ext instruction offsets generated
6301                  * by Clang are in units of bytes. So convert Clang offsets
6302                  * into kernel offsets and adjust the offset according to the
6303                  * program's relocated position; see the worked example below.
6304                  */
6305                 off_adj = prog->sub_insn_off - prog->sec_insn_off;
6306                 rec = new_prog_info + old_sz;
6307                 rec_end = new_prog_info + new_sz;
6308                 for (; rec < rec_end; rec += ext_info->rec_size) {
6309                         __u32 *insn_off = rec;
6310
6311                         *insn_off = *insn_off / BPF_INSN_SZ + off_adj;
6312                 }
6313                 *prog_rec_sz = ext_info->rec_size;
6314                 return 0;
6315         }
6316
6317         return -ENOENT;
6318 }
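
/* Worked example for the offset adjustment above (illustrative numbers):
 * a subprog starting at sec_insn_off 10 within its section is appended
 * into the main prog at sub_insn_off 100, so off_adj = 100 - 10 = 90; a
 * record with a byte offset of 96 becomes insn offset 96 / 8 = 12, which
 * is adjusted to 12 + 90 = 102 in the final program.
 */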
6319
6320 static int
6321 reloc_prog_func_and_line_info(const struct bpf_object *obj,
6322                               struct bpf_program *main_prog,
6323                               const struct bpf_program *prog)
6324 {
6325         int err;
6326
6327         /* no .BTF.ext relocation if .BTF.ext is missing or kernel doesn't
6328          * support func/line info
6329          */
6330         if (!obj->btf_ext || !kernel_supports(FEAT_BTF_FUNC))
6331                 return 0;
6332
6333         /* only attempt func info relocation if main program's func_info
6334          * relocation was successful
6335          */
6336         if (main_prog != prog && !main_prog->func_info)
6337                 goto line_info;
6338
6339         err = adjust_prog_btf_ext_info(obj, prog, &obj->btf_ext->func_info,
6340                                        &main_prog->func_info,
6341                                        &main_prog->func_info_cnt,
6342                                        &main_prog->func_info_rec_size);
6343         if (err) {
6344                 if (err != -ENOENT) {
6345                         pr_warn("prog '%s': error relocating .BTF.ext function info: %d\n",
6346                                 prog->name, err);
6347                         return err;
6348                 }
6349                 if (main_prog->func_info) {
6350                         /*
6351                          * Some info has already been found, but there was a
6352                          * problem with the latest btf_ext reloc; error out.
6353                          */
6354                         pr_warn("prog '%s': missing .BTF.ext function info.\n", prog->name);
6355                         return err;
6356                 }
6357                 /* There was a problem loading the very first info; ignore the rest. */
6358                 pr_warn("prog '%s': missing .BTF.ext function info for the main program, skipping all of .BTF.ext func info.\n",
6359                         prog->name);
6360         }
6361
6362 line_info:
6363         /* don't relocate line info if main program's relocation failed */
6364         if (main_prog != prog && !main_prog->line_info)
6365                 return 0;
6366
6367         err = adjust_prog_btf_ext_info(obj, prog, &obj->btf_ext->line_info,
6368                                        &main_prog->line_info,
6369                                        &main_prog->line_info_cnt,
6370                                        &main_prog->line_info_rec_size);
6371         if (err) {
6372                 if (err != -ENOENT) {
6373                         pr_warn("prog '%s': error relocating .BTF.ext line info: %d\n",
6374                                 prog->name, err);
6375                         return err;
6376                 }
6377                 if (main_prog->line_info) {
6378                         /*
6379                          * Some info has already been found, but there was a
6380                          * problem with the latest btf_ext reloc; error out.
6381                          */
6382                         pr_warn("prog '%s': missing .BTF.ext line info.\n", prog->name);
6383                         return err;
6384                 }
6385                 /* There was a problem loading the very first info; ignore the rest. */
6386                 pr_warn("prog '%s': missing .BTF.ext line info for the main program, skipping all of .BTF.ext line info.\n",
6387                         prog->name);
6388         }
6389         return 0;
6390 }
6391
6392 static int cmp_relo_by_insn_idx(const void *key, const void *elem)
6393 {
6394         size_t insn_idx = *(const size_t *)key;
6395         const struct reloc_desc *relo = elem;
6396
6397         if (insn_idx == relo->insn_idx)
6398                 return 0;
6399         return insn_idx < relo->insn_idx ? -1 : 1;
6400 }
6401
6402 static struct reloc_desc *find_prog_insn_relo(const struct bpf_program *prog, size_t insn_idx)
6403 {
6404         return bsearch(&insn_idx, prog->reloc_desc, prog->nr_reloc,
6405                        sizeof(*prog->reloc_desc), cmp_relo_by_insn_idx);
6406 }
6407
6408 static int
6409 bpf_object__reloc_code(struct bpf_object *obj, struct bpf_program *main_prog,
6410                        struct bpf_program *prog)
6411 {
6412         size_t sub_insn_idx, insn_idx, new_cnt;
6413         struct bpf_program *subprog;
6414         struct bpf_insn *insns, *insn;
6415         struct reloc_desc *relo;
6416         int err;
6417
6418         err = reloc_prog_func_and_line_info(obj, main_prog, prog);
6419         if (err)
6420                 return err;
6421
6422         for (insn_idx = 0; insn_idx < prog->sec_insn_cnt; insn_idx++) {
6423                 insn = &main_prog->insns[prog->sub_insn_off + insn_idx];
6424                 if (!insn_is_subprog_call(insn) && !insn_is_pseudo_func(insn))
6425                         continue;
6426
6427                 relo = find_prog_insn_relo(prog, insn_idx);
6428                 if (relo && relo->type != RELO_CALL && relo->type != RELO_SUBPROG_ADDR) {
6429                         pr_warn("prog '%s': unexpected relo for insn #%zu, type %d\n",
6430                                 prog->name, insn_idx, relo->type);
6431                         return -LIBBPF_ERRNO__RELOC;
6432                 }
6433                 if (relo) {
6434                         /* sub-program instruction index is a combination of
6435                          * an offset of a symbol pointed to by relocation and
6436                          * call instruction's imm field; for global functions,
6437                          * call always has imm = -1, but for static functions
6438                          * relocation is against STT_SECTION and insn->imm
6439                          * points to a start of a static function
6440                          * points to the start of a static function
6441                          * for subprog addr relocation, the relo->sym_off + insn->imm is
6442                          * the byte offset in the corresponding section.
6443                          */
6444                         if (relo->type == RELO_CALL)
6445                                 sub_insn_idx = relo->sym_off / BPF_INSN_SZ + insn->imm + 1;
6446                         else
6447                                 sub_insn_idx = (relo->sym_off + insn->imm) / BPF_INSN_SZ;
6448                 } else if (insn_is_pseudo_func(insn)) {
6449                         /*
6450                          * RELO_SUBPROG_ADDR relo is always emitted even if both
6451                          * functions are in the same section, so we shouldn't get here.
6452                          */
6453                         pr_warn("prog '%s': missing subprog addr relo for insn #%zu\n",
6454                                 prog->name, insn_idx);
6455                         return -LIBBPF_ERRNO__RELOC;
6456                 } else {
6457                         /* if subprogram call is to a static function within
6458                          * the same ELF section, there won't be any relocation
6459                          * emitted, but it also means there is no additional
6460                  * offset necessary; insn->imm is relative to the
6461                          * instruction's original position within the section
6462                          */
6463                         sub_insn_idx = prog->sec_insn_off + insn_idx + insn->imm + 1;
6464                 }
6465
6466                 /* we enforce that sub-programs should be in .text section */
6467                 subprog = find_prog_by_sec_insn(obj, obj->efile.text_shndx, sub_insn_idx);
6468                 if (!subprog) {
6469                         pr_warn("prog '%s': no .text section found yet sub-program call exists\n",
6470                                 prog->name);
6471                         return -LIBBPF_ERRNO__RELOC;
6472                 }
6473
6474                 /* if it's the first call instruction calling into this
6475                  * subprogram (meaning this subprog hasn't been processed
6476                  * yet) within the context of current main program:
6477                  *   - append it at the end of main program's instructions blob;
6478                  *   - process it recursively, while current program is put on hold;
6479                  *   - if that subprogram calls some other not-yet-processed
6480                  *   subprogram, the same thing will happen recursively until
6481                  *   there are no more unprocessed subprograms left to append
6482                  *   and relocate.
6483                  */
6484                 if (subprog->sub_insn_off == 0) {
6485                         subprog->sub_insn_off = main_prog->insns_cnt;
6486
6487                         new_cnt = main_prog->insns_cnt + subprog->insns_cnt;
6488                         insns = libbpf_reallocarray(main_prog->insns, new_cnt, sizeof(*insns));
6489                         if (!insns) {
6490                                 pr_warn("prog '%s': failed to realloc prog code\n", main_prog->name);
6491                                 return -ENOMEM;
6492                         }
6493                         main_prog->insns = insns;
6494                         main_prog->insns_cnt = new_cnt;
6495
6496                         memcpy(main_prog->insns + subprog->sub_insn_off, subprog->insns,
6497                                subprog->insns_cnt * sizeof(*insns));
6498
6499                         pr_debug("prog '%s': added %zu insns from sub-prog '%s'\n",
6500                                  main_prog->name, subprog->insns_cnt, subprog->name);
6501
6502                         err = bpf_object__reloc_code(obj, main_prog, subprog);
6503                         if (err)
6504                                 return err;
6505                 }
6506
6507                 /* main_prog->insns memory could have been re-allocated, so
6508                  * calculate pointer again
6509                  */
6510                 insn = &main_prog->insns[prog->sub_insn_off + insn_idx];
6511                 /* calculate correct instruction position within current main
6512                  * prog; each main prog can have a different set of
6513                  * subprograms appended (potentially in different order as
6514                  * well), so position of any subprog can be different for
6515                  * different main programs */
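                /* e.g. (illustrative numbers): if the subprog was appended
                 * at sub_insn_off 100 and this call sits at overall index
                 * prog->sub_insn_off + insn_idx == 20, then imm becomes
                 * 100 - 20 - 1 = 79; the kernel computes the jump target as
                 * call_idx + imm + 1 == 100.
                 */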
6516                 insn->imm = subprog->sub_insn_off - (prog->sub_insn_off + insn_idx) - 1;
6517
6518                 if (relo)
6519                         relo->processed = true;
6520
6521                 pr_debug("prog '%s': insn #%zu relocated, imm %d points to subprog '%s' (now at %zu offset)\n",
6522                          prog->name, insn_idx, insn->imm, subprog->name, subprog->sub_insn_off);
6523         }
6524
6525         return 0;
6526 }
6527
6528 /*
6529  * Relocate sub-program calls.
6530  *
6531  * Algorithm operates as follows. Each entry-point BPF program (referred to as
6532  * main prog) is processed separately. Each subprog (a non-entry function
6533  * that can be called from either entry progs or other subprogs) gets its
6534  * sub_insn_off reset to zero. This serves as an indicator that this subprogram
6535  * hasn't yet been appended and relocated within the current main prog. Once it's
6536  * relocated, sub_insn_off will point at the position within the current main
6537  * prog where the given subprog was appended. This will further be used to relocate all
6538  * the call instructions jumping into this subprog.
6539  *
6540  * We start with main program and process all call instructions. If the call
6541  * is into a subprog that hasn't been processed (i.e., subprog->sub_insn_off
6542  * is zero), subprog instructions are appended at the end of main program's
6543  * instruction array. Then main program is "put on hold" while we recursively
6544  * process newly appended subprogram. If that subprogram calls into another
6545  * subprogram that hasn't been appended, new subprogram is appended again to
6546  * the *main* prog's instructions (subprog's instructions are always left
6547  * untouched, as they need to be in unmodified state for subsequent main progs
6548  * and subprog instructions are always sent only as part of a main prog) and
6549  * the process continues recursively. Once all the subprogs called from a main
6550  * prog or any of its subprogs are appended (and relocated), all their
6551  * positions within finalized instructions array are known, so it's easy to
6552  * rewrite call instructions with correct relative offsets, corresponding to
6553  * desired target subprog.
6554  *
6555  * It's important to realize that some subprogs might not be called from some
6556  * main prog or any of its called/used subprogs. Those will keep their
6557  * subprog->sub_insn_off as zero at all times and won't be appended to current
6558  * main prog and won't be relocated within the context of current main prog.
6559  * They might still be used from other main progs later.
6560  *
6561  * Visually this process can be shown as below. Suppose we have two main
6562  * programs mainA and mainB and BPF object contains three subprogs: subA,
6563  * subB, and subC. mainA calls only subA, mainB calls only subC, but subA and
6564  * subC both call subB:
6565  *
6566  *        +--------+ +-------+
6567  *        |        v v       |
6568  *     +--+---+ +--+-+-+ +---+--+
6569  *     | subA | | subB | | subC |
6570  *     +--+---+ +------+ +---+--+
6571  *        ^                  ^
6572  *        |                  |
6573  *    +---+-------+   +------+----+
6574  *    |   mainA   |   |   mainB   |
6575  *    +-----------+   +-----------+
6576  *
6577  * We'll start relocating mainA, will find subA, append it and start
6578  * processing sub A recursively:
6579  *
6580  *    +-----------+------+
6581  *    |   mainA   | subA |
6582  *    +-----------+------+
6583  *
6584  * At this point we notice that subB is used from subA, so we append it and
6585  * relocate (there are no further subcalls from subB):
6586  *
6587  *    +-----------+------+------+
6588  *    |   mainA   | subA | subB |
6589  *    +-----------+------+------+
6590  *
6591  * At this point, we relocate subA calls, then go one level up and finish with
6592  * relocating mainA calls. mainA is done.
6593  *
6594  * For mainB the process is similar but results in a different order. We start with
6595  * mainB and skip subA and subB, as mainB never calls them (at least
6596  * directly), but we see subC is needed, so we append and start processing it:
6597  *
6598  *    +-----------+------+
6599  *    |   mainB   | subC |
6600  *    +-----------+------+
6601  * Now we see subC needs subB, so we go back to it, append and relocate it:
6602  *
6603  *    +-----------+------+------+
6604  *    |   mainB   | subC | subB |
6605  *    +-----------+------+------+
6606  *
6607  * At this point we unwind recursion, relocate calls in subC, then in mainB.
6608  */
6609 static int
6610 bpf_object__relocate_calls(struct bpf_object *obj, struct bpf_program *prog)
6611 {
6612         struct bpf_program *subprog;
6613         int i, j, err;
6614
6615         /* mark all subprogs as not relocated (yet) within the context of
6616          * current main program
6617          */
6618         for (i = 0; i < obj->nr_programs; i++) {
6619                 subprog = &obj->programs[i];
6620                 if (!prog_is_subprog(obj, subprog))
6621                         continue;
6622
6623                 subprog->sub_insn_off = 0;
6624                 for (j = 0; j < subprog->nr_reloc; j++)
6625                         if (subprog->reloc_desc[j].type == RELO_CALL)
6626                                 subprog->reloc_desc[j].processed = false;
6627         }
6628
6629         err = bpf_object__reloc_code(obj, prog, prog);
6630         if (err)
6631                 return err;
6632
6633
6634         return 0;
6635 }
6636
6637 static int
6638 bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path)
6639 {
6640         struct bpf_program *prog;
6641         size_t i;
6642         int err;
6643
6644         if (obj->btf_ext) {
6645                 err = bpf_object__relocate_core(obj, targ_btf_path);
6646                 if (err) {
6647                         pr_warn("failed to perform CO-RE relocations: %d\n",
6648                                 err);
6649                         return err;
6650                 }
6651         }
6652         /* relocate data references first for all programs and sub-programs,
6653          * as they don't change relative to code locations, so subsequent
6654          * subprogram processing won't need to re-calculate any of them
6655          */
6656         for (i = 0; i < obj->nr_programs; i++) {
6657                 prog = &obj->programs[i];
6658                 err = bpf_object__relocate_data(obj, prog);
6659                 if (err) {
6660                         pr_warn("prog '%s': failed to relocate data references: %d\n",
6661                                 prog->name, err);
6662                         return err;
6663                 }
6664         }
6665         /* now relocate subprogram calls and append used subprograms to main
6666          * programs; each copy of subprogram code needs to be relocated
6667          * differently for each main program, because its code location might
6668          * have changed
6669          */
6670         for (i = 0; i < obj->nr_programs; i++) {
6671                 prog = &obj->programs[i];
6672                 /* sub-program's sub-calls are relocated within the context of
6673                  * its main program only
6674                  */
6675                 if (prog_is_subprog(obj, prog))
6676                         continue;
6677
6678                 err = bpf_object__relocate_calls(obj, prog);
6679                 if (err) {
6680                         pr_warn("prog '%s': failed to relocate calls: %d\n",
6681                                 prog->name, err);
6682                         return err;
6683                 }
6684         }
6685         /* free up relocation descriptors */
6686         for (i = 0; i < obj->nr_programs; i++) {
6687                 prog = &obj->programs[i];
6688                 zfree(&prog->reloc_desc);
6689                 prog->nr_reloc = 0;
6690         }
6691         return 0;
6692 }
6693
6694 static int bpf_object__collect_st_ops_relos(struct bpf_object *obj,
6695                                             GElf_Shdr *shdr, Elf_Data *data);
6696
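/* Illustrative BPF-side declaration that produces the .maps relocations
 * handled below (hypothetical map names; a sketch based on libbpf's
 * BTF-defined map syntax):
 *
 *	struct inner_map {
 *		__uint(type, BPF_MAP_TYPE_ARRAY);
 *		__uint(max_entries, 1);
 *		__type(key, int);
 *		__type(value, int);
 *	} inner_map1 SEC(".maps");
 *
 *	struct {
 *		__uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
 *		__uint(max_entries, 3);
 *		__type(key, int);
 *		__array(values, struct inner_map);
 *	} outer_map SEC(".maps") = {
 *		.values = { [0] = &inner_map1 },
 *	};
 *
 * The &inner_map1 initializer emits an ELF relocation against the .maps
 * section, which is resolved below to the target inner map.
 */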
6697 static int bpf_object__collect_map_relos(struct bpf_object *obj,
6698                                          GElf_Shdr *shdr, Elf_Data *data)
6699 {
6700         const int bpf_ptr_sz = 8, host_ptr_sz = sizeof(void *);
6701         int i, j, nrels, new_sz;
6702         const struct btf_var_secinfo *vi = NULL;
6703         const struct btf_type *sec, *var, *def;
6704         struct bpf_map *map = NULL, *targ_map;
6705         const struct btf_member *member;
6706         const char *name, *mname;
6707         Elf_Data *symbols;
6708         unsigned int moff;
6709         GElf_Sym sym;
6710         GElf_Rel rel;
6711         void *tmp;
6712
6713         if (!obj->efile.btf_maps_sec_btf_id || !obj->btf)
6714                 return -EINVAL;
6715         sec = btf__type_by_id(obj->btf, obj->efile.btf_maps_sec_btf_id);
6716         if (!sec)
6717                 return -EINVAL;
6718
6719         symbols = obj->efile.symbols;
6720         nrels = shdr->sh_size / shdr->sh_entsize;
6721         for (i = 0; i < nrels; i++) {
6722                 if (!gelf_getrel(data, i, &rel)) {
6723                         pr_warn(".maps relo #%d: failed to get ELF relo\n", i);
6724                         return -LIBBPF_ERRNO__FORMAT;
6725                 }
6726                 if (!gelf_getsym(symbols, GELF_R_SYM(rel.r_info), &sym)) {
6727                         pr_warn(".maps relo #%d: symbol %zx not found\n",
6728                                 i, (size_t)GELF_R_SYM(rel.r_info));
6729                         return -LIBBPF_ERRNO__FORMAT;
6730                 }
6731                 name = elf_sym_str(obj, sym.st_name) ?: "<?>";
6732                 if (sym.st_shndx != obj->efile.btf_maps_shndx) {
6733                         pr_warn(".maps relo #%d: '%s' isn't a BTF-defined map\n",
6734                                 i, name);
6735                         return -LIBBPF_ERRNO__RELOC;
6736                 }
6737
6738                 pr_debug(".maps relo #%d: for %zd value %zd rel.r_offset %zu name %d ('%s')\n",
6739                          i, (ssize_t)(rel.r_info >> 32), (size_t)sym.st_value,
6740                          (size_t)rel.r_offset, sym.st_name, name);
6741
6742                 for (j = 0; j < obj->nr_maps; j++) {
6743                         map = &obj->maps[j];
6744                         if (map->sec_idx != obj->efile.btf_maps_shndx)
6745                                 continue;
6746
6747                         vi = btf_var_secinfos(sec) + map->btf_var_idx;
6748                         if (vi->offset <= rel.r_offset &&
6749                             rel.r_offset + bpf_ptr_sz <= vi->offset + vi->size)
6750                                 break;
6751                 }
6752                 if (j == obj->nr_maps) {
6753                         pr_warn(".maps relo #%d: cannot find map '%s' at rel.r_offset %zu\n",
6754                                 i, name, (size_t)rel.r_offset);
6755                         return -EINVAL;
6756                 }
6757
6758                 if (!bpf_map_type__is_map_in_map(map->def.type))
6759                         return -EINVAL;
6760                 if (map->def.type == BPF_MAP_TYPE_HASH_OF_MAPS &&
6761                     map->def.key_size != sizeof(int)) {
6762                         pr_warn(".maps relo #%d: hash-of-maps '%s' should have key size %zu.\n",
6763                                 i, map->name, sizeof(int));
6764                         return -EINVAL;
6765                 }
6766
6767                 targ_map = bpf_object__find_map_by_name(obj, name);
6768                 if (!targ_map)
6769                         return -ESRCH;
6770
6771                 var = btf__type_by_id(obj->btf, vi->type);
6772                 def = skip_mods_and_typedefs(obj->btf, var->type, NULL);
6773                 if (btf_vlen(def) == 0)
6774                         return -EINVAL;
6775                 member = btf_members(def) + btf_vlen(def) - 1;
6776                 mname = btf__name_by_offset(obj->btf, member->name_off);
6777                 if (strcmp(mname, "values"))
6778                         return -EINVAL;
6779
6780                 moff = btf_member_bit_offset(def, btf_vlen(def) - 1) / 8;
6781                 if (rel.r_offset - vi->offset < moff)
6782                         return -EINVAL;
6783
6784                 moff = rel.r_offset - vi->offset - moff;
6785                 /* here we use BPF pointer size, which is always 64 bit, as we
6786                  * are parsing ELF that was built for BPF target
6787                  */
6788                 if (moff % bpf_ptr_sz)
6789                         return -EINVAL;
6790                 moff /= bpf_ptr_sz;
6791                 if (moff >= map->init_slots_sz) {
6792                         new_sz = moff + 1;
6793                         tmp = libbpf_reallocarray(map->init_slots, new_sz, host_ptr_sz);
6794                         if (!tmp)
6795                                 return -ENOMEM;
6796                         map->init_slots = tmp;
6797                         memset(map->init_slots + map->init_slots_sz, 0,
6798                                (new_sz - map->init_slots_sz) * host_ptr_sz);
6799                         map->init_slots_sz = new_sz;
6800                 }
6801                 map->init_slots[moff] = targ_map;
6802
6803                 pr_debug(".maps relo #%d: map '%s' slot [%d] points to map '%s'\n",
6804                          i, map->name, moff, name);
6805         }
6806
6807         return 0;
6808 }
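
/* Editor's note: the relocations handled above originate from BTF-defined
 * map-in-map declarations on the BPF side. A minimal sketch of such a
 * declaration (illustrative names; __uint/__type/__array are the usual
 * macros from bpf_helpers.h):
 *
 *        struct inner_map {
 *                __uint(type, BPF_MAP_TYPE_ARRAY);
 *                __uint(max_entries, 1);
 *                __type(key, int);
 *                __type(value, int);
 *        } inner_a SEC(".maps"), inner_b SEC(".maps");
 *
 *        struct {
 *                __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
 *                __uint(max_entries, 2);
 *                __type(key, int);
 *                __array(values, struct inner_map);
 *        } outer SEC(".maps") = {
 *                .values = { &inner_a, &inner_b },
 *        };
 *
 * Each &inner_X initializer is emitted as an ELF relocation against the
 * .maps section, which the loop above records as an init_slots[] entry.
 */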
6809
6810 static int cmp_relocs(const void *_a, const void *_b)
6811 {
6812         const struct reloc_desc *a = _a;
6813         const struct reloc_desc *b = _b;
6814
6815         if (a->insn_idx != b->insn_idx)
6816                 return a->insn_idx < b->insn_idx ? -1 : 1;
6817
6818         /* no two relocations should have the same insn_idx, but ... */
6819         if (a->type != b->type)
6820                 return a->type < b->type ? -1 : 1;
6821
6822         return 0;
6823 }
6824
6825 static int bpf_object__collect_relos(struct bpf_object *obj)
6826 {
6827         int i, err;
6828
6829         for (i = 0; i < obj->efile.nr_reloc_sects; i++) {
6830                 GElf_Shdr *shdr = &obj->efile.reloc_sects[i].shdr;
6831                 Elf_Data *data = obj->efile.reloc_sects[i].data;
6832                 int idx = shdr->sh_info;
6833
6834                 if (shdr->sh_type != SHT_REL) {
6835                         pr_warn("internal error at %d\n", __LINE__);
6836                         return -LIBBPF_ERRNO__INTERNAL;
6837                 }
6838
6839                 if (idx == obj->efile.st_ops_shndx)
6840                         err = bpf_object__collect_st_ops_relos(obj, shdr, data);
6841                 else if (idx == obj->efile.btf_maps_shndx)
6842                         err = bpf_object__collect_map_relos(obj, shdr, data);
6843                 else
6844                         err = bpf_object__collect_prog_relos(obj, shdr, data);
6845                 if (err)
6846                         return err;
6847         }
6848
6849         for (i = 0; i < obj->nr_programs; i++) {
6850                 struct bpf_program *p = &obj->programs[i];
6851
6852                 if (!p->nr_reloc)
6853                         continue;
6854
6855                 qsort(p->reloc_desc, p->nr_reloc, sizeof(*p->reloc_desc), cmp_relocs);
6856         }
6857         return 0;
6858 }
6859
6860 static bool insn_is_helper_call(struct bpf_insn *insn, enum bpf_func_id *func_id)
6861 {
6862         if (BPF_CLASS(insn->code) == BPF_JMP &&
6863             BPF_OP(insn->code) == BPF_CALL &&
6864             BPF_SRC(insn->code) == BPF_K &&
6865             insn->src_reg == 0 &&
6866             insn->dst_reg == 0) {
6867                 *func_id = insn->imm;
6868                 return true;
6869         }
6870         return false;
6871 }
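
/* Editor's note: the shape matched above is a BPF helper call. A sketch of
 * such an instruction (BPF_FUNC_probe_read_kernel is just an example
 * helper):
 *
 *        struct bpf_insn call = {
 *                .code    = BPF_JMP | BPF_CALL,
 *                .dst_reg = 0,
 *                .src_reg = 0,
 *                .off     = 0,
 *                .imm     = BPF_FUNC_probe_read_kernel,
 *        };
 *
 * src_reg == 0 distinguishes helper calls from subprogram calls, which
 * carry BPF_PSEUDO_CALL in src_reg; imm holds the helper's ID.
 */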
6872
6873 static int bpf_object__sanitize_prog(struct bpf_object *obj, struct bpf_program *prog)
6874 {
6875         struct bpf_insn *insn = prog->insns;
6876         enum bpf_func_id func_id;
6877         int i;
6878
6879         for (i = 0; i < prog->insns_cnt; i++, insn++) {
6880                 if (!insn_is_helper_call(insn, &func_id))
6881                         continue;
6882
6883                 /* on kernels that don't yet support
6884                  * bpf_probe_read_{kernel,user}[_str] helpers, fall back
6885                  * to bpf_probe_read{,_str}(), which old kernels do support
6886                  */
6887                 switch (func_id) {
6888                 case BPF_FUNC_probe_read_kernel:
6889                 case BPF_FUNC_probe_read_user:
6890                         if (!kernel_supports(FEAT_PROBE_READ_KERN))
6891                                 insn->imm = BPF_FUNC_probe_read;
6892                         break;
6893                 case BPF_FUNC_probe_read_kernel_str:
6894                 case BPF_FUNC_probe_read_user_str:
6895                         if (!kernel_supports(FEAT_PROBE_READ_KERN))
6896                                 insn->imm = BPF_FUNC_probe_read_str;
6897                         break;
6898                 default:
6899                         break;
6900                 }
6901         }
6902         return 0;
6903 }
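
/* Editor's note: for example, BPF C code doing
 *
 *        bpf_probe_read_kernel(&dst, sizeof(dst), src);
 *
 * compiles to a call instruction with imm == BPF_FUNC_probe_read_kernel;
 * on kernels predating that helper (roughly pre-5.5), the loop above
 * patches imm to BPF_FUNC_probe_read, so the very same instruction
 * invokes the older helper instead.
 */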
6904
6905 static int
6906 load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt,
6907              char *license, __u32 kern_version, int *pfd)
6908 {
6909         struct bpf_prog_load_params load_attr = {};
6910         char *cp, errmsg[STRERR_BUFSIZE];
6911         size_t log_buf_size = 0;
6912         char *log_buf = NULL;
6913         int btf_fd, ret;
6914
6915         if (prog->type == BPF_PROG_TYPE_UNSPEC) {
6916                 /*
6917                  * The program type must be set.  Most likely we couldn't find a proper
6918                  * section definition at load time, and thus we didn't infer the type.
6919                  */
6920                 pr_warn("prog '%s': missing BPF prog type, check ELF section name '%s'\n",
6921                         prog->name, prog->sec_name);
6922                 return -EINVAL;
6923         }
6924
6925         if (!insns || !insns_cnt)
6926                 return -EINVAL;
6927
6928         load_attr.prog_type = prog->type;
6929         /* old kernels might not support specifying expected_attach_type */
6930         if (!kernel_supports(FEAT_EXP_ATTACH_TYPE) && prog->sec_def &&
6931             prog->sec_def->is_exp_attach_type_optional)
6932                 load_attr.expected_attach_type = 0;
6933         else
6934                 load_attr.expected_attach_type = prog->expected_attach_type;
6935         if (kernel_supports(FEAT_PROG_NAME))
6936                 load_attr.name = prog->name;
6937         load_attr.insns = insns;
6938         load_attr.insn_cnt = insns_cnt;
6939         load_attr.license = license;
6941         if (prog->attach_prog_fd)
6942                 load_attr.attach_prog_fd = prog->attach_prog_fd;
6943         else
6944                 load_attr.attach_btf_obj_fd = prog->attach_btf_obj_fd;
6945         load_attr.attach_btf_id = prog->attach_btf_id;
6946         load_attr.kern_version = kern_version;
6947         load_attr.prog_ifindex = prog->prog_ifindex;
6948
6949         /* specify func_info/line_info only if kernel supports them */
6950         btf_fd = bpf_object__btf_fd(prog->obj);
6951         if (btf_fd >= 0 && kernel_supports(FEAT_BTF_FUNC)) {
6952                 load_attr.prog_btf_fd = btf_fd;
6953                 load_attr.func_info = prog->func_info;
6954                 load_attr.func_info_rec_size = prog->func_info_rec_size;
6955                 load_attr.func_info_cnt = prog->func_info_cnt;
6956                 load_attr.line_info = prog->line_info;
6957                 load_attr.line_info_rec_size = prog->line_info_rec_size;
6958                 load_attr.line_info_cnt = prog->line_info_cnt;
6959         }
6960         load_attr.log_level = prog->log_level;
6961         load_attr.prog_flags = prog->prog_flags;
6962
6963 retry_load:
6964         if (log_buf_size) {
6965                 log_buf = malloc(log_buf_size);
6966                 if (!log_buf)
6967                         return -ENOMEM;
6968
6969                 *log_buf = 0;
6970         }
6971
6972         load_attr.log_buf = log_buf;
6973         load_attr.log_buf_sz = log_buf_size;
6974         ret = libbpf__bpf_prog_load(&load_attr);
6975
6976         if (ret >= 0) {
6977                 if (log_buf && load_attr.log_level)
6978                         pr_debug("verifier log:\n%s", log_buf);
6979
6980                 if (prog->obj->rodata_map_idx >= 0 &&
6981                     kernel_supports(FEAT_PROG_BIND_MAP)) {
6982                         struct bpf_map *rodata_map =
6983                                 &prog->obj->maps[prog->obj->rodata_map_idx];
6984
6985                         if (bpf_prog_bind_map(ret, bpf_map__fd(rodata_map), NULL)) {
6986                                 cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
6987                                 pr_warn("prog '%s': failed to bind .rodata map: %s\n",
6988                                         prog->name, cp);
6989                                 /* Don't fail hard if can't bind rodata. */
6990                         }
6991                 }
6992
6993                 *pfd = ret;
6994                 ret = 0;
6995                 goto out;
6996         }
6997
6998         if (!log_buf || errno == ENOSPC) {
6999                 log_buf_size = max((size_t)BPF_LOG_BUF_SIZE,
7000                                    log_buf_size << 1);
7001
7002                 free(log_buf);
7003                 goto retry_load;
7004         }
7005         ret = errno ? -errno : -LIBBPF_ERRNO__LOAD;
7006         cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
7007         pr_warn("load bpf program failed: %s\n", cp);
7008         pr_perm_msg(ret);
7009
7010         if (log_buf && log_buf[0] != '\0') {
7011                 ret = -LIBBPF_ERRNO__VERIFY;
7012                 pr_warn("-- BEGIN DUMP LOG --\n");
7013                 pr_warn("\n%s\n", log_buf);
7014                 pr_warn("-- END LOG --\n");
7015         } else if (load_attr.insn_cnt >= BPF_MAXINSNS) {
7016                 pr_warn("Program too large (%zu insns), at most %d insns\n",
7017                         load_attr.insn_cnt, BPF_MAXINSNS);
7018                 ret = -LIBBPF_ERRNO__PROG2BIG;
7019         } else if (load_attr.prog_type != BPF_PROG_TYPE_KPROBE) {
7020                 /* Wrong program type? */
7021                 int fd;
7022
7023                 load_attr.prog_type = BPF_PROG_TYPE_KPROBE;
7024                 load_attr.expected_attach_type = 0;
7025                 load_attr.log_buf = NULL;
7026                 load_attr.log_buf_sz = 0;
7027                 fd = libbpf__bpf_prog_load(&load_attr);
7028                 if (fd >= 0) {
7029                         close(fd);
7030                         ret = -LIBBPF_ERRNO__PROGTYPE;
7031                         goto out;
7032                 }
7033         }
7034
7035 out:
7036         free(log_buf);
7037         return ret;
7038 }
7039
7040 static int libbpf_find_attach_btf_id(struct bpf_program *prog, int *btf_obj_fd, int *btf_type_id);
7041
7042 int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_ver)
7043 {
7044         int err = 0, fd, i;
7045
7046         if (prog->obj->loaded) {
7047                 pr_warn("prog '%s': can't load after object was loaded\n", prog->name);
7048                 return -EINVAL;
7049         }
7050
7051         if ((prog->type == BPF_PROG_TYPE_TRACING ||
7052              prog->type == BPF_PROG_TYPE_LSM ||
7053              prog->type == BPF_PROG_TYPE_EXT) && !prog->attach_btf_id) {
7054                 int btf_obj_fd = 0, btf_type_id = 0;
7055
7056                 err = libbpf_find_attach_btf_id(prog, &btf_obj_fd, &btf_type_id);
7057                 if (err)
7058                         return err;
7059
7060                 prog->attach_btf_obj_fd = btf_obj_fd;
7061                 prog->attach_btf_id = btf_type_id;
7062         }
7063
7064         if (prog->instances.nr < 0 || !prog->instances.fds) {
7065                 if (prog->preprocessor) {
7066                         pr_warn("Internal error: can't load program '%s'\n",
7067                                 prog->name);
7068                         return -LIBBPF_ERRNO__INTERNAL;
7069                 }
7070
7071                 prog->instances.fds = malloc(sizeof(int));
7072                 if (!prog->instances.fds) {
7073                         pr_warn("Not enough memory for BPF fds\n");
7074                         return -ENOMEM;
7075                 }
7076                 prog->instances.nr = 1;
7077                 prog->instances.fds[0] = -1;
7078         }
7079
7080         if (!prog->preprocessor) {
7081                 if (prog->instances.nr != 1) {
7082                         pr_warn("prog '%s': inconsistent nr(%d) != 1\n",
7083                                 prog->name, prog->instances.nr);
7084                 }
7085                 err = load_program(prog, prog->insns, prog->insns_cnt,
7086                                    license, kern_ver, &fd);
7087                 if (!err)
7088                         prog->instances.fds[0] = fd;
7089                 goto out;
7090         }
7091
7092         for (i = 0; i < prog->instances.nr; i++) {
7093                 struct bpf_prog_prep_result result;
7094                 bpf_program_prep_t preprocessor = prog->preprocessor;
7095
7096                 memset(&result, 0, sizeof(result));
7097                 err = preprocessor(prog, i, prog->insns,
7098                                    prog->insns_cnt, &result);
7099                 if (err) {
7100                         pr_warn("Preprocessing the %dth instance of program '%s' failed\n",
7101                                 i, prog->name);
7102                         goto out;
7103                 }
7104
7105                 if (!result.new_insn_ptr || !result.new_insn_cnt) {
7106                         pr_debug("Skip loading the %dth instance of program '%s'\n",
7107                                  i, prog->name);
7108                         prog->instances.fds[i] = -1;
7109                         if (result.pfd)
7110                                 *result.pfd = -1;
7111                         continue;
7112                 }
7113
7114                 err = load_program(prog, result.new_insn_ptr,
7115                                    result.new_insn_cnt, license, kern_ver, &fd);
7116                 if (err) {
7117                         pr_warn("Loading the %dth instance of program '%s' failed\n",
7118                                 i, prog->name);
7119                         goto out;
7120                 }
7121
7122                 if (result.pfd)
7123                         *result.pfd = fd;
7124                 prog->instances.fds[i] = fd;
7125         }
7126 out:
7127         if (err)
7128                 pr_warn("failed to load program '%s'\n", prog->name);
7129         zfree(&prog->insns);
7130         prog->insns_cnt = 0;
7131         return err;
7132 }
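
/* Editor's sketch: direct use of bpf_program__load(); normally
 * bpf_object__load() invokes it for every program, so this is only
 * needed in special setups ("prog" is assumed to come from an already
 * opened bpf_object):
 *
 *        int err = bpf_program__load(prog, "GPL", 0);
 *
 *        if (!err)
 *                printf("prog fd: %d\n", bpf_program__fd(prog));
 */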
7133
7134 static int
7135 bpf_object__load_progs(struct bpf_object *obj, int log_level)
7136 {
7137         struct bpf_program *prog;
7138         size_t i;
7139         int err;
7140
7141         for (i = 0; i < obj->nr_programs; i++) {
7142                 prog = &obj->programs[i];
7143                 err = bpf_object__sanitize_prog(obj, prog);
7144                 if (err)
7145                         return err;
7146         }
7147
7148         for (i = 0; i < obj->nr_programs; i++) {
7149                 prog = &obj->programs[i];
7150                 if (prog_is_subprog(obj, prog))
7151                         continue;
7152                 if (!prog->load) {
7153                         pr_debug("prog '%s': skipped loading\n", prog->name);
7154                         continue;
7155                 }
7156                 prog->log_level |= log_level;
7157                 err = bpf_program__load(prog, obj->license, obj->kern_version);
7158                 if (err)
7159                         return err;
7160         }
7161         return 0;
7162 }
7163
7164 static const struct bpf_sec_def *find_sec_def(const char *sec_name);
7165
7166 static struct bpf_object *
7167 __bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz,
7168                    const struct bpf_object_open_opts *opts)
7169 {
7170         const char *obj_name, *kconfig;
7171         struct bpf_program *prog;
7172         struct bpf_object *obj;
7173         char tmp_name[64];
7174         int err;
7175
7176         if (elf_version(EV_CURRENT) == EV_NONE) {
7177                 pr_warn("failed to init libelf for %s\n",
7178                         path ? : "(mem buf)");
7179                 return ERR_PTR(-LIBBPF_ERRNO__LIBELF);
7180         }
7181
7182         if (!OPTS_VALID(opts, bpf_object_open_opts))
7183                 return ERR_PTR(-EINVAL);
7184
7185         obj_name = OPTS_GET(opts, object_name, NULL);
7186         if (obj_buf) {
7187                 if (!obj_name) {
7188                         snprintf(tmp_name, sizeof(tmp_name), "%lx-%lx",
7189                                  (unsigned long)obj_buf,
7190                                  (unsigned long)obj_buf_sz);
7191                         obj_name = tmp_name;
7192                 }
7193                 path = obj_name;
7194                 pr_debug("loading object '%s' from buffer\n", obj_name);
7195         }
7196
7197         obj = bpf_object__new(path, obj_buf, obj_buf_sz, obj_name);
7198         if (IS_ERR(obj))
7199                 return obj;
7200
7201         kconfig = OPTS_GET(opts, kconfig, NULL);
7202         if (kconfig) {
7203                 obj->kconfig = strdup(kconfig);
7204                 if (!obj->kconfig)
7205                         return ERR_PTR(-ENOMEM);
7206         }
7207
7208         err = bpf_object__elf_init(obj);
7209         err = err ? : bpf_object__check_endianness(obj);
7210         err = err ? : bpf_object__elf_collect(obj);
7211         err = err ? : bpf_object__collect_externs(obj);
7212         err = err ? : bpf_object__finalize_btf(obj);
7213         err = err ? : bpf_object__init_maps(obj, opts);
7214         err = err ? : bpf_object__collect_relos(obj);
7215         if (err)
7216                 goto out;
7217         bpf_object__elf_finish(obj);
7218
7219         bpf_object__for_each_program(prog, obj) {
7220                 prog->sec_def = find_sec_def(prog->sec_name);
7221                 if (!prog->sec_def) {
7222                         /* couldn't guess, but user might manually specify */
7223                         pr_debug("prog '%s': unrecognized ELF section name '%s'\n",
7224                                 prog->name, prog->sec_name);
7225                         continue;
7226                 }
7227
7228                 if (prog->sec_def->is_sleepable)
7229                         prog->prog_flags |= BPF_F_SLEEPABLE;
7230                 bpf_program__set_type(prog, prog->sec_def->prog_type);
7231                 bpf_program__set_expected_attach_type(prog,
7232                                 prog->sec_def->expected_attach_type);
7233
7234                 if (prog->sec_def->prog_type == BPF_PROG_TYPE_TRACING ||
7235                     prog->sec_def->prog_type == BPF_PROG_TYPE_EXT)
7236                         prog->attach_prog_fd = OPTS_GET(opts, attach_prog_fd, 0);
7237         }
7238
7239         return obj;
7240 out:
7241         bpf_object__close(obj);
7242         return ERR_PTR(err);
7243 }
7244
7245 static struct bpf_object *
7246 __bpf_object__open_xattr(struct bpf_object_open_attr *attr, int flags)
7247 {
7248         DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts,
7249                 .relaxed_maps = flags & MAPS_RELAX_COMPAT,
7250         );
7251
7252         /* param validation */
7253         if (!attr->file)
7254                 return NULL;
7255
7256         pr_debug("loading %s\n", attr->file);
7257         return __bpf_object__open(attr->file, NULL, 0, &opts);
7258 }
7259
7260 struct bpf_object *bpf_object__open_xattr(struct bpf_object_open_attr *attr)
7261 {
7262         return __bpf_object__open_xattr(attr, 0);
7263 }
7264
7265 struct bpf_object *bpf_object__open(const char *path)
7266 {
7267         struct bpf_object_open_attr attr = {
7268                 .file           = path,
7269                 .prog_type      = BPF_PROG_TYPE_UNSPEC,
7270         };
7271
7272         return bpf_object__open_xattr(&attr);
7273 }
7274
7275 struct bpf_object *
7276 bpf_object__open_file(const char *path, const struct bpf_object_open_opts *opts)
7277 {
7278         if (!path)
7279                 return ERR_PTR(-EINVAL);
7280
7281         pr_debug("loading %s\n", path);
7282
7283         return __bpf_object__open(path, NULL, 0, opts);
7284 }
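
/* Editor's sketch: opening a file with options; "prog.bpf.o" and the
 * option values are illustrative:
 *
 *        DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts,
 *                .object_name = "my_obj",
 *                .kconfig = "CONFIG_DEBUG_INFO=y",
 *        );
 *        struct bpf_object *obj;
 *
 *        obj = bpf_object__open_file("prog.bpf.o", &opts);
 *        if (libbpf_get_error(obj))
 *                return -1;
 */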
7285
7286 struct bpf_object *
7287 bpf_object__open_mem(const void *obj_buf, size_t obj_buf_sz,
7288                      const struct bpf_object_open_opts *opts)
7289 {
7290         if (!obj_buf || obj_buf_sz == 0)
7291                 return ERR_PTR(-EINVAL);
7292
7293         return __bpf_object__open(NULL, obj_buf, obj_buf_sz, opts);
7294 }
7295
7296 struct bpf_object *
7297 bpf_object__open_buffer(const void *obj_buf, size_t obj_buf_sz,
7298                         const char *name)
7299 {
7300         DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts,
7301                 .object_name = name,
7302                 /* wrong default, but backwards-compatible */
7303                 .relaxed_maps = true,
7304         );
7305
7306         /* returning NULL is wrong, but backwards-compatible */
7307         if (!obj_buf || obj_buf_sz == 0)
7308                 return NULL;
7309
7310         return bpf_object__open_mem(obj_buf, obj_buf_sz, &opts);
7311 }
7312
7313 int bpf_object__unload(struct bpf_object *obj)
7314 {
7315         size_t i;
7316
7317         if (!obj)
7318                 return -EINVAL;
7319
7320         for (i = 0; i < obj->nr_maps; i++) {
7321                 zclose(obj->maps[i].fd);
7322                 if (obj->maps[i].st_ops)
7323                         zfree(&obj->maps[i].st_ops->kern_vdata);
7324         }
7325
7326         for (i = 0; i < obj->nr_programs; i++)
7327                 bpf_program__unload(&obj->programs[i]);
7328
7329         return 0;
7330 }
7331
7332 static int bpf_object__sanitize_maps(struct bpf_object *obj)
7333 {
7334         struct bpf_map *m;
7335
7336         bpf_object__for_each_map(m, obj) {
7337                 if (!bpf_map__is_internal(m))
7338                         continue;
7339                 if (!kernel_supports(FEAT_GLOBAL_DATA)) {
7340                         pr_warn("kernel doesn't support global data\n");
7341                         return -ENOTSUP;
7342                 }
7343                 if (!kernel_supports(FEAT_ARRAY_MMAP))
7344                         m->def.map_flags ^= BPF_F_MMAPABLE;
7345         }
7346
7347         return 0;
7348 }
7349
7350 static int bpf_object__read_kallsyms_file(struct bpf_object *obj)
7351 {
7352         char sym_type, sym_name[500];
7353         unsigned long long sym_addr;
7354         struct extern_desc *ext;
7355         int ret, err = 0;
7356         FILE *f;
7357
7358         f = fopen("/proc/kallsyms", "r");
7359         if (!f) {
7360                 err = -errno;
7361                 pr_warn("failed to open /proc/kallsyms: %d\n", err);
7362                 return err;
7363         }
7364
7365         while (true) {
7366                 ret = fscanf(f, "%llx %c %499s%*[^\n]\n",
7367                              &sym_addr, &sym_type, sym_name);
7368                 if (ret == EOF && feof(f))
7369                         break;
7370                 if (ret != 3) {
7371                         pr_warn("failed to read kallsyms entry: %d\n", ret);
7372                         err = -EINVAL;
7373                         goto out;
7374                 }
7375
7376                 ext = find_extern_by_name(obj, sym_name);
7377                 if (!ext || ext->type != EXT_KSYM)
7378                         continue;
7379
7380                 if (ext->is_set && ext->ksym.addr != sym_addr) {
7381                         pr_warn("extern (ksym) '%s' resolution is ambiguous: 0x%llx or 0x%llx\n",
7382                                 sym_name, ext->ksym.addr, sym_addr);
7383                         err = -EINVAL;
7384                         goto out;
7385                 }
7386                 if (!ext->is_set) {
7387                         ext->is_set = true;
7388                         ext->ksym.addr = sym_addr;
7389                         pr_debug("extern (ksym) %s=0x%llx\n", sym_name, sym_addr);
7390                 }
7391         }
7392
7393 out:
7394         fclose(f);
7395         return err;
7396 }
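
/* Editor's note: each /proc/kallsyms line has the form parsed above,
 * e.g. (the address is illustrative):
 *
 *        ffffffffa1000000 T vfs_read
 *
 * i.e. "<hex address> <type char> <symbol name>", optionally followed
 * by a "[module]" suffix that the %*[^\n] conversion skips.
 */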
7397
7398 static int bpf_object__resolve_ksyms_btf_id(struct bpf_object *obj)
7399 {
7400         struct extern_desc *ext;
7401         struct btf *btf;
7402         int i, j, id, btf_fd, err;
7403
7404         for (i = 0; i < obj->nr_extern; i++) {
7405                 const struct btf_type *targ_var, *targ_type;
7406                 __u32 targ_type_id, local_type_id;
7407                 const char *targ_var_name;
7408                 int ret;
7409
7410                 ext = &obj->externs[i];
7411                 if (ext->type != EXT_KSYM || !ext->ksym.type_id)
7412                         continue;
7413
7414                 btf = obj->btf_vmlinux;
7415                 btf_fd = 0;
7416                 id = btf__find_by_name_kind(btf, ext->name, BTF_KIND_VAR);
7417                 if (id == -ENOENT) {
7418                         err = load_module_btfs(obj);
7419                         if (err)
7420                                 return err;
7421
7422                         for (j = 0; j < obj->btf_module_cnt; j++) {
7423                                 btf = obj->btf_modules[j].btf;
7424                                 /* we assume module BTF FD is always >0 */
7425                                 btf_fd = obj->btf_modules[j].fd;
7426                                 id = btf__find_by_name_kind(btf, ext->name, BTF_KIND_VAR);
7427                                 if (id != -ENOENT)
7428                                         break;
7429                         }
7430                 }
7431                 if (id <= 0) {
7432                         pr_warn("extern (ksym) '%s': failed to find BTF ID in kernel BTF(s).\n",
7433                                 ext->name);
7434                         return -ESRCH;
7435                 }
7436
7437                 /* find local type_id */
7438                 local_type_id = ext->ksym.type_id;
7439
7440                 /* find target type_id */
7441                 targ_var = btf__type_by_id(btf, id);
7442                 targ_var_name = btf__name_by_offset(btf, targ_var->name_off);
7443                 targ_type = skip_mods_and_typedefs(btf, targ_var->type, &targ_type_id);
7444
7445                 ret = bpf_core_types_are_compat(obj->btf, local_type_id,
7446                                                 btf, targ_type_id);
7447                 if (ret <= 0) {
7448                         const struct btf_type *local_type;
7449                         const char *targ_name, *local_name;
7450
7451                         local_type = btf__type_by_id(obj->btf, local_type_id);
7452                         local_name = btf__name_by_offset(obj->btf, local_type->name_off);
7453                         targ_name = btf__name_by_offset(btf, targ_type->name_off);
7454
7455                         pr_warn("extern (ksym) '%s': incompatible types, expected [%d] %s %s, but kernel has [%d] %s %s\n",
7456                                 ext->name, local_type_id,
7457                                 btf_kind_str(local_type), local_name, targ_type_id,
7458                                 btf_kind_str(targ_type), targ_name);
7459                         return -EINVAL;
7460                 }
7461
7462                 ext->is_set = true;
7463                 ext->ksym.kernel_btf_obj_fd = btf_fd;
7464                 ext->ksym.kernel_btf_id = id;
7465                 pr_debug("extern (ksym) '%s': resolved to [%d] %s %s\n",
7466                          ext->name, id, btf_kind_str(targ_var), targ_var_name);
7467         }
7468         return 0;
7469 }
7470
7471 static int bpf_object__resolve_externs(struct bpf_object *obj,
7472                                        const char *extra_kconfig)
7473 {
7474         bool need_config = false, need_kallsyms = false;
7475         bool need_vmlinux_btf = false;
7476         struct extern_desc *ext;
7477         void *kcfg_data = NULL;
7478         int err, i;
7479
7480         if (obj->nr_extern == 0)
7481                 return 0;
7482
7483         if (obj->kconfig_map_idx >= 0)
7484                 kcfg_data = obj->maps[obj->kconfig_map_idx].mmaped;
7485
7486         for (i = 0; i < obj->nr_extern; i++) {
7487                 ext = &obj->externs[i];
7488
7489                 if (ext->type == EXT_KCFG &&
7490                     strcmp(ext->name, "LINUX_KERNEL_VERSION") == 0) {
7491                         void *ext_val = kcfg_data + ext->kcfg.data_off;
7492                         __u32 kver = get_kernel_version();
7493
7494                         if (!kver) {
7495                                 pr_warn("failed to get kernel version\n");
7496                                 return -EINVAL;
7497                         }
7498                         err = set_kcfg_value_num(ext, ext_val, kver);
7499                         if (err)
7500                                 return err;
7501                         pr_debug("extern (kcfg) %s=0x%x\n", ext->name, kver);
7502                 } else if (ext->type == EXT_KCFG &&
7503                            strncmp(ext->name, "CONFIG_", 7) == 0) {
7504                         need_config = true;
7505                 } else if (ext->type == EXT_KSYM) {
7506                         if (ext->ksym.type_id)
7507                                 need_vmlinux_btf = true;
7508                         else
7509                                 need_kallsyms = true;
7510                 } else {
7511                         pr_warn("unrecognized extern '%s'\n", ext->name);
7512                         return -EINVAL;
7513                 }
7514         }
7515         if (need_config && extra_kconfig) {
7516                 err = bpf_object__read_kconfig_mem(obj, extra_kconfig, kcfg_data);
7517                 if (err)
7518                         return -EINVAL;
7519                 need_config = false;
7520                 for (i = 0; i < obj->nr_extern; i++) {
7521                         ext = &obj->externs[i];
7522                         if (ext->type == EXT_KCFG && !ext->is_set) {
7523                                 need_config = true;
7524                                 break;
7525                         }
7526                 }
7527         }
7528         if (need_config) {
7529                 err = bpf_object__read_kconfig_file(obj, kcfg_data);
7530                 if (err)
7531                         return -EINVAL;
7532         }
7533         if (need_kallsyms) {
7534                 err = bpf_object__read_kallsyms_file(obj);
7535                 if (err)
7536                         return -EINVAL;
7537         }
7538         if (need_vmlinux_btf) {
7539                 err = bpf_object__resolve_ksyms_btf_id(obj);
7540                 if (err)
7541                         return -EINVAL;
7542         }
7543         for (i = 0; i < obj->nr_extern; i++) {
7544                 ext = &obj->externs[i];
7545
7546                 if (!ext->is_set && !ext->is_weak) {
7547                         pr_warn("extern %s (strong) not resolved\n", ext->name);
7548                         return -ESRCH;
7549                 } else if (!ext->is_set) {
7550                         pr_debug("extern %s (weak) not resolved, defaulting to zero\n",
7551                                  ext->name);
7552                 }
7553         }
7554
7555         return 0;
7556 }
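
/* Editor's sketch of the BPF-side declarations that create the externs
 * resolved above (__kconfig/__ksym are section attributes provided by
 * bpf_helpers.h; the ksym names are illustrative):
 *
 *        extern int LINUX_KERNEL_VERSION __kconfig;        kcfg, always known
 *        extern bool CONFIG_BPF_SYSCALL __kconfig;         kcfg, from Kconfig
 *        extern const void bpf_prog_active __ksym;         untyped, kallsyms
 *        extern const struct rq runqueues __ksym;          typed, kernel BTF
 */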
7557
7558 int bpf_object__load_xattr(struct bpf_object_load_attr *attr)
7559 {
7560         struct bpf_object *obj;
7561         int err, i;
7562
7563         if (!attr)
7564                 return -EINVAL;
7565         obj = attr->obj;
7566         if (!obj)
7567                 return -EINVAL;
7568
7569         if (obj->loaded) {
7570                 pr_warn("object '%s': load can't be attempted twice\n", obj->name);
7571                 return -EINVAL;
7572         }
7573
7574         err = bpf_object__probe_loading(obj);
7575         err = err ? : bpf_object__load_vmlinux_btf(obj, false);
7576         err = err ? : bpf_object__resolve_externs(obj, obj->kconfig);
7577         err = err ? : bpf_object__sanitize_and_load_btf(obj);
7578         err = err ? : bpf_object__sanitize_maps(obj);
7579         err = err ? : bpf_object__init_kern_struct_ops_maps(obj);
7580         err = err ? : bpf_object__create_maps(obj);
7581         err = err ? : bpf_object__relocate(obj, attr->target_btf_path);
7582         err = err ? : bpf_object__load_progs(obj, attr->log_level);
7583
7584         /* clean up module BTFs */
7585         for (i = 0; i < obj->btf_module_cnt; i++) {
7586                 close(obj->btf_modules[i].fd);
7587                 btf__free(obj->btf_modules[i].btf);
7588                 free(obj->btf_modules[i].name);
7589         }
7590         free(obj->btf_modules);
7591
7592         /* clean up vmlinux BTF */
7593         btf__free(obj->btf_vmlinux);
7594         obj->btf_vmlinux = NULL;
7595
7596         obj->loaded = true; /* whether the load succeeded or not */
7597
7598         if (err)
7599                 goto out;
7600
7601         return 0;
7602 out:
7603         /* unpin any maps that were auto-pinned during load */
7604         for (i = 0; i < obj->nr_maps; i++)
7605                 if (obj->maps[i].pinned && !obj->maps[i].reused)
7606                         bpf_map__unpin(&obj->maps[i], NULL);
7607
7608         bpf_object__unload(obj);
7609         pr_warn("failed to load object '%s'\n", obj->path);
7610         return err;
7611 }
7612
7613 int bpf_object__load(struct bpf_object *obj)
7614 {
7615         struct bpf_object_load_attr attr = {
7616                 .obj = obj,
7617         };
7618
7619         return bpf_object__load_xattr(&attr);
7620 }
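
/* Editor's sketch of the typical open/load/attach lifecycle (error
 * handling abbreviated; "minimal.bpf.o" and "handle_exec" are
 * hypothetical names):
 *
 *        struct bpf_object *obj;
 *        struct bpf_program *prog;
 *        struct bpf_link *link;
 *
 *        obj = bpf_object__open("minimal.bpf.o");
 *        if (libbpf_get_error(obj))
 *                return -1;
 *        if (bpf_object__load(obj))
 *                goto cleanup;
 *        prog = bpf_object__find_program_by_name(obj, "handle_exec");
 *        link = prog ? bpf_program__attach(prog) : NULL;
 *        ... use the program, then bpf_link__destroy(link) ...
 * cleanup:
 *        bpf_object__close(obj);
 */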
7621
7622 static int make_parent_dir(const char *path)
7623 {
7624         char *cp, errmsg[STRERR_BUFSIZE];
7625         char *dname, *dir;
7626         int err = 0;
7627
7628         dname = strdup(path);
7629         if (dname == NULL)
7630                 return -ENOMEM;
7631
7632         dir = dirname(dname);
7633         if (mkdir(dir, 0700) && errno != EEXIST)
7634                 err = -errno;
7635
7636         free(dname);
7637         if (err) {
7638                 cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg));
7639                 pr_warn("failed to mkdir %s: %s\n", path, cp);
7640         }
7641         return err;
7642 }
7643
7644 static int check_path(const char *path)
7645 {
7646         char *cp, errmsg[STRERR_BUFSIZE];
7647         struct statfs st_fs;
7648         char *dname, *dir;
7649         int err = 0;
7650
7651         if (path == NULL)
7652                 return -EINVAL;
7653
7654         dname = strdup(path);
7655         if (dname == NULL)
7656                 return -ENOMEM;
7657
7658         dir = dirname(dname);
7659         if (statfs(dir, &st_fs)) {
7660                 cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
7661                 pr_warn("failed to statfs %s: %s\n", dir, cp);
7662                 err = -errno;
7663         }
7664         free(dname);
7665
7666         if (!err && st_fs.f_type != BPF_FS_MAGIC) {
7667                 pr_warn("specified path %s is not on BPF FS\n", path);
7668                 err = -EINVAL;
7669         }
7670
7671         return err;
7672 }
7673
7674 int bpf_program__pin_instance(struct bpf_program *prog, const char *path,
7675                               int instance)
7676 {
7677         char *cp, errmsg[STRERR_BUFSIZE];
7678         int err;
7679
7680         err = make_parent_dir(path);
7681         if (err)
7682                 return err;
7683
7684         err = check_path(path);
7685         if (err)
7686                 return err;
7687
7688         if (prog == NULL) {
7689                 pr_warn("invalid program pointer\n");
7690                 return -EINVAL;
7691         }
7692
7693         if (instance < 0 || instance >= prog->instances.nr) {
7694                 pr_warn("invalid prog instance %d of prog %s (max %d)\n",
7695                         instance, prog->name, prog->instances.nr);
7696                 return -EINVAL;
7697         }
7698
7699         if (bpf_obj_pin(prog->instances.fds[instance], path)) {
7700                 err = -errno;
7701                 cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
7702                 pr_warn("failed to pin program: %s\n", cp);
7703                 return err;
7704         }
7705         pr_debug("pinned program '%s'\n", path);
7706
7707         return 0;
7708 }
7709
7710 int bpf_program__unpin_instance(struct bpf_program *prog, const char *path,
7711                                 int instance)
7712 {
7713         int err;
7714
7715         err = check_path(path);
7716         if (err)
7717                 return err;
7718
7719         if (prog == NULL) {
7720                 pr_warn("invalid program pointer\n");
7721                 return -EINVAL;
7722         }
7723
7724         if (instance < 0 || instance >= prog->instances.nr) {
7725                 pr_warn("invalid prog instance %d of prog %s (max %d)\n",
7726                         instance, prog->name, prog->instances.nr);
7727                 return -EINVAL;
7728         }
7729
7730         err = unlink(path);
7731         if (err != 0)
7732                 return -errno;
7733         pr_debug("unpinned program '%s'\n", path);
7734
7735         return 0;
7736 }
7737
7738 int bpf_program__pin(struct bpf_program *prog, const char *path)
7739 {
7740         int i, err;
7741
7742         err = make_parent_dir(path);
7743         if (err)
7744                 return err;
7745
7746         err = check_path(path);
7747         if (err)
7748                 return err;
7749
7750         if (prog == NULL) {
7751                 pr_warn("invalid program pointer\n");
7752                 return -EINVAL;
7753         }
7754
7755         if (prog->instances.nr <= 0) {
7756                 pr_warn("no instances of prog %s to pin\n", prog->name);
7757                 return -EINVAL;
7758         }
7759
7760         if (prog->instances.nr == 1) {
7761                 /* don't create subdirs when pinning single instance */
7762                 return bpf_program__pin_instance(prog, path, 0);
7763         }
7764
7765         for (i = 0; i < prog->instances.nr; i++) {
7766                 char buf[PATH_MAX];
7767                 int len;
7768
7769                 len = snprintf(buf, PATH_MAX, "%s/%d", path, i);
7770                 if (len < 0) {
7771                         err = -EINVAL;
7772                         goto err_unpin;
7773                 } else if (len >= PATH_MAX) {
7774                         err = -ENAMETOOLONG;
7775                         goto err_unpin;
7776                 }
7777
7778                 err = bpf_program__pin_instance(prog, buf, i);
7779                 if (err)
7780                         goto err_unpin;
7781         }
7782
7783         return 0;
7784
7785 err_unpin:
7786         for (i = i - 1; i >= 0; i--) {
7787                 char buf[PATH_MAX];
7788                 int len;
7789
7790                 len = snprintf(buf, PATH_MAX, "%s/%d", path, i);
7791                 if (len < 0)
7792                         continue;
7793                 else if (len >= PATH_MAX)
7794                         continue;
7795
7796                 bpf_program__unpin_instance(prog, buf, i);
7797         }
7798
7799         rmdir(path);
7800
7801         return err;
7802 }
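
/* Editor's sketch: pinning a loaded program; the path is illustrative
 * and must live on a mounted bpffs, which check_path() enforces:
 *
 *        err = bpf_program__pin(prog, "/sys/fs/bpf/my_prog");
 *        ...
 *        err = bpf_program__unpin(prog, "/sys/fs/bpf/my_prog");
 */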
7803
7804 int bpf_program__unpin(struct bpf_program *prog, const char *path)
7805 {
7806         int i, err;
7807
7808         err = check_path(path);
7809         if (err)
7810                 return err;
7811
7812         if (prog == NULL) {
7813                 pr_warn("invalid program pointer\n");
7814                 return -EINVAL;
7815         }
7816
7817         if (prog->instances.nr <= 0) {
7818                 pr_warn("no instances of prog %s to unpin\n", prog->name);
7819                 return -EINVAL;
7820         }
7821
7822         if (prog->instances.nr == 1) {
7823                 /* single instance was pinned without subdirs, so unpin directly */
7824                 return bpf_program__unpin_instance(prog, path, 0);
7825         }
7826
7827         for (i = 0; i < prog->instances.nr; i++) {
7828                 char buf[PATH_MAX];
7829                 int len;
7830
7831                 len = snprintf(buf, PATH_MAX, "%s/%d", path, i);
7832                 if (len < 0)
7833                         return -EINVAL;
7834                 else if (len >= PATH_MAX)
7835                         return -ENAMETOOLONG;
7836
7837                 err = bpf_program__unpin_instance(prog, buf, i);
7838                 if (err)
7839                         return err;
7840         }
7841
7842         err = rmdir(path);
7843         if (err)
7844                 return -errno;
7845
7846         return 0;
7847 }
7848
7849 int bpf_map__pin(struct bpf_map *map, const char *path)
7850 {
7851         char *cp, errmsg[STRERR_BUFSIZE];
7852         int err;
7853
7854         if (map == NULL) {
7855                 pr_warn("invalid map pointer\n");
7856                 return -EINVAL;
7857         }
7858
7859         if (map->pin_path) {
7860                 if (path && strcmp(path, map->pin_path)) {
7861                         pr_warn("map '%s' already has pin path '%s' different from '%s'\n",
7862                                 bpf_map__name(map), map->pin_path, path);
7863                         return -EINVAL;
7864                 } else if (map->pinned) {
7865                         pr_debug("map '%s' already pinned at '%s'; not re-pinning\n",
7866                                  bpf_map__name(map), map->pin_path);
7867                         return 0;
7868                 }
7869         } else {
7870                 if (!path) {
7871                         pr_warn("missing a path to pin map '%s' at\n",
7872                                 bpf_map__name(map));
7873                         return -EINVAL;
7874                 } else if (map->pinned) {
7875                         pr_warn("map '%s' already pinned\n", bpf_map__name(map));
7876                         return -EEXIST;
7877                 }
7878
7879                 map->pin_path = strdup(path);
7880                 if (!map->pin_path) {
7881                         err = -errno;
7882                         goto out_err;
7883                 }
7884         }
7885
7886         err = make_parent_dir(map->pin_path);
7887         if (err)
7888                 return err;
7889
7890         err = check_path(map->pin_path);
7891         if (err)
7892                 return err;
7893
7894         if (bpf_obj_pin(map->fd, map->pin_path)) {
7895                 err = -errno;
7896                 goto out_err;
7897         }
7898
7899         map->pinned = true;
7900         pr_debug("pinned map '%s'\n", map->pin_path);
7901
7902         return 0;
7903
7904 out_err:
7905         cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg));
7906         pr_warn("failed to pin map: %s\n", cp);
7907         return err;
7908 }
7909
7910 int bpf_map__unpin(struct bpf_map *map, const char *path)
7911 {
7912         int err;
7913
7914         if (map == NULL) {
7915                 pr_warn("invalid map pointer\n");
7916                 return -EINVAL;
7917         }
7918
7919         if (map->pin_path) {
7920                 if (path && strcmp(path, map->pin_path)) {
7921                         pr_warn("map '%s' already has pin path '%s' different from '%s'\n",
7922                                 bpf_map__name(map), map->pin_path, path);
7923                         return -EINVAL;
7924                 }
7925                 path = map->pin_path;
7926         } else if (!path) {
7927                 pr_warn("no path to unpin map '%s' from\n",
7928                         bpf_map__name(map));
7929                 return -EINVAL;
7930         }
7931
7932         err = check_path(path);
7933         if (err)
7934                 return err;
7935
7936         err = unlink(path);
7937         if (err != 0)
7938                 return -errno;
7939
7940         map->pinned = false;
7941         pr_debug("unpinned map '%s' from '%s'\n", bpf_map__name(map), path);
7942
7943         return 0;
7944 }
7945
7946 int bpf_map__set_pin_path(struct bpf_map *map, const char *path)
7947 {
7948         char *new = NULL;
7949
7950         if (path) {
7951                 new = strdup(path);
7952                 if (!new)
7953                         return -errno;
7954         }
7955
7956         free(map->pin_path);
7957         map->pin_path = new;
7958         return 0;
7959 }
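
/* Editor's sketch: setting a pin path before load ("my_map" and the
 * path are illustrative). With a pin path set, loading the object pins
 * the map on creation, or reuses a compatible map already pinned there:
 *
 *        struct bpf_map *map;
 *
 *        map = bpf_object__find_map_by_name(obj, "my_map");
 *        if (map)
 *                err = bpf_map__set_pin_path(map, "/sys/fs/bpf/my_map");
 */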
7960
7961 const char *bpf_map__get_pin_path(const struct bpf_map *map)
7962 {
7963         return map->pin_path;
7964 }
7965
7966 bool bpf_map__is_pinned(const struct bpf_map *map)
7967 {
7968         return map->pinned;
7969 }
7970
7971 static void sanitize_pin_path(char *s)
7972 {
7973         /* bpffs disallows periods in path names */
7974         while (*s) {
7975                 if (*s == '.')
7976                         *s = '_';
7977                 s++;
7978         }
7979 }
7980
7981 int bpf_object__pin_maps(struct bpf_object *obj, const char *path)
7982 {
7983         struct bpf_map *map;
7984         int err;
7985
7986         if (!obj)
7987                 return -ENOENT;
7988
7989         if (!obj->loaded) {
7990                 pr_warn("object not yet loaded; load it first\n");
7991                 return -ENOENT;
7992         }
7993
7994         bpf_object__for_each_map(map, obj) {
7995                 char *pin_path = NULL;
7996                 char buf[PATH_MAX];
7997
7998                 if (path) {
7999                         int len;
8000
8001                         len = snprintf(buf, PATH_MAX, "%s/%s", path,
8002                                        bpf_map__name(map));
8003                         if (len < 0) {
8004                                 err = -EINVAL;
8005                                 goto err_unpin_maps;
8006                         } else if (len >= PATH_MAX) {
8007                                 err = -ENAMETOOLONG;
8008                                 goto err_unpin_maps;
8009                         }
8010                         sanitize_pin_path(buf);
8011                         pin_path = buf;
8012                 } else if (!map->pin_path) {
8013                         continue;
8014                 }
8015
8016                 err = bpf_map__pin(map, pin_path);
8017                 if (err)
8018                         goto err_unpin_maps;
8019         }
8020
8021         return 0;
8022
8023 err_unpin_maps:
8024         while ((map = bpf_map__prev(map, obj))) {
8025                 if (!map->pin_path)
8026                         continue;
8027
8028                 bpf_map__unpin(map, NULL);
8029         }
8030
8031         return err;
8032 }
8033
8034 int bpf_object__unpin_maps(struct bpf_object *obj, const char *path)
8035 {
8036         struct bpf_map *map;
8037         int err;
8038
8039         if (!obj)
8040                 return -ENOENT;
8041
8042         bpf_object__for_each_map(map, obj) {
8043                 char *pin_path = NULL;
8044                 char buf[PATH_MAX];
8045
8046                 if (path) {
8047                         int len;
8048
8049                         len = snprintf(buf, PATH_MAX, "%s/%s", path,
8050                                        bpf_map__name(map));
8051                         if (len < 0)
8052                                 return -EINVAL;
8053                         else if (len >= PATH_MAX)
8054                                 return -ENAMETOOLONG;
8055                         sanitize_pin_path(buf);
8056                         pin_path = buf;
8057                 } else if (!map->pin_path) {
8058                         continue;
8059                 }
8060
8061                 err = bpf_map__unpin(map, pin_path);
8062                 if (err)
8063                         return err;
8064         }
8065
8066         return 0;
8067 }
8068
8069 int bpf_object__pin_programs(struct bpf_object *obj, const char *path)
8070 {
8071         struct bpf_program *prog;
8072         int err;
8073
8074         if (!obj)
8075                 return -ENOENT;
8076
8077         if (!obj->loaded) {
8078                 pr_warn("object not yet loaded; load it first\n");
8079                 return -ENOENT;
8080         }
8081
8082         bpf_object__for_each_program(prog, obj) {
8083                 char buf[PATH_MAX];
8084                 int len;
8085
8086                 len = snprintf(buf, PATH_MAX, "%s/%s", path,
8087                                prog->pin_name);
8088                 if (len < 0) {
8089                         err = -EINVAL;
8090                         goto err_unpin_programs;
8091                 } else if (len >= PATH_MAX) {
8092                         err = -ENAMETOOLONG;
8093                         goto err_unpin_programs;
8094                 }
8095
8096                 err = bpf_program__pin(prog, buf);
8097                 if (err)
8098                         goto err_unpin_programs;
8099         }
8100
8101         return 0;
8102
8103 err_unpin_programs:
8104         while ((prog = bpf_program__prev(prog, obj))) {
8105                 char buf[PATH_MAX];
8106                 int len;
8107
8108                 len = snprintf(buf, PATH_MAX, "%s/%s", path,
8109                                prog->pin_name);
8110                 if (len < 0)
8111                         continue;
8112                 else if (len >= PATH_MAX)
8113                         continue;
8114
8115                 bpf_program__unpin(prog, buf);
8116         }
8117
8118         return err;
8119 }
8120
8121 int bpf_object__unpin_programs(struct bpf_object *obj, const char *path)
8122 {
8123         struct bpf_program *prog;
8124         int err;
8125
8126         if (!obj)
8127                 return -ENOENT;
8128
8129         bpf_object__for_each_program(prog, obj) {
8130                 char buf[PATH_MAX];
8131                 int len;
8132
8133                 len = snprintf(buf, PATH_MAX, "%s/%s", path,
8134                                prog->pin_name);
8135                 if (len < 0)
8136                         return -EINVAL;
8137                 else if (len >= PATH_MAX)
8138                         return -ENAMETOOLONG;
8139
8140                 err = bpf_program__unpin(prog, buf);
8141                 if (err)
8142                         return err;
8143         }
8144
8145         return 0;
8146 }
8147
8148 int bpf_object__pin(struct bpf_object *obj, const char *path)
8149 {
8150         int err;
8151
8152         err = bpf_object__pin_maps(obj, path);
8153         if (err)
8154                 return err;
8155
8156         err = bpf_object__pin_programs(obj, path);
8157         if (err) {
8158                 bpf_object__unpin_maps(obj, path);
8159                 return err;
8160         }
8161
8162         return 0;
8163 }
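
/* Editor's sketch: pinning a loaded object wholesale (the path is
 * illustrative); maps land at <path>/<map name> and programs at
 * <path>/<pin name>:
 *
 *        err = bpf_object__pin(obj, "/sys/fs/bpf/my_obj");
 */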
8164
8165 static void bpf_map__destroy(struct bpf_map *map)
8166 {
8167         if (map->clear_priv)
8168                 map->clear_priv(map, map->priv);
8169         map->priv = NULL;
8170         map->clear_priv = NULL;
8171
8172         if (map->inner_map) {
8173                 bpf_map__destroy(map->inner_map);
8174                 zfree(&map->inner_map);
8175         }
8176
8177         zfree(&map->init_slots);
8178         map->init_slots_sz = 0;
8179
8180         if (map->mmaped) {
8181                 munmap(map->mmaped, bpf_map_mmap_sz(map));
8182                 map->mmaped = NULL;
8183         }
8184
8185         if (map->st_ops) {
8186                 zfree(&map->st_ops->data);
8187                 zfree(&map->st_ops->progs);
8188                 zfree(&map->st_ops->kern_func_off);
8189                 zfree(&map->st_ops);
8190         }
8191
8192         zfree(&map->name);
8193         zfree(&map->pin_path);
8194
8195         if (map->fd >= 0)
8196                 zclose(map->fd);
8197 }
8198
8199 void bpf_object__close(struct bpf_object *obj)
8200 {
8201         size_t i;
8202
8203         if (IS_ERR_OR_NULL(obj))
8204                 return;
8205
8206         if (obj->clear_priv)
8207                 obj->clear_priv(obj, obj->priv);
8208
8209         bpf_object__elf_finish(obj);
8210         bpf_object__unload(obj);
8211         btf__free(obj->btf);
8212         btf_ext__free(obj->btf_ext);
8213
8214         for (i = 0; i < obj->nr_maps; i++)
8215                 bpf_map__destroy(&obj->maps[i]);
8216
8217         zfree(&obj->kconfig);
8218         zfree(&obj->externs);
8219         obj->nr_extern = 0;
8220
8221         zfree(&obj->maps);
8222         obj->nr_maps = 0;
8223
8224         if (obj->programs && obj->nr_programs) {
8225                 for (i = 0; i < obj->nr_programs; i++)
8226                         bpf_program__exit(&obj->programs[i]);
8227         }
8228         zfree(&obj->programs);
8229
8230         list_del(&obj->list);
8231         free(obj);
8232 }
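/* Typical object lifecycle (illustrative sketch, error handling abbreviated;
 * "prog.o" is a hypothetical BPF ELF object):
 *
 *	struct bpf_object *obj = bpf_object__open("prog.o");
 *
 *	if (libbpf_get_error(obj))
 *		return -1;
 *	if (bpf_object__load(obj)) {
 *		bpf_object__close(obj);
 *		return -1;
 *	}
 *	... attach programs, use maps ...
 *	bpf_object__close(obj);
 */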
8233
8234 struct bpf_object *
8235 bpf_object__next(struct bpf_object *prev)
8236 {
8237         struct bpf_object *next;
8238
8239         if (!prev)
8240                 next = list_first_entry(&bpf_objects_list,
8241                                         struct bpf_object,
8242                                         list);
8243         else
8244                 next = list_next_entry(prev, list);
8245
8246         /* Empty list is noticed here so don't need checking on entry. */
8247         if (&next->list == &bpf_objects_list)
8248                 return NULL;
8249
8250         return next;
8251 }
8252
8253 const char *bpf_object__name(const struct bpf_object *obj)
8254 {
8255         return obj ? obj->name : ERR_PTR(-EINVAL);
8256 }
8257
8258 unsigned int bpf_object__kversion(const struct bpf_object *obj)
8259 {
8260         return obj ? obj->kern_version : 0;
8261 }
8262
8263 struct btf *bpf_object__btf(const struct bpf_object *obj)
8264 {
8265         return obj ? obj->btf : NULL;
8266 }
8267
8268 int bpf_object__btf_fd(const struct bpf_object *obj)
8269 {
8270         return obj->btf ? btf__fd(obj->btf) : -1;
8271 }
8272
8273 int bpf_object__set_priv(struct bpf_object *obj, void *priv,
8274                          bpf_object_clear_priv_t clear_priv)
8275 {
8276         if (obj->priv && obj->clear_priv)
8277                 obj->clear_priv(obj, obj->priv);
8278
8279         obj->priv = priv;
8280         obj->clear_priv = clear_priv;
8281         return 0;
8282 }
8283
8284 void *bpf_object__priv(const struct bpf_object *obj)
8285 {
8286         return obj ? obj->priv : ERR_PTR(-EINVAL);
8287 }
8288
8289 static struct bpf_program *
8290 __bpf_program__iter(const struct bpf_program *p, const struct bpf_object *obj,
8291                     bool forward)
8292 {
8293         size_t nr_programs = obj->nr_programs;
8294         ssize_t idx;
8295
8296         if (!nr_programs)
8297                 return NULL;
8298
8299         if (!p)
8300                 /* Iterate from the beginning */
8301                 return forward ? &obj->programs[0] :
8302                         &obj->programs[nr_programs - 1];
8303
8304         if (p->obj != obj) {
8305                 pr_warn("error: program handler doesn't match object\n");
8306                 return NULL;
8307         }
8308
8309         idx = (p - obj->programs) + (forward ? 1 : -1);
8310         if (idx >= obj->nr_programs || idx < 0)
8311                 return NULL;
8312         return &obj->programs[idx];
8313 }
8314
8315 struct bpf_program *
8316 bpf_program__next(struct bpf_program *prev, const struct bpf_object *obj)
8317 {
8318         struct bpf_program *prog = prev;
8319
8320         do {
8321                 prog = __bpf_program__iter(prog, obj, true);
8322         } while (prog && prog_is_subprog(obj, prog));
8323
8324         return prog;
8325 }
8326
8327 struct bpf_program *
8328 bpf_program__prev(struct bpf_program *next, const struct bpf_object *obj)
8329 {
8330         struct bpf_program *prog = next;
8331
8332         do {
8333                 prog = __bpf_program__iter(prog, obj, false);
8334         } while (prog && prog_is_subprog(obj, prog));
8335
8336         return prog;
8337 }
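/* Iteration sketch (illustrative): bpf_program__next()/bpf_program__prev()
 * skip subprograms, so the usual way to walk an object's entry-point
 * programs is the bpf_object__for_each_program() macro ('obj' is assumed to
 * be an opened object):
 *
 *	struct bpf_program *prog;
 *
 *	bpf_object__for_each_program(prog, obj)
 *		printf("found program: %s\n", bpf_program__name(prog));
 */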
8338
8339 int bpf_program__set_priv(struct bpf_program *prog, void *priv,
8340                           bpf_program_clear_priv_t clear_priv)
8341 {
8342         if (prog->priv && prog->clear_priv)
8343                 prog->clear_priv(prog, prog->priv);
8344
8345         prog->priv = priv;
8346         prog->clear_priv = clear_priv;
8347         return 0;
8348 }
8349
8350 void *bpf_program__priv(const struct bpf_program *prog)
8351 {
8352         return prog ? prog->priv : ERR_PTR(-EINVAL);
8353 }
8354
8355 void bpf_program__set_ifindex(struct bpf_program *prog, __u32 ifindex)
8356 {
8357         prog->prog_ifindex = ifindex;
8358 }
8359
8360 const char *bpf_program__name(const struct bpf_program *prog)
8361 {
8362         return prog->name;
8363 }
8364
8365 const char *bpf_program__section_name(const struct bpf_program *prog)
8366 {
8367         return prog->sec_name;
8368 }
8369
8370 const char *bpf_program__title(const struct bpf_program *prog, bool needs_copy)
8371 {
8372         const char *title;
8373
8374         title = prog->sec_name;
8375         if (needs_copy) {
8376                 title = strdup(title);
8377                 if (!title) {
8378                         pr_warn("failed to strdup program title\n");
8379                         return ERR_PTR(-ENOMEM);
8380                 }
8381         }
8382
8383         return title;
8384 }
8385
8386 bool bpf_program__autoload(const struct bpf_program *prog)
8387 {
8388         return prog->load;
8389 }
8390
8391 int bpf_program__set_autoload(struct bpf_program *prog, bool autoload)
8392 {
8393         if (prog->obj->loaded)
8394                 return -EINVAL;
8395
8396         prog->load = autoload;
8397         return 0;
8398 }
8399
8400 int bpf_program__fd(const struct bpf_program *prog)
8401 {
8402         return bpf_program__nth_fd(prog, 0);
8403 }
8404
8405 size_t bpf_program__size(const struct bpf_program *prog)
8406 {
8407         return prog->insns_cnt * BPF_INSN_SZ;
8408 }
8409
8410 int bpf_program__set_prep(struct bpf_program *prog, int nr_instances,
8411                           bpf_program_prep_t prep)
8412 {
8413         int *instances_fds;
8414
8415         if (nr_instances <= 0 || !prep)
8416                 return -EINVAL;
8417
8418         if (prog->instances.nr > 0 || prog->instances.fds) {
8419                 pr_warn("Can't set pre-processor after loading\n");
8420                 return -EINVAL;
8421         }
8422
8423         instances_fds = malloc(sizeof(int) * nr_instances);
8424         if (!instances_fds) {
8425                 pr_warn("failed to allocate memory for fds\n");
8426                 return -ENOMEM;
8427         }
8428
8429         /* fill all fds with -1 */
8430         memset(instances_fds, -1, sizeof(int) * nr_instances);
8431
8432         prog->instances.nr = nr_instances;
8433         prog->instances.fds = instances_fds;
8434         prog->preprocessor = prep;
8435         return 0;
8436 }
8437
8438 int bpf_program__nth_fd(const struct bpf_program *prog, int n)
8439 {
8440         int fd;
8441
8442         if (!prog)
8443                 return -EINVAL;
8444
8445         if (n >= prog->instances.nr || n < 0) {
8446                 pr_warn("Can't get the %dth fd from program %s: only %d instances\n",
8447                         n, prog->name, prog->instances.nr);
8448                 return -EINVAL;
8449         }
8450
8451         fd = prog->instances.fds[n];
8452         if (fd < 0) {
8453                 pr_warn("%dth instance of program '%s' is invalid\n",
8454                         n, prog->name);
8455                 return -ENOENT;
8456         }
8457
8458         return fd;
8459 }
8460
8461 enum bpf_prog_type bpf_program__get_type(struct bpf_program *prog)
8462 {
8463         return prog->type;
8464 }
8465
8466 void bpf_program__set_type(struct bpf_program *prog, enum bpf_prog_type type)
8467 {
8468         prog->type = type;
8469 }
8470
8471 static bool bpf_program__is_type(const struct bpf_program *prog,
8472                                  enum bpf_prog_type type)
8473 {
8474         return prog ? (prog->type == type) : false;
8475 }
8476
8477 #define BPF_PROG_TYPE_FNS(NAME, TYPE)                           \
8478 int bpf_program__set_##NAME(struct bpf_program *prog)           \
8479 {                                                               \
8480         if (!prog)                                              \
8481                 return -EINVAL;                                 \
8482         bpf_program__set_type(prog, TYPE);                      \
8483         return 0;                                               \
8484 }                                                               \
8485                                                                 \
8486 bool bpf_program__is_##NAME(const struct bpf_program *prog)     \
8487 {                                                               \
8488         return bpf_program__is_type(prog, TYPE);                \
8489 }                                                               \
8490
8491 BPF_PROG_TYPE_FNS(socket_filter, BPF_PROG_TYPE_SOCKET_FILTER);
8492 BPF_PROG_TYPE_FNS(lsm, BPF_PROG_TYPE_LSM);
8493 BPF_PROG_TYPE_FNS(kprobe, BPF_PROG_TYPE_KPROBE);
8494 BPF_PROG_TYPE_FNS(sched_cls, BPF_PROG_TYPE_SCHED_CLS);
8495 BPF_PROG_TYPE_FNS(sched_act, BPF_PROG_TYPE_SCHED_ACT);
8496 BPF_PROG_TYPE_FNS(tracepoint, BPF_PROG_TYPE_TRACEPOINT);
8497 BPF_PROG_TYPE_FNS(raw_tracepoint, BPF_PROG_TYPE_RAW_TRACEPOINT);
8498 BPF_PROG_TYPE_FNS(xdp, BPF_PROG_TYPE_XDP);
8499 BPF_PROG_TYPE_FNS(perf_event, BPF_PROG_TYPE_PERF_EVENT);
8500 BPF_PROG_TYPE_FNS(tracing, BPF_PROG_TYPE_TRACING);
8501 BPF_PROG_TYPE_FNS(struct_ops, BPF_PROG_TYPE_STRUCT_OPS);
8502 BPF_PROG_TYPE_FNS(extension, BPF_PROG_TYPE_EXT);
8503 BPF_PROG_TYPE_FNS(sk_lookup, BPF_PROG_TYPE_SK_LOOKUP);
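/* Each BPF_PROG_TYPE_FNS() invocation above expands to a setter/checker
 * pair, e.g. for "xdp":
 *
 *	int bpf_program__set_xdp(struct bpf_program *prog);
 *	bool bpf_program__is_xdp(const struct bpf_program *prog);
 *
 * An illustrative use, forcing a type when the section name alone isn't
 * conclusive:
 *
 *	if (!bpf_program__is_xdp(prog))
 *		bpf_program__set_xdp(prog);
 */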
8504
8505 enum bpf_attach_type
8506 bpf_program__get_expected_attach_type(struct bpf_program *prog)
8507 {
8508         return prog->expected_attach_type;
8509 }
8510
8511 void bpf_program__set_expected_attach_type(struct bpf_program *prog,
8512                                            enum bpf_attach_type type)
8513 {
8514         prog->expected_attach_type = type;
8515 }
8516
8517 #define BPF_PROG_SEC_IMPL(string, ptype, eatype, eatype_optional,           \
8518                           attachable, attach_btf)                           \
8519         {                                                                   \
8520                 .sec = string,                                              \
8521                 .len = sizeof(string) - 1,                                  \
8522                 .prog_type = ptype,                                         \
8523                 .expected_attach_type = eatype,                             \
8524                 .is_exp_attach_type_optional = eatype_optional,             \
8525                 .is_attachable = attachable,                                \
8526                 .is_attach_btf = attach_btf,                                \
8527         }
8528
8529 /* Programs that can NOT be attached. */
8530 #define BPF_PROG_SEC(string, ptype) BPF_PROG_SEC_IMPL(string, ptype, 0, 0, 0, 0)
8531
8532 /* Programs that can be attached. */
8533 #define BPF_APROG_SEC(string, ptype, atype) \
8534         BPF_PROG_SEC_IMPL(string, ptype, atype, true, 1, 0)
8535
8536 /* Programs that must specify expected attach type at load time. */
8537 #define BPF_EAPROG_SEC(string, ptype, eatype) \
8538         BPF_PROG_SEC_IMPL(string, ptype, eatype, false, 1, 0)
8539
8540 /* Programs that use BTF to identify attach point */
8541 #define BPF_PROG_BTF(string, ptype, eatype) \
8542         BPF_PROG_SEC_IMPL(string, ptype, eatype, false, 0, 1)
8543
8544 /* Programs that can be attached but attach type can't be identified by section
8545  * name. Kept for backward compatibility.
8546  */
8547 #define BPF_APROG_COMPAT(string, ptype) BPF_PROG_SEC(string, ptype)
8548
8549 #define SEC_DEF(sec_pfx, ptype, ...) {                                      \
8550         .sec = sec_pfx,                                                     \
8551         .len = sizeof(sec_pfx) - 1,                                         \
8552         .prog_type = BPF_PROG_TYPE_##ptype,                                 \
8553         __VA_ARGS__                                                         \
8554 }
8555
8556 static struct bpf_link *attach_kprobe(const struct bpf_sec_def *sec,
8557                                       struct bpf_program *prog);
8558 static struct bpf_link *attach_tp(const struct bpf_sec_def *sec,
8559                                   struct bpf_program *prog);
8560 static struct bpf_link *attach_raw_tp(const struct bpf_sec_def *sec,
8561                                       struct bpf_program *prog);
8562 static struct bpf_link *attach_trace(const struct bpf_sec_def *sec,
8563                                      struct bpf_program *prog);
8564 static struct bpf_link *attach_lsm(const struct bpf_sec_def *sec,
8565                                    struct bpf_program *prog);
8566 static struct bpf_link *attach_iter(const struct bpf_sec_def *sec,
8567                                     struct bpf_program *prog);
8568
8569 static const struct bpf_sec_def section_defs[] = {
8570         BPF_PROG_SEC("socket",                  BPF_PROG_TYPE_SOCKET_FILTER),
8571         BPF_PROG_SEC("sk_reuseport",            BPF_PROG_TYPE_SK_REUSEPORT),
8572         SEC_DEF("kprobe/", KPROBE,
8573                 .attach_fn = attach_kprobe),
8574         BPF_PROG_SEC("uprobe/",                 BPF_PROG_TYPE_KPROBE),
8575         SEC_DEF("kretprobe/", KPROBE,
8576                 .attach_fn = attach_kprobe),
8577         BPF_PROG_SEC("uretprobe/",              BPF_PROG_TYPE_KPROBE),
8578         BPF_PROG_SEC("classifier",              BPF_PROG_TYPE_SCHED_CLS),
8579         BPF_PROG_SEC("action",                  BPF_PROG_TYPE_SCHED_ACT),
8580         SEC_DEF("tracepoint/", TRACEPOINT,
8581                 .attach_fn = attach_tp),
8582         SEC_DEF("tp/", TRACEPOINT,
8583                 .attach_fn = attach_tp),
8584         SEC_DEF("raw_tracepoint/", RAW_TRACEPOINT,
8585                 .attach_fn = attach_raw_tp),
8586         SEC_DEF("raw_tp/", RAW_TRACEPOINT,
8587                 .attach_fn = attach_raw_tp),
8588         SEC_DEF("tp_btf/", TRACING,
8589                 .expected_attach_type = BPF_TRACE_RAW_TP,
8590                 .is_attach_btf = true,
8591                 .attach_fn = attach_trace),
8592         SEC_DEF("fentry/", TRACING,
8593                 .expected_attach_type = BPF_TRACE_FENTRY,
8594                 .is_attach_btf = true,
8595                 .attach_fn = attach_trace),
8596         SEC_DEF("fmod_ret/", TRACING,
8597                 .expected_attach_type = BPF_MODIFY_RETURN,
8598                 .is_attach_btf = true,
8599                 .attach_fn = attach_trace),
8600         SEC_DEF("fexit/", TRACING,
8601                 .expected_attach_type = BPF_TRACE_FEXIT,
8602                 .is_attach_btf = true,
8603                 .attach_fn = attach_trace),
8604         SEC_DEF("fentry.s/", TRACING,
8605                 .expected_attach_type = BPF_TRACE_FENTRY,
8606                 .is_attach_btf = true,
8607                 .is_sleepable = true,
8608                 .attach_fn = attach_trace),
8609         SEC_DEF("fmod_ret.s/", TRACING,
8610                 .expected_attach_type = BPF_MODIFY_RETURN,
8611                 .is_attach_btf = true,
8612                 .is_sleepable = true,
8613                 .attach_fn = attach_trace),
8614         SEC_DEF("fexit.s/", TRACING,
8615                 .expected_attach_type = BPF_TRACE_FEXIT,
8616                 .is_attach_btf = true,
8617                 .is_sleepable = true,
8618                 .attach_fn = attach_trace),
8619         SEC_DEF("freplace/", EXT,
8620                 .is_attach_btf = true,
8621                 .attach_fn = attach_trace),
8622         SEC_DEF("lsm/", LSM,
8623                 .is_attach_btf = true,
8624                 .expected_attach_type = BPF_LSM_MAC,
8625                 .attach_fn = attach_lsm),
8626         SEC_DEF("lsm.s/", LSM,
8627                 .is_attach_btf = true,
8628                 .is_sleepable = true,
8629                 .expected_attach_type = BPF_LSM_MAC,
8630                 .attach_fn = attach_lsm),
8631         SEC_DEF("iter/", TRACING,
8632                 .expected_attach_type = BPF_TRACE_ITER,
8633                 .is_attach_btf = true,
8634                 .attach_fn = attach_iter),
8635         BPF_EAPROG_SEC("xdp_devmap/",           BPF_PROG_TYPE_XDP,
8636                                                 BPF_XDP_DEVMAP),
8637         BPF_EAPROG_SEC("xdp_cpumap/",           BPF_PROG_TYPE_XDP,
8638                                                 BPF_XDP_CPUMAP),
8639         BPF_APROG_SEC("xdp",                    BPF_PROG_TYPE_XDP,
8640                                                 BPF_XDP),
8641         BPF_PROG_SEC("perf_event",              BPF_PROG_TYPE_PERF_EVENT),
8642         BPF_PROG_SEC("lwt_in",                  BPF_PROG_TYPE_LWT_IN),
8643         BPF_PROG_SEC("lwt_out",                 BPF_PROG_TYPE_LWT_OUT),
8644         BPF_PROG_SEC("lwt_xmit",                BPF_PROG_TYPE_LWT_XMIT),
8645         BPF_PROG_SEC("lwt_seg6local",           BPF_PROG_TYPE_LWT_SEG6LOCAL),
8646         BPF_APROG_SEC("cgroup_skb/ingress",     BPF_PROG_TYPE_CGROUP_SKB,
8647                                                 BPF_CGROUP_INET_INGRESS),
8648         BPF_APROG_SEC("cgroup_skb/egress",      BPF_PROG_TYPE_CGROUP_SKB,
8649                                                 BPF_CGROUP_INET_EGRESS),
8650         BPF_APROG_COMPAT("cgroup/skb",          BPF_PROG_TYPE_CGROUP_SKB),
8651         BPF_EAPROG_SEC("cgroup/sock_create",    BPF_PROG_TYPE_CGROUP_SOCK,
8652                                                 BPF_CGROUP_INET_SOCK_CREATE),
8653         BPF_EAPROG_SEC("cgroup/sock_release",   BPF_PROG_TYPE_CGROUP_SOCK,
8654                                                 BPF_CGROUP_INET_SOCK_RELEASE),
8655         BPF_APROG_SEC("cgroup/sock",            BPF_PROG_TYPE_CGROUP_SOCK,
8656                                                 BPF_CGROUP_INET_SOCK_CREATE),
8657         BPF_EAPROG_SEC("cgroup/post_bind4",     BPF_PROG_TYPE_CGROUP_SOCK,
8658                                                 BPF_CGROUP_INET4_POST_BIND),
8659         BPF_EAPROG_SEC("cgroup/post_bind6",     BPF_PROG_TYPE_CGROUP_SOCK,
8660                                                 BPF_CGROUP_INET6_POST_BIND),
8661         BPF_APROG_SEC("cgroup/dev",             BPF_PROG_TYPE_CGROUP_DEVICE,
8662                                                 BPF_CGROUP_DEVICE),
8663         BPF_APROG_SEC("sockops",                BPF_PROG_TYPE_SOCK_OPS,
8664                                                 BPF_CGROUP_SOCK_OPS),
8665         BPF_APROG_SEC("sk_skb/stream_parser",   BPF_PROG_TYPE_SK_SKB,
8666                                                 BPF_SK_SKB_STREAM_PARSER),
8667         BPF_APROG_SEC("sk_skb/stream_verdict",  BPF_PROG_TYPE_SK_SKB,
8668                                                 BPF_SK_SKB_STREAM_VERDICT),
8669         BPF_APROG_COMPAT("sk_skb",              BPF_PROG_TYPE_SK_SKB),
8670         BPF_APROG_SEC("sk_msg",                 BPF_PROG_TYPE_SK_MSG,
8671                                                 BPF_SK_MSG_VERDICT),
8672         BPF_APROG_SEC("lirc_mode2",             BPF_PROG_TYPE_LIRC_MODE2,
8673                                                 BPF_LIRC_MODE2),
8674         BPF_APROG_SEC("flow_dissector",         BPF_PROG_TYPE_FLOW_DISSECTOR,
8675                                                 BPF_FLOW_DISSECTOR),
8676         BPF_EAPROG_SEC("cgroup/bind4",          BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
8677                                                 BPF_CGROUP_INET4_BIND),
8678         BPF_EAPROG_SEC("cgroup/bind6",          BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
8679                                                 BPF_CGROUP_INET6_BIND),
8680         BPF_EAPROG_SEC("cgroup/connect4",       BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
8681                                                 BPF_CGROUP_INET4_CONNECT),
8682         BPF_EAPROG_SEC("cgroup/connect6",       BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
8683                                                 BPF_CGROUP_INET6_CONNECT),
8684         BPF_EAPROG_SEC("cgroup/sendmsg4",       BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
8685                                                 BPF_CGROUP_UDP4_SENDMSG),
8686         BPF_EAPROG_SEC("cgroup/sendmsg6",       BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
8687                                                 BPF_CGROUP_UDP6_SENDMSG),
8688         BPF_EAPROG_SEC("cgroup/recvmsg4",       BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
8689                                                 BPF_CGROUP_UDP4_RECVMSG),
8690         BPF_EAPROG_SEC("cgroup/recvmsg6",       BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
8691                                                 BPF_CGROUP_UDP6_RECVMSG),
8692         BPF_EAPROG_SEC("cgroup/getpeername4",   BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
8693                                                 BPF_CGROUP_INET4_GETPEERNAME),
8694         BPF_EAPROG_SEC("cgroup/getpeername6",   BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
8695                                                 BPF_CGROUP_INET6_GETPEERNAME),
8696         BPF_EAPROG_SEC("cgroup/getsockname4",   BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
8697                                                 BPF_CGROUP_INET4_GETSOCKNAME),
8698         BPF_EAPROG_SEC("cgroup/getsockname6",   BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
8699                                                 BPF_CGROUP_INET6_GETSOCKNAME),
8700         BPF_EAPROG_SEC("cgroup/sysctl",         BPF_PROG_TYPE_CGROUP_SYSCTL,
8701                                                 BPF_CGROUP_SYSCTL),
8702         BPF_EAPROG_SEC("cgroup/getsockopt",     BPF_PROG_TYPE_CGROUP_SOCKOPT,
8703                                                 BPF_CGROUP_GETSOCKOPT),
8704         BPF_EAPROG_SEC("cgroup/setsockopt",     BPF_PROG_TYPE_CGROUP_SOCKOPT,
8705                                                 BPF_CGROUP_SETSOCKOPT),
8706         BPF_PROG_SEC("struct_ops",              BPF_PROG_TYPE_STRUCT_OPS),
8707         BPF_EAPROG_SEC("sk_lookup/",            BPF_PROG_TYPE_SK_LOOKUP,
8708                                                 BPF_SK_LOOKUP),
8709 };
8710
8711 #undef BPF_PROG_SEC_IMPL
8712 #undef BPF_PROG_SEC
8713 #undef BPF_APROG_SEC
8714 #undef BPF_EAPROG_SEC
8715 #undef BPF_APROG_COMPAT
8716 #undef SEC_DEF
8717
8718 #define MAX_TYPE_NAME_SIZE 32
8719
8720 static const struct bpf_sec_def *find_sec_def(const char *sec_name)
8721 {
8722         int i, n = ARRAY_SIZE(section_defs);
8723
8724         for (i = 0; i < n; i++) {
8725                 if (strncmp(sec_name,
8726                             section_defs[i].sec, section_defs[i].len))
8727                         continue;
8728                 return &section_defs[i];
8729         }
8730         return NULL;
8731 }
8732
8733 static char *libbpf_get_type_names(bool attach_type)
8734 {
8735         int i, len = ARRAY_SIZE(section_defs) * MAX_TYPE_NAME_SIZE;
8736         char *buf;
8737
8738         buf = malloc(len);
8739         if (!buf)
8740                 return NULL;
8741
8742         buf[0] = '\0';
8743         /* Build up buf with all available names */
8744         for (i = 0; i < ARRAY_SIZE(section_defs); i++) {
8745                 if (attach_type && !section_defs[i].is_attachable)
8746                         continue;
8747
8748                 if (strlen(buf) + strlen(section_defs[i].sec) + 2 > len) {
8749                         free(buf);
8750                         return NULL;
8751                 }
8752                 strcat(buf, " ");
8753                 strcat(buf, section_defs[i].sec);
8754         }
8755
8756         return buf;
8757 }
8758
8759 int libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type,
8760                              enum bpf_attach_type *expected_attach_type)
8761 {
8762         const struct bpf_sec_def *sec_def;
8763         char *type_names;
8764
8765         if (!name)
8766                 return -EINVAL;
8767
8768         sec_def = find_sec_def(name);
8769         if (sec_def) {
8770                 *prog_type = sec_def->prog_type;
8771                 *expected_attach_type = sec_def->expected_attach_type;
8772                 return 0;
8773         }
8774
8775         pr_debug("failed to guess program type from ELF section '%s'\n", name);
8776         type_names = libbpf_get_type_names(false);
8777         if (type_names != NULL) {
8778                 pr_debug("supported section(type) names are:%s\n", type_names);
8779                 free(type_names);
8780         }
8781
8782         return -ESRCH;
8783 }
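/* Usage sketch (illustrative): resolve an ELF section name to program and
 * attach types the same way bpf_object__open() does:
 *
 *	enum bpf_prog_type prog_type;
 *	enum bpf_attach_type attach_type;
 *
 *	if (!libbpf_prog_type_by_name("cgroup_skb/ingress",
 *				      &prog_type, &attach_type)) {
 *		... prog_type is now BPF_PROG_TYPE_CGROUP_SKB and
 *		... attach_type is BPF_CGROUP_INET_INGRESS
 *	}
 */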
8784
8785 static struct bpf_map *find_struct_ops_map_by_offset(struct bpf_object *obj,
8786                                                      size_t offset)
8787 {
8788         struct bpf_map *map;
8789         size_t i;
8790
8791         for (i = 0; i < obj->nr_maps; i++) {
8792                 map = &obj->maps[i];
8793                 if (!bpf_map__is_struct_ops(map))
8794                         continue;
8795                 if (map->sec_offset <= offset &&
8796                     offset - map->sec_offset < map->def.value_size)
8797                         return map;
8798         }
8799
8800         return NULL;
8801 }
8802
8803 /* Collect relocations from the ELF file and populate st_ops->progs[] */
8804 static int bpf_object__collect_st_ops_relos(struct bpf_object *obj,
8805                                             GElf_Shdr *shdr, Elf_Data *data)
8806 {
8807         const struct btf_member *member;
8808         struct bpf_struct_ops *st_ops;
8809         struct bpf_program *prog;
8810         unsigned int shdr_idx;
8811         const struct btf *btf;
8812         struct bpf_map *map;
8813         Elf_Data *symbols;
8814         unsigned int moff, insn_idx;
8815         const char *name;
8816         __u32 member_idx;
8817         GElf_Sym sym;
8818         GElf_Rel rel;
8819         int i, nrels;
8820
8821         symbols = obj->efile.symbols;
8822         btf = obj->btf;
8823         nrels = shdr->sh_size / shdr->sh_entsize;
8824         for (i = 0; i < nrels; i++) {
8825                 if (!gelf_getrel(data, i, &rel)) {
8826                         pr_warn("struct_ops reloc: failed to get %d reloc\n", i);
8827                         return -LIBBPF_ERRNO__FORMAT;
8828                 }
8829
8830                 if (!gelf_getsym(symbols, GELF_R_SYM(rel.r_info), &sym)) {
8831                         pr_warn("struct_ops reloc: symbol %zx not found\n",
8832                                 (size_t)GELF_R_SYM(rel.r_info));
8833                         return -LIBBPF_ERRNO__FORMAT;
8834                 }
8835
8836                 name = elf_sym_str(obj, sym.st_name) ?: "<?>";
8837                 map = find_struct_ops_map_by_offset(obj, rel.r_offset);
8838                 if (!map) {
8839                         pr_warn("struct_ops reloc: cannot find map at rel.r_offset %zu\n",
8840                                 (size_t)rel.r_offset);
8841                         return -EINVAL;
8842                 }
8843
8844                 moff = rel.r_offset - map->sec_offset;
8845                 shdr_idx = sym.st_shndx;
8846                 st_ops = map->st_ops;
8847                 pr_debug("struct_ops reloc %s: for %lld value %lld shdr_idx %u rel.r_offset %zu map->sec_offset %zu name %d (\'%s\')\n",
8848                          map->name,
8849                          (long long)(rel.r_info >> 32),
8850                          (long long)sym.st_value,
8851                          shdr_idx, (size_t)rel.r_offset,
8852                          map->sec_offset, sym.st_name, name);
8853
8854                 if (shdr_idx >= SHN_LORESERVE) {
8855                         pr_warn("struct_ops reloc %s: rel.r_offset %zu shdr_idx %u unsupported non-static function\n",
8856                                 map->name, (size_t)rel.r_offset, shdr_idx);
8857                         return -LIBBPF_ERRNO__RELOC;
8858                 }
8859                 if (sym.st_value % BPF_INSN_SZ) {
8860                         pr_warn("struct_ops reloc %s: invalid target program offset %llu\n",
8861                                 map->name, (unsigned long long)sym.st_value);
8862                         return -LIBBPF_ERRNO__FORMAT;
8863                 }
8864                 insn_idx = sym.st_value / BPF_INSN_SZ;
8865
8866                 member = find_member_by_offset(st_ops->type, moff * 8);
8867                 if (!member) {
8868                         pr_warn("struct_ops reloc %s: cannot find member at moff %u\n",
8869                                 map->name, moff);
8870                         return -EINVAL;
8871                 }
8872                 member_idx = member - btf_members(st_ops->type);
8873                 name = btf__name_by_offset(btf, member->name_off);
8874
8875                 if (!resolve_func_ptr(btf, member->type, NULL)) {
8876                         pr_warn("struct_ops reloc %s: cannot relocate non func ptr %s\n",
8877                                 map->name, name);
8878                         return -EINVAL;
8879                 }
8880
8881                 prog = find_prog_by_sec_insn(obj, shdr_idx, insn_idx);
8882                 if (!prog) {
8883                         pr_warn("struct_ops reloc %s: cannot find prog at shdr_idx %u to relocate func ptr %s\n",
8884                                 map->name, shdr_idx, name);
8885                         return -EINVAL;
8886                 }
8887
8888                 if (prog->type == BPF_PROG_TYPE_UNSPEC) {
8889                         const struct bpf_sec_def *sec_def;
8890
8891                         sec_def = find_sec_def(prog->sec_name);
8892                         if (sec_def &&
8893                             sec_def->prog_type != BPF_PROG_TYPE_STRUCT_OPS) {
8894                                 /* for pr_warn */
8895                                 prog->type = sec_def->prog_type;
8896                                 goto invalid_prog;
8897                         }
8898
8899                         prog->type = BPF_PROG_TYPE_STRUCT_OPS;
8900                         prog->attach_btf_id = st_ops->type_id;
8901                         prog->expected_attach_type = member_idx;
8902                 } else if (prog->type != BPF_PROG_TYPE_STRUCT_OPS ||
8903                            prog->attach_btf_id != st_ops->type_id ||
8904                            prog->expected_attach_type != member_idx) {
8905                         goto invalid_prog;
8906                 }
8907                 st_ops->progs[member_idx] = prog;
8908         }
8909
8910         return 0;
8911
8912 invalid_prog:
8913         pr_warn("struct_ops reloc %s: cannot use prog %s in sec %s with type %u attach_btf_id %u expected_attach_type %u for func ptr %s\n",
8914                 map->name, prog->name, prog->sec_name, prog->type,
8915                 prog->attach_btf_id, prog->expected_attach_type, name);
8916         return -EINVAL;
8917 }
8918
8919 #define BTF_TRACE_PREFIX "btf_trace_"
8920 #define BTF_LSM_PREFIX "bpf_lsm_"
8921 #define BTF_ITER_PREFIX "bpf_iter_"
8922 #define BTF_MAX_NAME_SIZE 128
8923
8924 static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix,
8925                                    const char *name, __u32 kind)
8926 {
8927         char btf_type_name[BTF_MAX_NAME_SIZE];
8928         int ret;
8929
8930         ret = snprintf(btf_type_name, sizeof(btf_type_name),
8931                        "%s%s", prefix, name);
8932         /* snprintf returns the number of characters written excluding the
8933          * terminating null. So, if >= BTF_MAX_NAME_SIZE are written, it
8934          * indicates truncation.
8935          */
8936         if (ret < 0 || ret >= sizeof(btf_type_name))
8937                 return -ENAMETOOLONG;
8938         return btf__find_by_name_kind(btf, btf_type_name, kind);
8939 }
8940
8941 static inline int find_attach_btf_id(struct btf *btf, const char *name,
8942                                      enum bpf_attach_type attach_type)
8943 {
8944         int err;
8945
8946         if (attach_type == BPF_TRACE_RAW_TP)
8947                 err = find_btf_by_prefix_kind(btf, BTF_TRACE_PREFIX, name,
8948                                               BTF_KIND_TYPEDEF);
8949         else if (attach_type == BPF_LSM_MAC)
8950                 err = find_btf_by_prefix_kind(btf, BTF_LSM_PREFIX, name,
8951                                               BTF_KIND_FUNC);
8952         else if (attach_type == BPF_TRACE_ITER)
8953                 err = find_btf_by_prefix_kind(btf, BTF_ITER_PREFIX, name,
8954                                               BTF_KIND_FUNC);
8955         else
8956                 err = btf__find_by_name_kind(btf, name, BTF_KIND_FUNC);
8957
8958         return err;
8959 }
8960
8961 int libbpf_find_vmlinux_btf_id(const char *name,
8962                                enum bpf_attach_type attach_type)
8963 {
8964         struct btf *btf;
8965         int err;
8966
8967         btf = libbpf_find_kernel_btf();
8968         if (IS_ERR(btf)) {
8969                 pr_warn("vmlinux BTF is not found\n");
8970                 return -EINVAL;
8971         }
8972
8973         err = find_attach_btf_id(btf, name, attach_type);
8974         if (err <= 0)
8975                 pr_warn("%s is not found in vmlinux BTF\n", name);
8976
8977         btf__free(btf);
8978         return err;
8979 }
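/* Usage sketch (illustrative): look up the vmlinux BTF ID a tracing program
 * would attach to. For BPF_TRACE_FENTRY the name is looked up as a plain
 * BTF_KIND_FUNC; "do_unlinkat" is just an example kernel function:
 *
 *	int btf_id = libbpf_find_vmlinux_btf_id("do_unlinkat",
 *						BPF_TRACE_FENTRY);
 *	if (btf_id > 0)
 *		... use btf_id as attach_btf_id at program load time ...
 */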
8980
8981 static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd)
8982 {
8983         struct bpf_prog_info_linear *info_linear;
8984         struct bpf_prog_info *info;
8985         struct btf *btf = NULL;
8986         int err = -EINVAL;
8987
8988         info_linear = bpf_program__get_prog_info_linear(attach_prog_fd, 0);
8989         if (IS_ERR_OR_NULL(info_linear)) {
8990                 pr_warn("failed get_prog_info_linear for FD %d\n",
8991                         attach_prog_fd);
8992                 return -EINVAL;
8993         }
8994         info = &info_linear->info;
8995         if (!info->btf_id) {
8996                 pr_warn("The target program doesn't have BTF\n");
8997                 goto out;
8998         }
8999         if (btf__get_from_id(info->btf_id, &btf)) {
9000                 pr_warn("Failed to get BTF of the program\n");
9001                 goto out;
9002         }
9003         err = btf__find_by_name_kind(btf, name, BTF_KIND_FUNC);
9004         btf__free(btf);
9005         if (err <= 0) {
9006                 pr_warn("%s is not found in prog's BTF\n", name);
9007                 goto out;
9008         }
9009 out:
9010         free(info_linear);
9011         return err;
9012 }
9013
9014 static int find_kernel_btf_id(struct bpf_object *obj, const char *attach_name,
9015                               enum bpf_attach_type attach_type,
9016                               int *btf_obj_fd, int *btf_type_id)
9017 {
9018         int ret, i;
9019
9020         ret = find_attach_btf_id(obj->btf_vmlinux, attach_name, attach_type);
9021         if (ret > 0) {
9022                 *btf_obj_fd = 0; /* vmlinux BTF */
9023                 *btf_type_id = ret;
9024                 return 0;
9025         }
9026         if (ret != -ENOENT)
9027                 return ret;
9028
9029         ret = load_module_btfs(obj);
9030         if (ret)
9031                 return ret;
9032
9033         for (i = 0; i < obj->btf_module_cnt; i++) {
9034                 const struct module_btf *mod = &obj->btf_modules[i];
9035
9036                 ret = find_attach_btf_id(mod->btf, attach_name, attach_type);
9037                 if (ret > 0) {
9038                         *btf_obj_fd = mod->fd;
9039                         *btf_type_id = ret;
9040                         return 0;
9041                 }
9042                 if (ret == -ENOENT)
9043                         continue;
9044
9045                 return ret;
9046         }
9047
9048         return -ESRCH;
9049 }
9050
9051 static int libbpf_find_attach_btf_id(struct bpf_program *prog, int *btf_obj_fd, int *btf_type_id)
9052 {
9053         enum bpf_attach_type attach_type = prog->expected_attach_type;
9054         __u32 attach_prog_fd = prog->attach_prog_fd;
9055         const char *name = prog->sec_name, *attach_name;
9056         const struct bpf_sec_def *sec = NULL;
9057         int i, err;
9058
9059         if (!name)
9060                 return -EINVAL;
9061
9062         for (i = 0; i < ARRAY_SIZE(section_defs); i++) {
9063                 if (!section_defs[i].is_attach_btf)
9064                         continue;
9065                 if (strncmp(name, section_defs[i].sec, section_defs[i].len))
9066                         continue;
9067
9068                 sec = &section_defs[i];
9069                 break;
9070         }
9071
9072         if (!sec) {
9073                 pr_warn("failed to identify BTF ID based on ELF section name '%s'\n", name);
9074                 return -ESRCH;
9075         }
9076         attach_name = name + sec->len;
9077
9078         /* BPF program's BTF ID */
9079         if (attach_prog_fd) {
9080                 err = libbpf_find_prog_btf_id(attach_name, attach_prog_fd);
9081                 if (err < 0) {
9082                         pr_warn("failed to find BPF program (FD %d) BTF ID for '%s': %d\n",
9083                                  attach_prog_fd, attach_name, err);
9084                         return err;
9085                 }
9086                 *btf_obj_fd = 0;
9087                 *btf_type_id = err;
9088                 return 0;
9089         }
9090
9091         /* kernel/module BTF ID */
9092         err = find_kernel_btf_id(prog->obj, attach_name, attach_type, btf_obj_fd, btf_type_id);
9093         if (err) {
9094                 pr_warn("failed to find kernel BTF type ID of '%s': %d\n", attach_name, err);
9095                 return err;
9096         }
9097         return 0;
9098 }
9099
9100 int libbpf_attach_type_by_name(const char *name,
9101                                enum bpf_attach_type *attach_type)
9102 {
9103         char *type_names;
9104         int i;
9105
9106         if (!name)
9107                 return -EINVAL;
9108
9109         for (i = 0; i < ARRAY_SIZE(section_defs); i++) {
9110                 if (strncmp(name, section_defs[i].sec, section_defs[i].len))
9111                         continue;
9112                 if (!section_defs[i].is_attachable)
9113                         return -EINVAL;
9114                 *attach_type = section_defs[i].expected_attach_type;
9115                 return 0;
9116         }
9117         pr_debug("failed to guess attach type based on ELF section name '%s'\n", name);
9118         type_names = libbpf_get_type_names(true);
9119         if (type_names != NULL) {
9120                 pr_debug("attachable section(type) names are:%s\n", type_names);
9121                 free(type_names);
9122         }
9123
9124         return -EINVAL;
9125 }
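/* Usage sketch (illustrative): derive the attach type from a section name,
 * e.g. for use with bpf_prog_attach():
 *
 *	enum bpf_attach_type atype;
 *
 *	if (!libbpf_attach_type_by_name("cgroup/bind4", &atype)) {
 *		... atype is now BPF_CGROUP_INET4_BIND
 *	}
 */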
9126
9127 int bpf_map__fd(const struct bpf_map *map)
9128 {
9129         return map ? map->fd : -EINVAL;
9130 }
9131
9132 const struct bpf_map_def *bpf_map__def(const struct bpf_map *map)
9133 {
9134         return map ? &map->def : ERR_PTR(-EINVAL);
9135 }
9136
9137 const char *bpf_map__name(const struct bpf_map *map)
9138 {
9139         return map ? map->name : NULL;
9140 }
9141
9142 enum bpf_map_type bpf_map__type(const struct bpf_map *map)
9143 {
9144         return map->def.type;
9145 }
9146
9147 int bpf_map__set_type(struct bpf_map *map, enum bpf_map_type type)
9148 {
9149         if (map->fd >= 0)
9150                 return -EBUSY;
9151         map->def.type = type;
9152         return 0;
9153 }
9154
9155 __u32 bpf_map__map_flags(const struct bpf_map *map)
9156 {
9157         return map->def.map_flags;
9158 }
9159
9160 int bpf_map__set_map_flags(struct bpf_map *map, __u32 flags)
9161 {
9162         if (map->fd >= 0)
9163                 return -EBUSY;
9164         map->def.map_flags = flags;
9165         return 0;
9166 }
9167
9168 __u32 bpf_map__numa_node(const struct bpf_map *map)
9169 {
9170         return map->numa_node;
9171 }
9172
9173 int bpf_map__set_numa_node(struct bpf_map *map, __u32 numa_node)
9174 {
9175         if (map->fd >= 0)
9176                 return -EBUSY;
9177         map->numa_node = numa_node;
9178         return 0;
9179 }
9180
9181 __u32 bpf_map__key_size(const struct bpf_map *map)
9182 {
9183         return map->def.key_size;
9184 }
9185
9186 int bpf_map__set_key_size(struct bpf_map *map, __u32 size)
9187 {
9188         if (map->fd >= 0)
9189                 return -EBUSY;
9190         map->def.key_size = size;
9191         return 0;
9192 }
9193
9194 __u32 bpf_map__value_size(const struct bpf_map *map)
9195 {
9196         return map->def.value_size;
9197 }
9198
9199 int bpf_map__set_value_size(struct bpf_map *map, __u32 size)
9200 {
9201         if (map->fd >= 0)
9202                 return -EBUSY;
9203         map->def.value_size = size;
9204         return 0;
9205 }
9206
9207 __u32 bpf_map__btf_key_type_id(const struct bpf_map *map)
9208 {
9209         return map ? map->btf_key_type_id : 0;
9210 }
9211
9212 __u32 bpf_map__btf_value_type_id(const struct bpf_map *map)
9213 {
9214         return map ? map->btf_value_type_id : 0;
9215 }
9216
9217 int bpf_map__set_priv(struct bpf_map *map, void *priv,
9218                      bpf_map_clear_priv_t clear_priv)
9219 {
9220         if (!map)
9221                 return -EINVAL;
9222
9223         if (map->priv) {
9224                 if (map->clear_priv)
9225                         map->clear_priv(map, map->priv);
9226         }
9227
9228         map->priv = priv;
9229         map->clear_priv = clear_priv;
9230         return 0;
9231 }
9232
9233 void *bpf_map__priv(const struct bpf_map *map)
9234 {
9235         return map ? map->priv : ERR_PTR(-EINVAL);
9236 }
9237
9238 int bpf_map__set_initial_value(struct bpf_map *map,
9239                                const void *data, size_t size)
9240 {
9241         if (!map->mmaped || map->libbpf_type == LIBBPF_MAP_KCONFIG ||
9242             size != map->def.value_size || map->fd >= 0)
9243                 return -EINVAL;
9244
9245         memcpy(map->mmaped, data, size);
9246         return 0;
9247 }
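/* Usage sketch (illustrative): seed a .rodata-backed map before
 * bpf_object__load(). The map name and struct layout are assumptions; the
 * size must match the map's value size exactly and the map must not be
 * created yet:
 *
 *	struct my_rodata { int debug_level; } vals = { .debug_level = 2 };
 *	struct bpf_map *map;
 *
 *	map = bpf_object__find_map_by_name(obj, "prog.rodata");
 *	if (map)
 *		bpf_map__set_initial_value(map, &vals, sizeof(vals));
 */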
9248
9249 bool bpf_map__is_offload_neutral(const struct bpf_map *map)
9250 {
9251         return map->def.type == BPF_MAP_TYPE_PERF_EVENT_ARRAY;
9252 }
9253
9254 bool bpf_map__is_internal(const struct bpf_map *map)
9255 {
9256         return map->libbpf_type != LIBBPF_MAP_UNSPEC;
9257 }
9258
9259 __u32 bpf_map__ifindex(const struct bpf_map *map)
9260 {
9261         return map->map_ifindex;
9262 }
9263
9264 int bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex)
9265 {
9266         if (map->fd >= 0)
9267                 return -EBUSY;
9268         map->map_ifindex = ifindex;
9269         return 0;
9270 }
9271
9272 int bpf_map__set_inner_map_fd(struct bpf_map *map, int fd)
9273 {
9274         if (!bpf_map_type__is_map_in_map(map->def.type)) {
9275                 pr_warn("error: unsupported map type\n");
9276                 return -EINVAL;
9277         }
9278         if (map->inner_map_fd != -1) {
9279                 pr_warn("error: inner_map_fd already specified\n");
9280                 return -EINVAL;
9281         }
9282         map->inner_map_fd = fd;
9283         return 0;
9284 }
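/* Usage sketch (illustrative): supply an inner-map template for a map-in-map
 * before the outer map is created. 'obj' and the map name are assumptions:
 *
 *	int inner_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, sizeof(int),
 *				      sizeof(int), 1, 0);
 *	struct bpf_map *outer;
 *
 *	outer = bpf_object__find_map_by_name(obj, "map_of_maps");
 *	if (outer)
 *		bpf_map__set_inner_map_fd(outer, inner_fd);
 */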
9285
9286 static struct bpf_map *
9287 __bpf_map__iter(const struct bpf_map *m, const struct bpf_object *obj, int i)
9288 {
9289         ssize_t idx;
9290         struct bpf_map *s, *e;
9291
9292         if (!obj || !obj->maps)
9293                 return NULL;
9294
9295         s = obj->maps;
9296         e = obj->maps + obj->nr_maps;
9297
9298         if ((m < s) || (m >= e)) {
9299                 pr_warn("error in %s: map handler doesn't belong to object\n",
9300                          __func__);
9301                 return NULL;
9302         }
9303
9304         idx = (m - obj->maps) + i;
9305         if (idx >= obj->nr_maps || idx < 0)
9306                 return NULL;
9307         return &obj->maps[idx];
9308 }
9309
9310 struct bpf_map *
9311 bpf_map__next(const struct bpf_map *prev, const struct bpf_object *obj)
9312 {
9313         if (prev == NULL)
9314                 return obj->maps;
9315
9316         return __bpf_map__iter(prev, obj, 1);
9317 }
9318
9319 struct bpf_map *
9320 bpf_map__prev(const struct bpf_map *next, const struct bpf_object *obj)
9321 {
9322         if (next == NULL) {
9323                 if (!obj->nr_maps)
9324                         return NULL;
9325                 return obj->maps + obj->nr_maps - 1;
9326         }
9327
9328         return __bpf_map__iter(next, obj, -1);
9329 }
9330
9331 struct bpf_map *
9332 bpf_object__find_map_by_name(const struct bpf_object *obj, const char *name)
9333 {
9334         struct bpf_map *pos;
9335
9336         bpf_object__for_each_map(pos, obj) {
9337                 if (pos->name && !strcmp(pos->name, name))
9338                         return pos;
9339         }
9340         return NULL;
9341 }
9342
9343 int
9344 bpf_object__find_map_fd_by_name(const struct bpf_object *obj, const char *name)
9345 {
9346         return bpf_map__fd(bpf_object__find_map_by_name(obj, name));
9347 }
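/* Usage sketch (illustrative, assumes a loaded 'obj' with a map named
 * "counters"):
 *
 *	__u32 key = 0;
 *	__u64 value;
 *	int map_fd;
 *
 *	map_fd = bpf_object__find_map_fd_by_name(obj, "counters");
 *	if (map_fd >= 0 && !bpf_map_lookup_elem(map_fd, &key, &value))
 *		printf("counters[0] = %llu\n", (unsigned long long)value);
 */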
9348
9349 struct bpf_map *
9350 bpf_object__find_map_by_offset(struct bpf_object *obj, size_t offset)
9351 {
9352         return ERR_PTR(-ENOTSUP);
9353 }
9354
9355 long libbpf_get_error(const void *ptr)
9356 {
9357         return PTR_ERR_OR_ZERO(ptr);
9358 }
9359
9360 int bpf_prog_load(const char *file, enum bpf_prog_type type,
9361                   struct bpf_object **pobj, int *prog_fd)
9362 {
9363         struct bpf_prog_load_attr attr;
9364
9365         memset(&attr, 0, sizeof(struct bpf_prog_load_attr));
9366         attr.file = file;
9367         attr.prog_type = type;
9368         attr.expected_attach_type = 0;
9369
9370         return bpf_prog_load_xattr(&attr, pobj, prog_fd);
9371 }
9372
9373 int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
9374                         struct bpf_object **pobj, int *prog_fd)
9375 {
9376         struct bpf_object_open_attr open_attr = {};
9377         struct bpf_program *prog, *first_prog = NULL;
9378         struct bpf_object *obj;
9379         struct bpf_map *map;
9380         int err;
9381
9382         if (!attr)
9383                 return -EINVAL;
9384         if (!attr->file)
9385                 return -EINVAL;
9386
9387         open_attr.file = attr->file;
9388         open_attr.prog_type = attr->prog_type;
9389
9390         obj = bpf_object__open_xattr(&open_attr);
9391         if (IS_ERR_OR_NULL(obj))
9392                 return -ENOENT;
9393
9394         bpf_object__for_each_program(prog, obj) {
9395                 enum bpf_attach_type attach_type = attr->expected_attach_type;
9396                 /*
9397                  * to preserve backwards compatibility, bpf_prog_load treats
9398                  * attr->prog_type, if specified, as an override to whatever
9399                  * bpf_object__open guessed
9400                  */
9401                 if (attr->prog_type != BPF_PROG_TYPE_UNSPEC) {
9402                         bpf_program__set_type(prog, attr->prog_type);
9403                         bpf_program__set_expected_attach_type(prog,
9404                                                               attach_type);
9405                 }
9406                 if (bpf_program__get_type(prog) == BPF_PROG_TYPE_UNSPEC) {
9407                         /*
9408                          * we haven't guessed from section name and user
9409                          * didn't provide a fallback type, too bad...
9410                          */
9411                         bpf_object__close(obj);
9412                         return -EINVAL;
9413                 }
9414
9415                 prog->prog_ifindex = attr->ifindex;
9416                 prog->log_level = attr->log_level;
9417                 prog->prog_flags |= attr->prog_flags;
9418                 if (!first_prog)
9419                         first_prog = prog;
9420         }
9421
9422         bpf_object__for_each_map(map, obj) {
9423                 if (!bpf_map__is_offload_neutral(map))
9424                         map->map_ifindex = attr->ifindex;
9425         }
9426
9427         if (!first_prog) {
9428                 pr_warn("object file doesn't contain a BPF program\n");
9429                 bpf_object__close(obj);
9430                 return -ENOENT;
9431         }
9432
9433         err = bpf_object__load(obj);
9434         if (err) {
9435                 bpf_object__close(obj);
9436                 return err;
9437         }
9438
9439         *pobj = obj;
9440         *prog_fd = bpf_program__fd(first_prog);
9441         return 0;
9442 }
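/* Usage sketch (illustrative): the convenience loaders above open, load and
 * hand back the first program's FD in one call. "prog.o" is hypothetical:
 *
 *	struct bpf_object *obj;
 *	int prog_fd;
 *
 *	if (bpf_prog_load("prog.o", BPF_PROG_TYPE_XDP, &obj, &prog_fd))
 *		return -1;
 *	... attach prog_fd, then eventually ...
 *	bpf_object__close(obj);
 */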
9443
9444 struct bpf_link {
9445         int (*detach)(struct bpf_link *link);
9446         int (*destroy)(struct bpf_link *link);
9447         char *pin_path;         /* NULL, if not pinned */
9448         int fd;                 /* hook FD, -1 if not applicable */
9449         bool disconnected;
9450 };
9451
9452 /* Replace link's underlying BPF program with the new one */
9453 int bpf_link__update_program(struct bpf_link *link, struct bpf_program *prog)
9454 {
9455         return bpf_link_update(bpf_link__fd(link), bpf_program__fd(prog), NULL);
9456 }
9457
9458 /* Release "ownership" of the underlying BPF resource (typically, a BPF
9459  * program attached to some BPF hook, e.g., tracepoint, kprobe, etc). A
9460  * disconnected link, when destroyed through a bpf_link__destroy() call,
9461  * won't attempt to detach/unregister that BPF resource. This is useful in
9462  * situations where, say, an attached BPF program has to outlive the
9463  * userspace program that attached it. Depending on the type of BPF
9464  * program, though, additional steps (like pinning the BPF program in BPF
9465  * FS) might be necessary to ensure that the exit of the userspace program
9466  * doesn't trigger automatic detachment and cleanup inside the kernel.
9467  */
9468 void bpf_link__disconnect(struct bpf_link *link)
9469 {
9470         link->disconnected = true;
9471 }
9472
9473 int bpf_link__destroy(struct bpf_link *link)
9474 {
9475         int err = 0;
9476
9477         if (IS_ERR_OR_NULL(link))
9478                 return 0;
9479
9480         if (!link->disconnected && link->detach)
9481                 err = link->detach(link);
9482         if (link->destroy)
9483                 link->destroy(link);
9484         if (link->pin_path)
9485                 free(link->pin_path);
9486         free(link);
9487
9488         return err;
9489 }
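/* Link lifecycle sketch (illustrative; "do_sys_open" is just an example
 * kprobe target): destroying a link detaches the underlying program unless
 * the link was disconnected first:
 *
 *	struct bpf_link *link;
 *
 *	link = bpf_program__attach_kprobe(prog, false, "do_sys_open");
 *	if (libbpf_get_error(link))
 *		return -1;
 *	...
 *	bpf_link__disconnect(link);	// optional: keep attachment alive
 *	bpf_link__destroy(link);	// frees link, detaches only if connected
 */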
9490
9491 int bpf_link__fd(const struct bpf_link *link)
9492 {
9493         return link->fd;
9494 }
9495
9496 const char *bpf_link__pin_path(const struct bpf_link *link)
9497 {
9498         return link->pin_path;
9499 }
9500
9501 static int bpf_link__detach_fd(struct bpf_link *link)
9502 {
9503         return close(link->fd);
9504 }
9505
9506 struct bpf_link *bpf_link__open(const char *path)
9507 {
9508         struct bpf_link *link;
9509         int fd;
9510
9511         fd = bpf_obj_get(path);
9512         if (fd < 0) {
9513                 fd = -errno;
9514                 pr_warn("failed to open link at %s: %d\n", path, fd);
9515                 return ERR_PTR(fd);
9516         }
9517
9518         link = calloc(1, sizeof(*link));
9519         if (!link) {
9520                 close(fd);
9521                 return ERR_PTR(-ENOMEM);
9522         }
9523         link->detach = &bpf_link__detach_fd;
9524         link->fd = fd;
9525
9526         link->pin_path = strdup(path);
9527         if (!link->pin_path) {
9528                 bpf_link__destroy(link);
9529                 return ERR_PTR(-ENOMEM);
9530         }
9531
9532         return link;
9533 }
9534
9535 int bpf_link__detach(struct bpf_link *link)
9536 {
9537         return bpf_link_detach(link->fd) ? -errno : 0;
9538 }
9539
9540 int bpf_link__pin(struct bpf_link *link, const char *path)
9541 {
9542         int err;
9543
9544         if (link->pin_path)
9545                 return -EBUSY;
9546         err = make_parent_dir(path);
9547         if (err)
9548                 return err;
9549         err = check_path(path);
9550         if (err)
9551                 return err;
9552
9553         link->pin_path = strdup(path);
9554         if (!link->pin_path)
9555                 return -ENOMEM;
9556
9557         if (bpf_obj_pin(link->fd, link->pin_path)) {
9558                 err = -errno;
9559                 zfree(&link->pin_path);
9560                 return err;
9561         }
9562
9563         pr_debug("link fd=%d: pinned at %s\n", link->fd, link->pin_path);
9564         return 0;
9565 }
9566
9567 int bpf_link__unpin(struct bpf_link *link)
9568 {
9569         int err;
9570
9571         if (!link->pin_path)
9572                 return -EINVAL;
9573
9574         err = unlink(link->pin_path);
9575         if (err != 0)
9576                 return -errno;
9577
9578         pr_debug("link fd=%d: unpinned from %s\n", link->fd, link->pin_path);
9579         zfree(&link->pin_path);
9580         return 0;
9581 }
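/* Pinning sketch (illustrative; the path is hypothetical): pin a link in
 * BPF FS so the attachment survives process exit, then reopen it later:
 *
 *	if (!bpf_link__pin(link, "/sys/fs/bpf/mylink"))
 *		bpf_link__disconnect(link);	// stays attached after exit
 *
 * ... later, possibly from another process:
 *
 *	struct bpf_link *link = bpf_link__open("/sys/fs/bpf/mylink");
 */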
9582
9583 static int bpf_link__detach_perf_event(struct bpf_link *link)
9584 {
9585         int err;
9586
9587         err = ioctl(link->fd, PERF_EVENT_IOC_DISABLE, 0);
9588         if (err)
9589                 err = -errno;
9590
9591         close(link->fd);
9592         return err;
9593 }
9594
9595 struct bpf_link *bpf_program__attach_perf_event(struct bpf_program *prog,
9596                                                 int pfd)
9597 {
9598         char errmsg[STRERR_BUFSIZE];
9599         struct bpf_link *link;
9600         int prog_fd, err;
9601
9602         if (pfd < 0) {
9603                 pr_warn("prog '%s': invalid perf event FD %d\n",
9604                         prog->name, pfd);
9605                 return ERR_PTR(-EINVAL);
9606         }
9607         prog_fd = bpf_program__fd(prog);
9608         if (prog_fd < 0) {
9609                 pr_warn("prog '%s': can't attach BPF program w/o FD (did you load it?)\n",
9610                         prog->name);
9611                 return ERR_PTR(-EINVAL);
9612         }
9613
9614         link = calloc(1, sizeof(*link));
9615         if (!link)
9616                 return ERR_PTR(-ENOMEM);
9617         link->detach = &bpf_link__detach_perf_event;
9618         link->fd = pfd;
9619
9620         if (ioctl(pfd, PERF_EVENT_IOC_SET_BPF, prog_fd) < 0) {
9621                 err = -errno;
9622                 free(link);
9623                 pr_warn("prog '%s': failed to attach to pfd %d: %s\n",
9624                         prog->name, pfd, libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
9625                 if (err == -EPROTO)
9626                         pr_warn("prog '%s': try add PERF_SAMPLE_CALLCHAIN to or remove exclude_callchain_[kernel|user] from pfd %d\n",
9627                                 prog->name, pfd);
9628                 return ERR_PTR(err);
9629         }
9630         if (ioctl(pfd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
9631                 err = -errno;
9632                 free(link);
9633                 pr_warn("prog '%s': failed to enable pfd %d: %s\n",
9634                         prog->name, pfd, libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
9635                 return ERR_PTR(err);
9636         }
9637         return link;
9638 }
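/* Usage sketch (illustrative): attach a perf_event BPF program to a
 * software CPU-clock event on CPU 0. The perf_event_attr setup is the
 * caller's responsibility, not something this function does:
 *
 *	struct perf_event_attr attr = {
 *		.type = PERF_TYPE_SOFTWARE,
 *		.config = PERF_COUNT_SW_CPU_CLOCK,
 *		.size = sizeof(attr),
 *		.sample_freq = 99,
 *		.freq = 1,
 *	};
 *	struct bpf_link *link;
 *	int pfd;
 *
 *	// args: pid = -1, cpu = 0, group_fd = -1
 *	pfd = syscall(__NR_perf_event_open, &attr, -1, 0, -1,
 *		      PERF_FLAG_FD_CLOEXEC);
 *	link = bpf_program__attach_perf_event(prog, pfd);
 */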
9639
9640 /*
9641  * This function is expected to parse an integer in the range of [0, 2^31-1]
9642  * from the given file using the scanf format string fmt. If the actual
9643  * parsed value is negative, the result may be indistinguishable from an error.
9644  */
9645 static int parse_uint_from_file(const char *file, const char *fmt)
9646 {
9647         char buf[STRERR_BUFSIZE];
9648         int err, ret;
9649         FILE *f;
9650
9651         f = fopen(file, "r");
9652         if (!f) {
9653                 err = -errno;
9654                 pr_debug("failed to open '%s': %s\n", file,
9655                          libbpf_strerror_r(err, buf, sizeof(buf)));
9656                 return err;
9657         }
9658         err = fscanf(f, fmt, &ret);
9659         if (err != 1) {
9660                 err = err == EOF ? -EIO : -errno;
9661                 pr_debug("failed to parse '%s': %s\n", file,
9662                         libbpf_strerror_r(err, buf, sizeof(buf)));
9663                 fclose(f);
9664                 return err;
9665         }
9666         fclose(f);
9667         return ret;
9668 }
9669
9670 static int determine_kprobe_perf_type(void)
9671 {
9672         const char *file = "/sys/bus/event_source/devices/kprobe/type";
9673
9674         return parse_uint_from_file(file, "%d\n");
9675 }
9676
9677 static int determine_uprobe_perf_type(void)
9678 {
9679         const char *file = "/sys/bus/event_source/devices/uprobe/type";
9680
9681         return parse_uint_from_file(file, "%d\n");
9682 }
9683
9684 static int determine_kprobe_retprobe_bit(void)
9685 {
9686         const char *file = "/sys/bus/event_source/devices/kprobe/format/retprobe";
9687
9688         return parse_uint_from_file(file, "config:%d\n");
9689 }
9690
9691 static int determine_uprobe_retprobe_bit(void)
9692 {
9693         const char *file = "/sys/bus/event_source/devices/uprobe/format/retprobe";
9694
9695         return parse_uint_from_file(file, "config:%d\n");
9696 }
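
/* For reference: the sysfs format files parsed above normally contain a
 * single line such as "config:0", i.e. on most kernels the retprobe flag
 * is bit 0 of attr.config. The bit is still read from sysfs because its
 * position is not guaranteed by ABI.
 */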
9697
9698 static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name,
9699                                  uint64_t offset, int pid)
9700 {
9701         struct perf_event_attr attr = {};
9702         char errmsg[STRERR_BUFSIZE];
9703         int type, pfd, err;
9704
9705         type = uprobe ? determine_uprobe_perf_type()
9706                       : determine_kprobe_perf_type();
9707         if (type < 0) {
9708                 pr_warn("failed to determine %s perf type: %s\n",
9709                         uprobe ? "uprobe" : "kprobe",
9710                         libbpf_strerror_r(type, errmsg, sizeof(errmsg)));
9711                 return type;
9712         }
9713         if (retprobe) {
9714                 int bit = uprobe ? determine_uprobe_retprobe_bit()
9715                                  : determine_kprobe_retprobe_bit();
9716
9717                 if (bit < 0) {
9718                         pr_warn("failed to determine %s retprobe bit: %s\n",
9719                                 uprobe ? "uprobe" : "kprobe",
9720                                 libbpf_strerror_r(bit, errmsg, sizeof(errmsg)));
9721                         return bit;
9722                 }
9723                 attr.config |= 1 << bit;
9724         }
9725         attr.size = sizeof(attr);
9726         attr.type = type;
9727         attr.config1 = ptr_to_u64(name); /* kprobe_func or uprobe_path */
9728         attr.config2 = offset;           /* kprobe_addr or probe_offset */
9729
9730         /* pid filter is meaningful only for uprobes */
9731         pfd = syscall(__NR_perf_event_open, &attr,
9732                       pid < 0 ? -1 : pid /* pid */,
9733                       pid == -1 ? 0 : -1 /* cpu */,
9734                       -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC);
9735         if (pfd < 0) {
9736                 err = -errno;
9737                 pr_warn("%s perf_event_open() failed: %s\n",
9738                         uprobe ? "uprobe" : "kprobe",
9739                         libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
9740                 return err;
9741         }
9742         return pfd;
9743 }
9744
9745 struct bpf_link *bpf_program__attach_kprobe(struct bpf_program *prog,
9746                                             bool retprobe,
9747                                             const char *func_name)
9748 {
9749         char errmsg[STRERR_BUFSIZE];
9750         struct bpf_link *link;
9751         int pfd, err;
9752
9753         pfd = perf_event_open_probe(false /* uprobe */, retprobe, func_name,
9754                                     0 /* offset */, -1 /* pid */);
9755         if (pfd < 0) {
9756                 pr_warn("prog '%s': failed to create %s '%s' perf event: %s\n",
9757                         prog->name, retprobe ? "kretprobe" : "kprobe", func_name,
9758                         libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
9759                 return ERR_PTR(pfd);
9760         }
9761         link = bpf_program__attach_perf_event(prog, pfd);
9762         if (IS_ERR(link)) {
9763                 close(pfd);
9764                 err = PTR_ERR(link);
9765                 pr_warn("prog '%s': failed to attach to %s '%s': %s\n",
9766                         prog->name, retprobe ? "kretprobe" : "kprobe", func_name,
9767                         libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
9768                 return link;
9769         }
9770         return link;
9771 }
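
/* Usage sketch (illustrative; "do_unlinkat" is just an example symbol):
 *
 *	struct bpf_link *link;
 *
 *	link = bpf_program__attach_kprobe(prog, false, "do_unlinkat");
 *	if (IS_ERR(link))
 *		return PTR_ERR(link);
 */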
9772
9773 static struct bpf_link *attach_kprobe(const struct bpf_sec_def *sec,
9774                                       struct bpf_program *prog)
9775 {
9776         const char *func_name;
9777         bool retprobe;
9778
9779         func_name = prog->sec_name + sec->len;
9780         retprobe = strcmp(sec->sec, "kretprobe/") == 0;
9781
9782         return bpf_program__attach_kprobe(prog, retprobe, func_name);
9783 }
9784
9785 struct bpf_link *bpf_program__attach_uprobe(struct bpf_program *prog,
9786                                             bool retprobe, pid_t pid,
9787                                             const char *binary_path,
9788                                             size_t func_offset)
9789 {
9790         char errmsg[STRERR_BUFSIZE];
9791         struct bpf_link *link;
9792         int pfd, err;
9793
9794         pfd = perf_event_open_probe(true /* uprobe */, retprobe,
9795                                     binary_path, func_offset, pid);
9796         if (pfd < 0) {
9797                 pr_warn("prog '%s': failed to create %s '%s:0x%zx' perf event: %s\n",
9798                         prog->name, retprobe ? "uretprobe" : "uprobe",
9799                         binary_path, func_offset,
9800                         libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
9801                 return ERR_PTR(pfd);
9802         }
9803         link = bpf_program__attach_perf_event(prog, pfd);
9804         if (IS_ERR(link)) {
9805                 close(pfd);
9806                 err = PTR_ERR(link);
9807                 pr_warn("prog '%s': failed to attach to %s '%s:0x%zx': %s\n",
9808                         prog->name, retprobe ? "uretprobe" : "uprobe",
9809                         binary_path, func_offset,
9810                         libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
9811                 return link;
9812         }
9813         return link;
9814 }
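
/* Usage sketch (illustrative; path and offset are hypothetical): attach to
 * a user-space function, identified by binary path and file offset, in all
 * processes (pid == -1):
 *
 *	struct bpf_link *link;
 *
 *	link = bpf_program__attach_uprobe(prog, false, -1,
 *					  "/usr/lib/libc.so.6", 0x12345);
 *	if (IS_ERR(link))
 *		return PTR_ERR(link);
 */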
9815
9816 static int determine_tracepoint_id(const char *tp_category,
9817                                    const char *tp_name)
9818 {
9819         char file[PATH_MAX];
9820         int ret;
9821
9822         ret = snprintf(file, sizeof(file),
9823                        "/sys/kernel/debug/tracing/events/%s/%s/id",
9824                        tp_category, tp_name);
9825         if (ret < 0)
9826                 return -errno;
9827         if (ret >= sizeof(file)) {
9828                 pr_debug("tracepoint %s/%s path is too long\n",
9829                          tp_category, tp_name);
9830                 return -E2BIG;
9831         }
9832         return parse_uint_from_file(file, "%d\n");
9833 }
9834
9835 static int perf_event_open_tracepoint(const char *tp_category,
9836                                       const char *tp_name)
9837 {
9838         struct perf_event_attr attr = {};
9839         char errmsg[STRERR_BUFSIZE];
9840         int tp_id, pfd, err;
9841
9842         tp_id = determine_tracepoint_id(tp_category, tp_name);
9843         if (tp_id < 0) {
9844                 pr_warn("failed to determine tracepoint '%s/%s' perf event ID: %s\n",
9845                         tp_category, tp_name,
9846                         libbpf_strerror_r(tp_id, errmsg, sizeof(errmsg)));
9847                 return tp_id;
9848         }
9849
9850         attr.type = PERF_TYPE_TRACEPOINT;
9851         attr.size = sizeof(attr);
9852         attr.config = tp_id;
9853
9854         pfd = syscall(__NR_perf_event_open, &attr, -1 /* pid */, 0 /* cpu */,
9855                       -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC);
9856         if (pfd < 0) {
9857                 err = -errno;
9858                 pr_warn("tracepoint '%s/%s' perf_event_open() failed: %s\n",
9859                         tp_category, tp_name,
9860                         libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
9861                 return err;
9862         }
9863         return pfd;
9864 }
9865
9866 struct bpf_link *bpf_program__attach_tracepoint(struct bpf_program *prog,
9867                                                 const char *tp_category,
9868                                                 const char *tp_name)
9869 {
9870         char errmsg[STRERR_BUFSIZE];
9871         struct bpf_link *link;
9872         int pfd, err;
9873
9874         pfd = perf_event_open_tracepoint(tp_category, tp_name);
9875         if (pfd < 0) {
9876                 pr_warn("prog '%s': failed to create tracepoint '%s/%s' perf event: %s\n",
9877                         prog->name, tp_category, tp_name,
9878                         libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
9879                 return ERR_PTR(pfd);
9880         }
9881         link = bpf_program__attach_perf_event(prog, pfd);
9882         if (IS_ERR(link)) {
9883                 close(pfd);
9884                 err = PTR_ERR(link);
9885                 pr_warn("prog '%s': failed to attach to tracepoint '%s/%s': %s\n",
9886                         prog->name, tp_category, tp_name,
9887                         libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
9888                 return link;
9889         }
9890         return link;
9891 }
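
/* Usage sketch (illustrative; assumes the syscalls:sys_enter_openat
 * tracepoint exists on the running kernel):
 *
 *	struct bpf_link *link;
 *
 *	link = bpf_program__attach_tracepoint(prog, "syscalls",
 *					      "sys_enter_openat");
 *	if (IS_ERR(link))
 *		return PTR_ERR(link);
 */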
9892
9893 static struct bpf_link *attach_tp(const struct bpf_sec_def *sec,
9894                                   struct bpf_program *prog)
9895 {
9896         char *sec_name, *tp_cat, *tp_name;
9897         struct bpf_link *link;
9898
9899         sec_name = strdup(prog->sec_name);
9900         if (!sec_name)
9901                 return ERR_PTR(-ENOMEM);
9902
9903         /* extract "tp/<category>/<name>" */
9904         tp_cat = sec_name + sec->len;
9905         tp_name = strchr(tp_cat, '/');
9906         if (!tp_name) {
9907                 link = ERR_PTR(-EINVAL);
9908                 goto out;
9909         }
9910         *tp_name = '\0';
9911         tp_name++;
9912
9913         link = bpf_program__attach_tracepoint(prog, tp_cat, tp_name);
9914 out:
9915         free(sec_name);
9916         return link;
9917 }
9918
9919 struct bpf_link *bpf_program__attach_raw_tracepoint(struct bpf_program *prog,
9920                                                     const char *tp_name)
9921 {
9922         char errmsg[STRERR_BUFSIZE];
9923         struct bpf_link *link;
9924         int prog_fd, pfd;
9925
9926         prog_fd = bpf_program__fd(prog);
9927         if (prog_fd < 0) {
9928                 pr_warn("prog '%s': can't attach before loaded\n", prog->name);
9929                 return ERR_PTR(-EINVAL);
9930         }
9931
9932         link = calloc(1, sizeof(*link));
9933         if (!link)
9934                 return ERR_PTR(-ENOMEM);
9935         link->detach = &bpf_link__detach_fd;
9936
9937         pfd = bpf_raw_tracepoint_open(tp_name, prog_fd);
9938         if (pfd < 0) {
9939                 pfd = -errno;
9940                 free(link);
9941                 pr_warn("prog '%s': failed to attach to raw tracepoint '%s': %s\n",
9942                         prog->name, tp_name, libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
9943                 return ERR_PTR(pfd);
9944         }
9945         link->fd = pfd;
9946         return link;
9947 }
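
/* Usage sketch (illustrative; "sched_switch" is one commonly available raw
 * tracepoint name):
 *
 *	struct bpf_link *link;
 *
 *	link = bpf_program__attach_raw_tracepoint(prog, "sched_switch");
 *	if (IS_ERR(link))
 *		return PTR_ERR(link);
 */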
9948
9949 static struct bpf_link *attach_raw_tp(const struct bpf_sec_def *sec,
9950                                       struct bpf_program *prog)
9951 {
9952         const char *tp_name = prog->sec_name + sec->len;
9953
9954         return bpf_program__attach_raw_tracepoint(prog, tp_name);
9955 }
9956
9957 /* Common logic for all BPF program types that attach to a btf_id */
9958 static struct bpf_link *bpf_program__attach_btf_id(struct bpf_program *prog)
9959 {
9960         char errmsg[STRERR_BUFSIZE];
9961         struct bpf_link *link;
9962         int prog_fd, pfd;
9963
9964         prog_fd = bpf_program__fd(prog);
9965         if (prog_fd < 0) {
9966                 pr_warn("prog '%s': can't attach before loaded\n", prog->name);
9967                 return ERR_PTR(-EINVAL);
9968         }
9969
9970         link = calloc(1, sizeof(*link));
9971         if (!link)
9972                 return ERR_PTR(-ENOMEM);
9973         link->detach = &bpf_link__detach_fd;
9974
9975         pfd = bpf_raw_tracepoint_open(NULL, prog_fd);
9976         if (pfd < 0) {
9977                 pfd = -errno;
9978                 free(link);
9979                 pr_warn("prog '%s': failed to attach: %s\n",
9980                         prog->name, libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
9981                 return ERR_PTR(pfd);
9982         }
9983         link->fd = pfd;
9984         return link;
9985 }
9986
9987 struct bpf_link *bpf_program__attach_trace(struct bpf_program *prog)
9988 {
9989         return bpf_program__attach_btf_id(prog);
9990 }
9991
9992 struct bpf_link *bpf_program__attach_lsm(struct bpf_program *prog)
9993 {
9994         return bpf_program__attach_btf_id(prog);
9995 }
9996
9997 static struct bpf_link *attach_trace(const struct bpf_sec_def *sec,
9998                                      struct bpf_program *prog)
9999 {
10000         return bpf_program__attach_trace(prog);
10001 }
10002
10003 static struct bpf_link *attach_lsm(const struct bpf_sec_def *sec,
10004                                    struct bpf_program *prog)
10005 {
10006         return bpf_program__attach_lsm(prog);
10007 }
10008
10009 static struct bpf_link *attach_iter(const struct bpf_sec_def *sec,
10010                                     struct bpf_program *prog)
10011 {
10012         return bpf_program__attach_iter(prog, NULL);
10013 }
10014
10015 static struct bpf_link *
10016 bpf_program__attach_fd(struct bpf_program *prog, int target_fd, int btf_id,
10017                        const char *target_name)
10018 {
10019         DECLARE_LIBBPF_OPTS(bpf_link_create_opts, opts,
10020                             .target_btf_id = btf_id);
10021         enum bpf_attach_type attach_type;
10022         char errmsg[STRERR_BUFSIZE];
10023         struct bpf_link *link;
10024         int prog_fd, link_fd;
10025
10026         prog_fd = bpf_program__fd(prog);
10027         if (prog_fd < 0) {
10028                 pr_warn("prog '%s': can't attach before loaded\n", prog->name);
10029                 return ERR_PTR(-EINVAL);
10030         }
10031
10032         link = calloc(1, sizeof(*link));
10033         if (!link)
10034                 return ERR_PTR(-ENOMEM);
10035         link->detach = &bpf_link__detach_fd;
10036
10037         attach_type = bpf_program__get_expected_attach_type(prog);
10038         link_fd = bpf_link_create(prog_fd, target_fd, attach_type, &opts);
10039         if (link_fd < 0) {
10040                 link_fd = -errno;
10041                 free(link);
10042                 pr_warn("prog '%s': failed to attach to %s: %s\n",
10043                         prog->name, target_name,
10044                         libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg)));
10045                 return ERR_PTR(link_fd);
10046         }
10047         link->fd = link_fd;
10048         return link;
10049 }
10050
10051 struct bpf_link *
10052 bpf_program__attach_cgroup(struct bpf_program *prog, int cgroup_fd)
10053 {
10054         return bpf_program__attach_fd(prog, cgroup_fd, 0, "cgroup");
10055 }
10056
10057 struct bpf_link *
10058 bpf_program__attach_netns(struct bpf_program *prog, int netns_fd)
10059 {
10060         return bpf_program__attach_fd(prog, netns_fd, 0, "netns");
10061 }
10062
10063 struct bpf_link *bpf_program__attach_xdp(struct bpf_program *prog, int ifindex)
10064 {
10065         /* target_fd/target_ifindex use the same field in LINK_CREATE */
10066         return bpf_program__attach_fd(prog, ifindex, 0, "xdp");
10067 }
10068
10069 struct bpf_link *bpf_program__attach_freplace(struct bpf_program *prog,
10070                                               int target_fd,
10071                                               const char *attach_func_name)
10072 {
10073         int btf_id;
10074
10075         if (!!target_fd != !!attach_func_name) {
10076                 pr_warn("prog '%s': supply none or both of target_fd and attach_func_name\n",
10077                         prog->name);
10078                 return ERR_PTR(-EINVAL);
10079         }
10080
10081         if (prog->type != BPF_PROG_TYPE_EXT) {
10082                 pr_warn("prog '%s': only BPF_PROG_TYPE_EXT can attach as freplace\n",
10083                         prog->name);
10084                 return ERR_PTR(-EINVAL);
10085         }
10086
10087         if (target_fd) {
10088                 btf_id = libbpf_find_prog_btf_id(attach_func_name, target_fd);
10089                 if (btf_id < 0)
10090                         return ERR_PTR(btf_id);
10091
10092                 return bpf_program__attach_fd(prog, target_fd, btf_id, "freplace");
10093         } else {
10094                 /* no target, so use raw_tracepoint_open for compatibility
10095                  * with old kernels
10096                  */
10097                 return bpf_program__attach_trace(prog);
10098         }
10099 }
10100
10101 struct bpf_link *
10102 bpf_program__attach_iter(struct bpf_program *prog,
10103                          const struct bpf_iter_attach_opts *opts)
10104 {
10105         DECLARE_LIBBPF_OPTS(bpf_link_create_opts, link_create_opts);
10106         char errmsg[STRERR_BUFSIZE];
10107         struct bpf_link *link;
10108         int prog_fd, link_fd;
10109         __u32 target_fd = 0;
10110
10111         if (!OPTS_VALID(opts, bpf_iter_attach_opts))
10112                 return ERR_PTR(-EINVAL);
10113
10114         link_create_opts.iter_info = OPTS_GET(opts, link_info, (void *)0);
10115         link_create_opts.iter_info_len = OPTS_GET(opts, link_info_len, 0);
10116
10117         prog_fd = bpf_program__fd(prog);
10118         if (prog_fd < 0) {
10119                 pr_warn("prog '%s': can't attach before loaded\n", prog->name);
10120                 return ERR_PTR(-EINVAL);
10121         }
10122
10123         link = calloc(1, sizeof(*link));
10124         if (!link)
10125                 return ERR_PTR(-ENOMEM);
10126         link->detach = &bpf_link__detach_fd;
10127
10128         link_fd = bpf_link_create(prog_fd, target_fd, BPF_TRACE_ITER,
10129                                   &link_create_opts);
10130         if (link_fd < 0) {
10131                 link_fd = -errno;
10132                 free(link);
10133                 pr_warn("prog '%s': failed to attach to iterator: %s\n",
10134                         prog->name, libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg)));
10135                 return ERR_PTR(link_fd);
10136         }
10137         link->fd = link_fd;
10138         return link;
10139 }
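
/* Usage sketch (illustrative): opts may be NULL for a default,
 * unparameterized iterator attachment:
 *
 *	struct bpf_link *link;
 *
 *	link = bpf_program__attach_iter(prog, NULL);
 *	if (IS_ERR(link))
 *		return PTR_ERR(link);
 */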
10140
10141 struct bpf_link *bpf_program__attach(struct bpf_program *prog)
10142 {
10143         const struct bpf_sec_def *sec_def;
10144
10145         sec_def = find_sec_def(prog->sec_name);
10146         if (!sec_def || !sec_def->attach_fn)
10147                 return ERR_PTR(-ESRCH);
10148
10149         return sec_def->attach_fn(sec_def, prog);
10150 }
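
/* Usage sketch (illustrative): let libbpf pick the attach method from the
 * program's section name, e.g. SEC("kprobe/do_unlinkat") or
 * SEC("tp/sched/sched_switch"):
 *
 *	struct bpf_link *link;
 *
 *	link = bpf_program__attach(prog);
 *	if (IS_ERR(link))
 *		return PTR_ERR(link);
 */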
10151
10152 static int bpf_link__detach_struct_ops(struct bpf_link *link)
10153 {
10154         __u32 zero = 0;
10155
10156         if (bpf_map_delete_elem(link->fd, &zero))
10157                 return -errno;
10158
10159         return 0;
10160 }
10161
10162 struct bpf_link *bpf_map__attach_struct_ops(struct bpf_map *map)
10163 {
10164         struct bpf_struct_ops *st_ops;
10165         struct bpf_link *link;
10166         __u32 i, zero = 0;
10167         int err;
10168
10169         if (!bpf_map__is_struct_ops(map) || map->fd == -1)
10170                 return ERR_PTR(-EINVAL);
10171
10172         link = calloc(1, sizeof(*link));
10173         if (!link)
10174                 return ERR_PTR(-ENOMEM);
10175
10176         st_ops = map->st_ops;
10177         for (i = 0; i < btf_vlen(st_ops->type); i++) {
10178                 struct bpf_program *prog = st_ops->progs[i];
10179                 void *kern_data;
10180                 int prog_fd;
10181
10182                 if (!prog)
10183                         continue;
10184
10185                 prog_fd = bpf_program__fd(prog);
10186                 kern_data = st_ops->kern_vdata + st_ops->kern_func_off[i];
10187                 *(unsigned long *)kern_data = prog_fd;
10188         }
10189
10190         err = bpf_map_update_elem(map->fd, &zero, st_ops->kern_vdata, 0);
10191         if (err) {
10192                 err = -errno;
10193                 free(link);
10194                 return ERR_PTR(err);
10195         }
10196
10197         link->detach = bpf_link__detach_struct_ops;
10198         link->fd = map->fd;
10199
10200         return link;
10201 }
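
/* Usage sketch (illustrative; assumes "map" is a struct_ops map from an
 * already-loaded object):
 *
 *	struct bpf_link *link;
 *
 *	link = bpf_map__attach_struct_ops(map);
 *	if (IS_ERR(link))
 *		return PTR_ERR(link);
 */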
10202
10203 enum bpf_perf_event_ret
10204 bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size,
10205                            void **copy_mem, size_t *copy_size,
10206                            bpf_perf_event_print_t fn, void *private_data)
10207 {
10208         struct perf_event_mmap_page *header = mmap_mem;
10209         __u64 data_head = ring_buffer_read_head(header);
10210         __u64 data_tail = header->data_tail;
10211         void *base = ((__u8 *)header) + page_size;
10212         int ret = LIBBPF_PERF_EVENT_CONT;
10213         struct perf_event_header *ehdr;
10214         size_t ehdr_size;
10215
10216         while (data_head != data_tail) {
10217                 ehdr = base + (data_tail & (mmap_size - 1));
10218                 ehdr_size = ehdr->size;
10219
10220                 if (((void *)ehdr) + ehdr_size > base + mmap_size) {
10221                         void *copy_start = ehdr;
10222                         size_t len_first = base + mmap_size - copy_start;
10223                         size_t len_secnd = ehdr_size - len_first;
10224
10225                         if (*copy_size < ehdr_size) {
10226                                 free(*copy_mem);
10227                                 *copy_mem = malloc(ehdr_size);
10228                                 if (!*copy_mem) {
10229                                         *copy_size = 0;
10230                                         ret = LIBBPF_PERF_EVENT_ERROR;
10231                                         break;
10232                                 }
10233                                 *copy_size = ehdr_size;
10234                         }
10235
10236                         memcpy(*copy_mem, copy_start, len_first);
10237                         memcpy(*copy_mem + len_first, base, len_secnd);
10238                         ehdr = *copy_mem;
10239                 }
10240
10241                 ret = fn(ehdr, private_data);
10242                 data_tail += ehdr_size;
10243                 if (ret != LIBBPF_PERF_EVENT_CONT)
10244                         break;
10245         }
10246
10247         ring_buffer_write_tail(header, data_tail);
10248         return ret;
10249 }
10250
10251 struct perf_buffer;
10252
10253 struct perf_buffer_params {
10254         struct perf_event_attr *attr;
10255         /* if event_cb is specified, it takes precedence */
10256         perf_buffer_event_fn event_cb;
10257         /* sample_cb and lost_cb are higher-level common-case callbacks */
10258         perf_buffer_sample_fn sample_cb;
10259         perf_buffer_lost_fn lost_cb;
10260         void *ctx;
10261         int cpu_cnt;
10262         int *cpus;
10263         int *map_keys;
10264 };
10265
10266 struct perf_cpu_buf {
10267         struct perf_buffer *pb;
10268         void *base; /* mmap()'ed memory */
10269         void *buf; /* for reconstructing segmented data */
10270         size_t buf_size;
10271         int fd;
10272         int cpu;
10273         int map_key;
10274 };
10275
10276 struct perf_buffer {
10277         perf_buffer_event_fn event_cb;
10278         perf_buffer_sample_fn sample_cb;
10279         perf_buffer_lost_fn lost_cb;
10280         void *ctx; /* passed into callbacks */
10281
10282         size_t page_size;
10283         size_t mmap_size;
10284         struct perf_cpu_buf **cpu_bufs;
10285         struct epoll_event *events;
10286         int cpu_cnt; /* number of allocated CPU buffers */
10287         int epoll_fd; /* epoll instance FD */
10288         int map_fd; /* BPF_MAP_TYPE_PERF_EVENT_ARRAY BPF map FD */
10289 };
10290
10291 static void perf_buffer__free_cpu_buf(struct perf_buffer *pb,
10292                                       struct perf_cpu_buf *cpu_buf)
10293 {
10294         if (!cpu_buf)
10295                 return;
10296         if (cpu_buf->base &&
10297             munmap(cpu_buf->base, pb->mmap_size + pb->page_size))
10298                 pr_warn("failed to munmap cpu_buf #%d\n", cpu_buf->cpu);
10299         if (cpu_buf->fd >= 0) {
10300                 ioctl(cpu_buf->fd, PERF_EVENT_IOC_DISABLE, 0);
10301                 close(cpu_buf->fd);
10302         }
10303         free(cpu_buf->buf);
10304         free(cpu_buf);
10305 }
10306
10307 void perf_buffer__free(struct perf_buffer *pb)
10308 {
10309         int i;
10310
10311         if (IS_ERR_OR_NULL(pb))
10312                 return;
10313         if (pb->cpu_bufs) {
10314                 for (i = 0; i < pb->cpu_cnt; i++) {
10315                         struct perf_cpu_buf *cpu_buf = pb->cpu_bufs[i];
10316
10317                         if (!cpu_buf)
10318                                 continue;
10319
10320                         bpf_map_delete_elem(pb->map_fd, &cpu_buf->map_key);
10321                         perf_buffer__free_cpu_buf(pb, cpu_buf);
10322                 }
10323                 free(pb->cpu_bufs);
10324         }
10325         if (pb->epoll_fd >= 0)
10326                 close(pb->epoll_fd);
10327         free(pb->events);
10328         free(pb);
10329 }
10330
10331 static struct perf_cpu_buf *
10332 perf_buffer__open_cpu_buf(struct perf_buffer *pb, struct perf_event_attr *attr,
10333                           int cpu, int map_key)
10334 {
10335         struct perf_cpu_buf *cpu_buf;
10336         char msg[STRERR_BUFSIZE];
10337         int err;
10338
10339         cpu_buf = calloc(1, sizeof(*cpu_buf));
10340         if (!cpu_buf)
10341                 return ERR_PTR(-ENOMEM);
10342
10343         cpu_buf->pb = pb;
10344         cpu_buf->cpu = cpu;
10345         cpu_buf->map_key = map_key;
10346
10347         cpu_buf->fd = syscall(__NR_perf_event_open, attr, -1 /* pid */, cpu,
10348                               -1, PERF_FLAG_FD_CLOEXEC);
10349         if (cpu_buf->fd < 0) {
10350                 err = -errno;
10351                 pr_warn("failed to open perf buffer event on cpu #%d: %s\n",
10352                         cpu, libbpf_strerror_r(err, msg, sizeof(msg)));
10353                 goto error;
10354         }
10355
10356         cpu_buf->base = mmap(NULL, pb->mmap_size + pb->page_size,
10357                              PROT_READ | PROT_WRITE, MAP_SHARED,
10358                              cpu_buf->fd, 0);
10359         if (cpu_buf->base == MAP_FAILED) {
10360                 cpu_buf->base = NULL;
10361                 err = -errno;
10362                 pr_warn("failed to mmap perf buffer on cpu #%d: %s\n",
10363                         cpu, libbpf_strerror_r(err, msg, sizeof(msg)));
10364                 goto error;
10365         }
10366
10367         if (ioctl(cpu_buf->fd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
10368                 err = -errno;
10369                 pr_warn("failed to enable perf buffer event on cpu #%d: %s\n",
10370                         cpu, libbpf_strerror_r(err, msg, sizeof(msg)));
10371                 goto error;
10372         }
10373
10374         return cpu_buf;
10375
10376 error:
10377         perf_buffer__free_cpu_buf(pb, cpu_buf);
10378         return (struct perf_cpu_buf *)ERR_PTR(err);
10379 }
10380
10381 static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt,
10382                                               struct perf_buffer_params *p);
10383
10384 struct perf_buffer *perf_buffer__new(int map_fd, size_t page_cnt,
10385                                      const struct perf_buffer_opts *opts)
10386 {
10387         struct perf_buffer_params p = {};
10388         struct perf_event_attr attr = {};
10389
10390         attr.config = PERF_COUNT_SW_BPF_OUTPUT;
10391         attr.type = PERF_TYPE_SOFTWARE;
10392         attr.sample_type = PERF_SAMPLE_RAW;
10393         attr.sample_period = 1;
10394         attr.wakeup_events = 1;
10395
10396         p.attr = &attr;
10397         p.sample_cb = opts ? opts->sample_cb : NULL;
10398         p.lost_cb = opts ? opts->lost_cb : NULL;
10399         p.ctx = opts ? opts->ctx : NULL;
10400
10401         return __perf_buffer__new(map_fd, page_cnt, &p);
10402 }
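
/* Usage sketch (illustrative; handle_sample is a hypothetical callback):
 *
 *	static void handle_sample(void *ctx, int cpu, void *data, __u32 size)
 *	{
 *		// consume one raw sample
 *	}
 *
 *	struct perf_buffer_opts pb_opts = { .sample_cb = handle_sample };
 *	struct perf_buffer *pb;
 *
 *	pb = perf_buffer__new(bpf_map__fd(map), 8, &pb_opts); // 8 pages per CPU
 *	if (IS_ERR(pb))
 *		return PTR_ERR(pb);
 */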
10403
10404 struct perf_buffer *
10405 perf_buffer__new_raw(int map_fd, size_t page_cnt,
10406                      const struct perf_buffer_raw_opts *opts)
10407 {
10408         struct perf_buffer_params p = {};
10409
10410         p.attr = opts->attr;
10411         p.event_cb = opts->event_cb;
10412         p.ctx = opts->ctx;
10413         p.cpu_cnt = opts->cpu_cnt;
10414         p.cpus = opts->cpus;
10415         p.map_keys = opts->map_keys;
10416
10417         return __perf_buffer__new(map_fd, page_cnt, &p);
10418 }
10419
10420 static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt,
10421                                               struct perf_buffer_params *p)
10422 {
10423         const char *online_cpus_file = "/sys/devices/system/cpu/online";
10424         struct bpf_map_info map;
10425         char msg[STRERR_BUFSIZE];
10426         struct perf_buffer *pb;
10427         bool *online = NULL;
10428         __u32 map_info_len;
10429         int err, i, j, n;
10430
10431         if (page_cnt == 0 || (page_cnt & (page_cnt - 1))) {
10432                 pr_warn("page count should be a non-zero power of two, but is %zu\n",
10433                         page_cnt);
10434                 return ERR_PTR(-EINVAL);
10435         }
10436
10437         /* best-effort sanity checks */
10438         memset(&map, 0, sizeof(map));
10439         map_info_len = sizeof(map);
10440         err = bpf_obj_get_info_by_fd(map_fd, &map, &map_info_len);
10441         if (err) {
10442                 err = -errno;
10443                 /* if BPF_OBJ_GET_INFO_BY_FD is supported, it will return
10444                  * -EBADFD, -EFAULT, or -E2BIG on real error
10445                  */
10446                 if (err != -EINVAL) {
10447                         pr_warn("failed to get map info for map FD %d: %s\n",
10448                                 map_fd, libbpf_strerror_r(err, msg, sizeof(msg)));
10449                         return ERR_PTR(err);
10450                 }
10451                 pr_debug("failed to get map info for FD %d; API not supported? Ignoring...\n",
10452                          map_fd);
10453         } else {
10454                 if (map.type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) {
10455                         pr_warn("map '%s' should be BPF_MAP_TYPE_PERF_EVENT_ARRAY\n",
10456                                 map.name);
10457                         return ERR_PTR(-EINVAL);
10458                 }
10459         }
10460
10461         pb = calloc(1, sizeof(*pb));
10462         if (!pb)
10463                 return ERR_PTR(-ENOMEM);
10464
10465         pb->event_cb = p->event_cb;
10466         pb->sample_cb = p->sample_cb;
10467         pb->lost_cb = p->lost_cb;
10468         pb->ctx = p->ctx;
10469
10470         pb->page_size = getpagesize();
10471         pb->mmap_size = pb->page_size * page_cnt;
10472         pb->map_fd = map_fd;
10473
10474         pb->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
10475         if (pb->epoll_fd < 0) {
10476                 err = -errno;
10477                 pr_warn("failed to create epoll instance: %s\n",
10478                         libbpf_strerror_r(err, msg, sizeof(msg)));
10479                 goto error;
10480         }
10481
10482         if (p->cpu_cnt > 0) {
10483                 pb->cpu_cnt = p->cpu_cnt;
10484         } else {
10485                 pb->cpu_cnt = libbpf_num_possible_cpus();
10486                 if (pb->cpu_cnt < 0) {
10487                         err = pb->cpu_cnt;
10488                         goto error;
10489                 }
10490                 if (map.max_entries && map.max_entries < pb->cpu_cnt)
10491                         pb->cpu_cnt = map.max_entries;
10492         }
10493
10494         pb->events = calloc(pb->cpu_cnt, sizeof(*pb->events));
10495         if (!pb->events) {
10496                 err = -ENOMEM;
10497                 pr_warn("failed to allocate events: out of memory\n");
10498                 goto error;
10499         }
10500         pb->cpu_bufs = calloc(pb->cpu_cnt, sizeof(*pb->cpu_bufs));
10501         if (!pb->cpu_bufs) {
10502                 err = -ENOMEM;
10503                 pr_warn("failed to allocate buffers: out of memory\n");
10504                 goto error;
10505         }
10506
10507         err = parse_cpu_mask_file(online_cpus_file, &online, &n);
10508         if (err) {
10509                 pr_warn("failed to get online CPU mask: %d\n", err);
10510                 goto error;
10511         }
10512
10513         for (i = 0, j = 0; i < pb->cpu_cnt; i++) {
10514                 struct perf_cpu_buf *cpu_buf;
10515                 int cpu, map_key;
10516
10517                 cpu = p->cpu_cnt > 0 ? p->cpus[i] : i;
10518                 map_key = p->cpu_cnt > 0 ? p->map_keys[i] : i;
10519
10520                 /* if the user didn't explicitly request particular CPUs to
10521                  * attach to, skip offline/not-present CPUs
10522                  */
10523                 if (p->cpu_cnt <= 0 && (cpu >= n || !online[cpu]))
10524                         continue;
10525
10526                 cpu_buf = perf_buffer__open_cpu_buf(pb, p->attr, cpu, map_key);
10527                 if (IS_ERR(cpu_buf)) {
10528                         err = PTR_ERR(cpu_buf);
10529                         goto error;
10530                 }
10531
10532                 pb->cpu_bufs[j] = cpu_buf;
10533
10534                 err = bpf_map_update_elem(pb->map_fd, &map_key,
10535                                           &cpu_buf->fd, 0);
10536                 if (err) {
10537                         err = -errno;
10538                         pr_warn("failed to set cpu #%d, key %d -> perf FD %d: %s\n",
10539                                 cpu, map_key, cpu_buf->fd,
10540                                 libbpf_strerror_r(err, msg, sizeof(msg)));
10541                         goto error;
10542                 }
10543
10544                 pb->events[j].events = EPOLLIN;
10545                 pb->events[j].data.ptr = cpu_buf;
10546                 if (epoll_ctl(pb->epoll_fd, EPOLL_CTL_ADD, cpu_buf->fd,
10547                               &pb->events[j]) < 0) {
10548                         err = -errno;
10549                         pr_warn("failed to epoll_ctl cpu #%d perf FD %d: %s\n",
10550                                 cpu, cpu_buf->fd,
10551                                 libbpf_strerror_r(err, msg, sizeof(msg)));
10552                         goto error;
10553                 }
10554                 j++;
10555         }
10556         pb->cpu_cnt = j;
10557         free(online);
10558
10559         return pb;
10560
10561 error:
10562         free(online);
10563         if (pb)
10564                 perf_buffer__free(pb);
10565         return ERR_PTR(err);
10566 }
10567
10568 struct perf_sample_raw {
10569         struct perf_event_header header;
10570         uint32_t size;
10571         char data[];
10572 };
10573
10574 struct perf_sample_lost {
10575         struct perf_event_header header;
10576         uint64_t id;
10577         uint64_t lost;
10578         uint64_t sample_id;
10579 };
10580
10581 static enum bpf_perf_event_ret
10582 perf_buffer__process_record(struct perf_event_header *e, void *ctx)
10583 {
10584         struct perf_cpu_buf *cpu_buf = ctx;
10585         struct perf_buffer *pb = cpu_buf->pb;
10586         void *data = e;
10587
10588         /* user wants full control over parsing perf event */
10589         if (pb->event_cb)
10590                 return pb->event_cb(pb->ctx, cpu_buf->cpu, e);
10591
10592         switch (e->type) {
10593         case PERF_RECORD_SAMPLE: {
10594                 struct perf_sample_raw *s = data;
10595
10596                 if (pb->sample_cb)
10597                         pb->sample_cb(pb->ctx, cpu_buf->cpu, s->data, s->size);
10598                 break;
10599         }
10600         case PERF_RECORD_LOST: {
10601                 struct perf_sample_lost *s = data;
10602
10603                 if (pb->lost_cb)
10604                         pb->lost_cb(pb->ctx, cpu_buf->cpu, s->lost);
10605                 break;
10606         }
10607         default:
10608                 pr_warn("unknown perf sample type %d\n", e->type);
10609                 return LIBBPF_PERF_EVENT_ERROR;
10610         }
10611         return LIBBPF_PERF_EVENT_CONT;
10612 }
10613
10614 static int perf_buffer__process_records(struct perf_buffer *pb,
10615                                         struct perf_cpu_buf *cpu_buf)
10616 {
10617         enum bpf_perf_event_ret ret;
10618
10619         ret = bpf_perf_event_read_simple(cpu_buf->base, pb->mmap_size,
10620                                          pb->page_size, &cpu_buf->buf,
10621                                          &cpu_buf->buf_size,
10622                                          perf_buffer__process_record, cpu_buf);
10623         if (ret != LIBBPF_PERF_EVENT_CONT)
10624                 return ret;
10625         return 0;
10626 }
10627
10628 int perf_buffer__epoll_fd(const struct perf_buffer *pb)
10629 {
10630         return pb->epoll_fd;
10631 }
10632
10633 int perf_buffer__poll(struct perf_buffer *pb, int timeout_ms)
10634 {
10635         int i, cnt, err;
10636
10637         cnt = epoll_wait(pb->epoll_fd, pb->events, pb->cpu_cnt, timeout_ms);
10638         for (i = 0; i < cnt; i++) {
10639                 struct perf_cpu_buf *cpu_buf = pb->events[i].data.ptr;
10640
10641                 err = perf_buffer__process_records(pb, cpu_buf);
10642                 if (err) {
10643                         pr_warn("error while processing records: %d\n", err);
10644                         return err;
10645                 }
10646         }
10647         return cnt < 0 ? -errno : cnt;
10648 }
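
/* Usage sketch (illustrative): a minimal event loop draining all ready
 * per-CPU buffers every 100 ms:
 *
 *	int err;
 *
 *	while ((err = perf_buffer__poll(pb, 100)) >= 0)
 *		; // callbacks fire from within poll
 *	perf_buffer__free(pb);
 */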
10649
10650 /* Return number of PERF_EVENT_ARRAY map slots set up by this perf_buffer
10651  * manager.
10652  */
10653 size_t perf_buffer__buffer_cnt(const struct perf_buffer *pb)
10654 {
10655         return pb->cpu_cnt;
10656 }
10657
10658 /*
10659  * Return perf_event FD of a ring buffer in *buf_idx* slot of
10660  * PERF_EVENT_ARRAY BPF map. This FD can be polled for new data using
10661  * select()/poll()/epoll() Linux syscalls.
10662  */
10663 int perf_buffer__buffer_fd(const struct perf_buffer *pb, size_t buf_idx)
10664 {
10665         struct perf_cpu_buf *cpu_buf;
10666
10667         if (buf_idx >= pb->cpu_cnt)
10668                 return -EINVAL;
10669
10670         cpu_buf = pb->cpu_bufs[buf_idx];
10671         if (!cpu_buf)
10672                 return -ENOENT;
10673
10674         return cpu_buf->fd;
10675 }
10676
10677 /*
10678  * Consume data from perf ring buffer corresponding to slot *buf_idx* in
10679  * PERF_EVENT_ARRAY BPF map without waiting/polling. If there is no data to
10680  * consume, do nothing and return success.
10681  * Returns:
10682  *   - 0 on success;
10683  *   - <0 on failure.
10684  */
10685 int perf_buffer__consume_buffer(struct perf_buffer *pb, size_t buf_idx)
10686 {
10687         struct perf_cpu_buf *cpu_buf;
10688
10689         if (buf_idx >= pb->cpu_cnt)
10690                 return -EINVAL;
10691
10692         cpu_buf = pb->cpu_bufs[buf_idx];
10693         if (!cpu_buf)
10694                 return -ENOENT;
10695
10696         return perf_buffer__process_records(pb, cpu_buf);
10697 }
10698
10699 int perf_buffer__consume(struct perf_buffer *pb)
10700 {
10701         int i, err;
10702
10703         for (i = 0; i < pb->cpu_cnt; i++) {
10704                 struct perf_cpu_buf *cpu_buf = pb->cpu_bufs[i];
10705
10706                 if (!cpu_buf)
10707                         continue;
10708
10709                 err = perf_buffer__process_records(pb, cpu_buf);
10710                 if (err) {
10711                         pr_warn("perf_buffer: failed to process records in buffer #%d: %d\n", i, err);
10712                         return err;
10713                 }
10714         }
10715         return 0;
10716 }
10717
10718 struct bpf_prog_info_array_desc {
10719         int     array_offset;   /* e.g. offset of jited_prog_insns */
10720         int     count_offset;   /* e.g. offset of jited_prog_len */
10721         int     size_offset;    /* > 0: offset of rec size,
10722                                  * < 0: fixed size of -size_offset
10723                                  */
10724 };
10725
10726 static struct bpf_prog_info_array_desc bpf_prog_info_array_desc[] = {
10727         [BPF_PROG_INFO_JITED_INSNS] = {
10728                 offsetof(struct bpf_prog_info, jited_prog_insns),
10729                 offsetof(struct bpf_prog_info, jited_prog_len),
10730                 -1,
10731         },
10732         [BPF_PROG_INFO_XLATED_INSNS] = {
10733                 offsetof(struct bpf_prog_info, xlated_prog_insns),
10734                 offsetof(struct bpf_prog_info, xlated_prog_len),
10735                 -1,
10736         },
10737         [BPF_PROG_INFO_MAP_IDS] = {
10738                 offsetof(struct bpf_prog_info, map_ids),
10739                 offsetof(struct bpf_prog_info, nr_map_ids),
10740                 -(int)sizeof(__u32),
10741         },
10742         [BPF_PROG_INFO_JITED_KSYMS] = {
10743                 offsetof(struct bpf_prog_info, jited_ksyms),
10744                 offsetof(struct bpf_prog_info, nr_jited_ksyms),
10745                 -(int)sizeof(__u64),
10746         },
10747         [BPF_PROG_INFO_JITED_FUNC_LENS] = {
10748                 offsetof(struct bpf_prog_info, jited_func_lens),
10749                 offsetof(struct bpf_prog_info, nr_jited_func_lens),
10750                 -(int)sizeof(__u32),
10751         },
10752         [BPF_PROG_INFO_FUNC_INFO] = {
10753                 offsetof(struct bpf_prog_info, func_info),
10754                 offsetof(struct bpf_prog_info, nr_func_info),
10755                 offsetof(struct bpf_prog_info, func_info_rec_size),
10756         },
10757         [BPF_PROG_INFO_LINE_INFO] = {
10758                 offsetof(struct bpf_prog_info, line_info),
10759                 offsetof(struct bpf_prog_info, nr_line_info),
10760                 offsetof(struct bpf_prog_info, line_info_rec_size),
10761         },
10762         [BPF_PROG_INFO_JITED_LINE_INFO] = {
10763                 offsetof(struct bpf_prog_info, jited_line_info),
10764                 offsetof(struct bpf_prog_info, nr_jited_line_info),
10765                 offsetof(struct bpf_prog_info, jited_line_info_rec_size),
10766         },
10767         [BPF_PROG_INFO_PROG_TAGS] = {
10768                 offsetof(struct bpf_prog_info, prog_tags),
10769                 offsetof(struct bpf_prog_info, nr_prog_tags),
10770                 -(int)sizeof(__u8) * BPF_TAG_SIZE,
10771         },
10772
10773 };
10774
10775 static __u32 bpf_prog_info_read_offset_u32(struct bpf_prog_info *info,
10776                                            int offset)
10777 {
10778         __u32 *array = (__u32 *)info;
10779
10780         if (offset >= 0)
10781                 return array[offset / sizeof(__u32)];
10782         return -(int)offset;
10783 }
10784
10785 static __u64 bpf_prog_info_read_offset_u64(struct bpf_prog_info *info,
10786                                            int offset)
10787 {
10788         __u64 *array = (__u64 *)info;
10789
10790         if (offset >= 0)
10791                 return array[offset / sizeof(__u64)];
10792         return -(int)offset;
10793 }
10794
10795 static void bpf_prog_info_set_offset_u32(struct bpf_prog_info *info, int offset,
10796                                          __u32 val)
10797 {
10798         __u32 *array = (__u32 *)info;
10799
10800         if (offset >= 0)
10801                 array[offset / sizeof(__u32)] = val;
10802 }
10803
10804 static void bpf_prog_info_set_offset_u64(struct bpf_prog_info *info, int offset,
10805                                          __u64 val)
10806 {
10807         __u64 *array = (__u64 *)info;
10808
10809         if (offset >= 0)
10810                 array[offset / sizeof(__u64)] = val;
10811 }
10812
10813 struct bpf_prog_info_linear *
10814 bpf_program__get_prog_info_linear(int fd, __u64 arrays)
10815 {
10816         struct bpf_prog_info_linear *info_linear;
10817         struct bpf_prog_info info = {};
10818         __u32 info_len = sizeof(info);
10819         __u32 data_len = 0;
10820         int i, err;
10821         void *ptr;
10822
10823         if (arrays >> BPF_PROG_INFO_LAST_ARRAY)
10824                 return ERR_PTR(-EINVAL);
10825
10826         /* step 1: get array dimensions */
10827         err = bpf_obj_get_info_by_fd(fd, &info, &info_len);
10828         if (err) {
10829                 pr_debug("can't get prog info: %s\n", strerror(errno));
10830                 return ERR_PTR(-EFAULT);
10831         }
10832
10833         /* step 2: calculate total size of all arrays */
10834         for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) {
10835                 bool include_array = (arrays & (1UL << i)) > 0;
10836                 struct bpf_prog_info_array_desc *desc;
10837                 __u32 count, size;
10838
10839                 desc = bpf_prog_info_array_desc + i;
10840
10841                 /* kernel is too old to support this field */
10842                 if (info_len < desc->array_offset + sizeof(__u32) ||
10843                     info_len < desc->count_offset + sizeof(__u32) ||
10844                     (desc->size_offset > 0 && info_len < desc->size_offset))
10845                         include_array = false;
10846
10847                 if (!include_array) {
10848                         arrays &= ~(1UL << i);  /* clear the bit */
10849                         continue;
10850                 }
10851
10852                 count = bpf_prog_info_read_offset_u32(&info, desc->count_offset);
10853                 size  = bpf_prog_info_read_offset_u32(&info, desc->size_offset);
10854
10855                 data_len += count * size;
10856         }
10857
10858         /* step 3: allocate contiguous memory */
10859         data_len = roundup(data_len, sizeof(__u64));
10860         info_linear = malloc(sizeof(struct bpf_prog_info_linear) + data_len);
10861         if (!info_linear)
10862                 return ERR_PTR(-ENOMEM);
10863
10864         /* step 4: fill data into info_linear->info */
10865         info_linear->arrays = arrays;
10866         memset(&info_linear->info, 0, sizeof(info));
10867         ptr = info_linear->data;
10868
10869         for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) {
10870                 struct bpf_prog_info_array_desc *desc;
10871                 __u32 count, size;
10872
10873                 if ((arrays & (1UL << i)) == 0)
10874                         continue;
10875
10876                 desc  = bpf_prog_info_array_desc + i;
10877                 count = bpf_prog_info_read_offset_u32(&info, desc->count_offset);
10878                 size  = bpf_prog_info_read_offset_u32(&info, desc->size_offset);
10879                 bpf_prog_info_set_offset_u32(&info_linear->info,
10880                                              desc->count_offset, count);
10881                 bpf_prog_info_set_offset_u32(&info_linear->info,
10882                                              desc->size_offset, size);
10883                 bpf_prog_info_set_offset_u64(&info_linear->info,
10884                                              desc->array_offset,
10885                                              ptr_to_u64(ptr));
10886                 ptr += count * size;
10887         }
10888
10889         /* step 5: call syscall again to get required arrays */
10890         err = bpf_obj_get_info_by_fd(fd, &info_linear->info, &info_len);
10891         if (err) {
10892                 pr_debug("can't get prog info: %s\n", strerror(errno));
10893                 free(info_linear);
10894                 return ERR_PTR(-EFAULT);
10895         }
10896
10897         /* step 6: verify the data */
10898         for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) {
10899                 struct bpf_prog_info_array_desc *desc;
10900                 __u32 v1, v2;
10901
10902                 if ((arrays & (1UL << i)) == 0)
10903                         continue;
10904
10905                 desc = bpf_prog_info_array_desc + i;
10906                 v1 = bpf_prog_info_read_offset_u32(&info, desc->count_offset);
10907                 v2 = bpf_prog_info_read_offset_u32(&info_linear->info,
10908                                                    desc->count_offset);
10909                 if (v1 != v2)
10910                         pr_warn("%s: mismatch in element count\n", __func__);
10911
10912                 v1 = bpf_prog_info_read_offset_u32(&info, desc->size_offset);
10913                 v2 = bpf_prog_info_read_offset_u32(&info_linear->info,
10914                                                    desc->size_offset);
10915                 if (v1 != v2)
10916                         pr_warn("%s: mismatch in rec size\n", __func__);
10917         }
10918
10919         /* step 7: update info_len and data_len */
10920         info_linear->info_len = sizeof(struct bpf_prog_info);
10921         info_linear->data_len = data_len;
10922
10923         return info_linear;
10924 }
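
/* Usage sketch (illustrative): fetch a program's JITed instructions in a
 * single allocation, then release it with free():
 *
 *	struct bpf_prog_info_linear *info;
 *
 *	info = bpf_program__get_prog_info_linear(prog_fd,
 *			1UL << BPF_PROG_INFO_JITED_INSNS);
 *	if (IS_ERR(info))
 *		return PTR_ERR(info);
 *	// info->info.jited_prog_insns points into info->data
 *	free(info);
 */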
10925
10926 void bpf_program__bpil_addr_to_offs(struct bpf_prog_info_linear *info_linear)
10927 {
10928         int i;
10929
10930         for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) {
10931                 struct bpf_prog_info_array_desc *desc;
10932                 __u64 addr, offs;
10933
10934                 if ((info_linear->arrays & (1UL << i)) == 0)
10935                         continue;
10936
10937                 desc = bpf_prog_info_array_desc + i;
10938                 addr = bpf_prog_info_read_offset_u64(&info_linear->info,
10939                                                      desc->array_offset);
10940                 offs = addr - ptr_to_u64(info_linear->data);
10941                 bpf_prog_info_set_offset_u64(&info_linear->info,
10942                                              desc->array_offset, offs);
10943         }
10944 }
10945
10946 void bpf_program__bpil_offs_to_addr(struct bpf_prog_info_linear *info_linear)
10947 {
10948         int i;
10949
10950         for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) {
10951                 struct bpf_prog_info_array_desc *desc;
10952                 __u64 addr, offs;
10953
10954                 if ((info_linear->arrays & (1UL << i)) == 0)
10955                         continue;
10956
10957                 desc = bpf_prog_info_array_desc + i;
10958                 offs = bpf_prog_info_read_offset_u64(&info_linear->info,
10959                                                      desc->array_offset);
10960                 addr = offs + ptr_to_u64(info_linear->data);
10961                 bpf_prog_info_set_offset_u64(&info_linear->info,
10962                                              desc->array_offset, addr);
10963         }
10964 }
10965
10966 int bpf_program__set_attach_target(struct bpf_program *prog,
10967                                    int attach_prog_fd,
10968                                    const char *attach_func_name)
10969 {
10970         int btf_obj_fd = 0, btf_id = 0, err;
10971
10972         if (!prog || attach_prog_fd < 0 || !attach_func_name)
10973                 return -EINVAL;
10974
10975         if (prog->obj->loaded)
10976                 return -EINVAL;
10977
10978         if (attach_prog_fd) {
10979                 btf_id = libbpf_find_prog_btf_id(attach_func_name,
10980                                                  attach_prog_fd);
10981                 if (btf_id < 0)
10982                         return btf_id;
10983         } else {
10984                 /* load btf_vmlinux, if not yet */
10985                 err = bpf_object__load_vmlinux_btf(prog->obj, true);
10986                 if (err)
10987                         return err;
10988                 err = find_kernel_btf_id(prog->obj, attach_func_name,
10989                                          prog->expected_attach_type,
10990                                          &btf_obj_fd, &btf_id);
10991                 if (err)
10992                         return err;
10993         }
10994
10995         prog->attach_btf_id = btf_id;
10996         prog->attach_btf_obj_fd = btf_obj_fd;
10997         prog->attach_prog_fd = attach_prog_fd;
10998         return 0;
10999 }
11000
11001 int parse_cpu_mask_str(const char *s, bool **mask, int *mask_sz)
11002 {
11003         int err = 0, n, len, start, end = -1;
11004         bool *tmp;
11005
11006         *mask = NULL;
11007         *mask_sz = 0;
11008
11009         /* Each substring separated by ',' has format \d+-\d+ or \d+ */
11010         while (*s) {
11011                 if (*s == ',' || *s == '\n') {
11012                         s++;
11013                         continue;
11014                 }
11015                 n = sscanf(s, "%d%n-%d%n", &start, &len, &end, &len);
11016                 if (n <= 0 || n > 2) {
11017                         pr_warn("Failed to get CPU range %s: %d\n", s, n);
11018                         err = -EINVAL;
11019                         goto cleanup;
11020                 } else if (n == 1) {
11021                         end = start;
11022                 }
11023                 if (start < 0 || start > end) {
11024                         pr_warn("Invalid CPU range [%d,%d] in %s\n",
11025                                 start, end, s);
11026                         err = -EINVAL;
11027                         goto cleanup;
11028                 }
11029                 tmp = realloc(*mask, end + 1);
11030                 if (!tmp) {
11031                         err = -ENOMEM;
11032                         goto cleanup;
11033                 }
11034                 *mask = tmp;
11035                 memset(tmp + *mask_sz, 0, start - *mask_sz);
11036                 memset(tmp + start, 1, end - start + 1);
11037                 *mask_sz = end + 1;
11038                 s += len;
11039         }
11040         if (!*mask_sz) {
11041                 pr_warn("Empty CPU range\n");
11042                 return -EINVAL;
11043         }
11044         return 0;
11045 cleanup:
11046         free(*mask);
11047         *mask = NULL;
11048         return err;
11049 }
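
/* For example (illustrative), parsing "0-2,4\n" yields *mask_sz == 5 with
 * mask[0..2] and mask[4] set and mask[3] clear:
 *
 *	bool *mask;
 *	int n, err;
 *
 *	err = parse_cpu_mask_str("0-2,4\n", &mask, &n);
 *	if (!err)
 *		free(mask); // n == 5 here
 */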
11050
11051 int parse_cpu_mask_file(const char *fcpu, bool **mask, int *mask_sz)
11052 {
11053         int fd, err = 0, len;
11054         char buf[128];
11055
11056         fd = open(fcpu, O_RDONLY);
11057         if (fd < 0) {
11058                 err = -errno;
11059                 pr_warn("Failed to open cpu mask file %s: %d\n", fcpu, err);
11060                 return err;
11061         }
11062         len = read(fd, buf, sizeof(buf));
11063         close(fd);
11064         if (len <= 0) {
11065                 err = len ? -errno : -EINVAL;
11066                 pr_warn("Failed to read cpu mask from %s: %d\n", fcpu, err);
11067                 return err;
11068         }
11069         if (len >= sizeof(buf)) {
11070                 pr_warn("CPU mask is too big in file %s\n", fcpu);
11071                 return -E2BIG;
11072         }
11073         buf[len] = '\0';
11074
11075         return parse_cpu_mask_str(buf, mask, mask_sz);
11076 }
11077
11078 int libbpf_num_possible_cpus(void)
11079 {
11080         static const char *fcpu = "/sys/devices/system/cpu/possible";
11081         static int cpus;
11082         int err, n, i, tmp_cpus;
11083         bool *mask;
11084
11085         tmp_cpus = READ_ONCE(cpus);
11086         if (tmp_cpus > 0)
11087                 return tmp_cpus;
11088
11089         err = parse_cpu_mask_file(fcpu, &mask, &n);
11090         if (err)
11091                 return err;
11092
11093         tmp_cpus = 0;
11094         for (i = 0; i < n; i++) {
11095                 if (mask[i])
11096                         tmp_cpus++;
11097         }
11098         free(mask);
11099
11100         WRITE_ONCE(cpus, tmp_cpus);
11101         return tmp_cpus;
11102 }
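
/* Usage sketch (illustrative): size a per-CPU value array from the number
 * of possible CPUs, as needed for BPF_MAP_TYPE_PERCPU_* map lookups:
 *
 *	__u64 *values;
 *	int ncpus = libbpf_num_possible_cpus();
 *
 *	if (ncpus < 0)
 *		return ncpus;
 *	values = calloc(ncpus, sizeof(*values));
 */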

int bpf_object__open_skeleton(struct bpf_object_skeleton *s,
			      const struct bpf_object_open_opts *opts)
{
	DECLARE_LIBBPF_OPTS(bpf_object_open_opts, skel_opts,
		.object_name = s->name,
	);
	struct bpf_object *obj;
	int i;

	/* Attempt to preserve opts->object_name, unless overridden by user
	 * explicitly. Overwriting object name for skeletons is discouraged,
	 * as it breaks global data maps, because their map names contain
	 * the object name as a prefix. When the skeleton is generated,
	 * bpftool assumes that this name will stay the same.
	 */
	if (opts) {
		memcpy(&skel_opts, opts, sizeof(*opts));
		if (!opts->object_name)
			skel_opts.object_name = s->name;
	}

	obj = bpf_object__open_mem(s->data, s->data_sz, &skel_opts);
	if (IS_ERR(obj)) {
		pr_warn("failed to initialize skeleton BPF object '%s': %ld\n",
			s->name, PTR_ERR(obj));
		return PTR_ERR(obj);
	}

	*s->obj = obj;

	for (i = 0; i < s->map_cnt; i++) {
		struct bpf_map **map = s->maps[i].map;
		const char *name = s->maps[i].name;
		void **mmaped = s->maps[i].mmaped;

		*map = bpf_object__find_map_by_name(obj, name);
		if (!*map) {
			pr_warn("failed to find skeleton map '%s'\n", name);
			return -ESRCH;
		}

		/* externs shouldn't be pre-setup from user code */
		if (mmaped && (*map)->libbpf_type != LIBBPF_MAP_KCONFIG)
			*mmaped = (*map)->mmaped;
	}

	for (i = 0; i < s->prog_cnt; i++) {
		struct bpf_program **prog = s->progs[i].prog;
		const char *name = s->progs[i].name;

		*prog = bpf_object__find_program_by_name(obj, name);
		if (!*prog) {
			pr_warn("failed to find skeleton program '%s'\n", name);
			return -ESRCH;
		}
	}

	return 0;
}
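
/* Usage sketch (illustration only): applications normally reach this via
 * the open helper in a bpftool-generated skeleton header (a hypothetical
 * "myprog.skel.h" here), which builds the bpf_object_skeleton and then
 * calls bpf_object__open_skeleton().
 */
#if 0 /* usage sketch, not compiled */
	struct myprog_bpf *skel;

	skel = myprog_bpf__open();	/* wraps bpf_object__open_skeleton() */
	if (!skel)
		return -1;		/* open or ELF parsing failed */
#endif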

int bpf_object__load_skeleton(struct bpf_object_skeleton *s)
{
	int i, err;

	err = bpf_object__load(*s->obj);
	if (err) {
		pr_warn("failed to load BPF skeleton '%s': %d\n", s->name, err);
		return err;
	}

	for (i = 0; i < s->map_cnt; i++) {
		struct bpf_map *map = *s->maps[i].map;
		size_t mmap_sz = bpf_map_mmap_sz(map);
		int prot, map_fd = bpf_map__fd(map);
		void **mmaped = s->maps[i].mmaped;

		if (!mmaped)
			continue;

		if (!(map->def.map_flags & BPF_F_MMAPABLE)) {
			*mmaped = NULL;
			continue;
		}

		if (map->def.map_flags & BPF_F_RDONLY_PROG)
			prot = PROT_READ;
		else
			prot = PROT_READ | PROT_WRITE;

		/* Remap anonymous mmap()-ed "map initialization image" as
		 * BPF map-backed mmap()-ed memory, preserving the same
		 * memory address. This causes the kernel to change the
		 * process's page table to point to a different piece of
		 * kernel memory, but from the userspace point of view the
		 * memory address (and its contents, identical at this
		 * point) stays the same. This mapping is released by
		 * bpf_object__close() as part of the normal clean up
		 * procedure, so the skeleton's clean up doesn't need to
		 * worry about it.
		 */
		*mmaped = mmap(map->mmaped, mmap_sz, prot,
				MAP_SHARED | MAP_FIXED, map_fd, 0);
		if (*mmaped == MAP_FAILED) {
			err = -errno;
			*mmaped = NULL;
			pr_warn("failed to re-mmap() map '%s': %d\n",
				 bpf_map__name(map), err);
			return err;
		}
	}

	return 0;
}
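
/* Usage sketch (illustration only): after a successful load, the
 * skeleton's global-data pointers (e.g. a hypothetical skel->bss) refer
 * to the BPF map-backed memory at the unchanged address, so reads and
 * writes of globals go straight through to the map.
 */
#if 0 /* usage sketch, not compiled */
	err = myprog_bpf__load(skel);	/* wraps bpf_object__load_skeleton() */
	if (err)
		goto cleanup;
	skel->bss->my_counter = 0;	/* writes the BPF global directly */
#endif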

int bpf_object__attach_skeleton(struct bpf_object_skeleton *s)
{
	int i;

	for (i = 0; i < s->prog_cnt; i++) {
		struct bpf_program *prog = *s->progs[i].prog;
		struct bpf_link **link = s->progs[i].link;
		const struct bpf_sec_def *sec_def;

		if (!prog->load)
			continue;

		sec_def = find_sec_def(prog->sec_name);
		if (!sec_def || !sec_def->attach_fn)
			continue;

		*link = sec_def->attach_fn(sec_def, prog);
		if (IS_ERR(*link)) {
			pr_warn("failed to auto-attach program '%s': %ld\n",
				bpf_program__name(prog), PTR_ERR(*link));
			return PTR_ERR(*link);
		}
	}

	return 0;
}
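
/* Usage sketch (illustration only): auto-attach only covers programs
 * whose SEC() name has a known attach function; the rest are silently
 * skipped above and must be attached explicitly. The skeleton names and
 * 'ifindex' below are hypothetical.
 */
#if 0 /* usage sketch, not compiled */
	struct bpf_link *link;

	err = myprog_bpf__attach(skel);	/* wraps bpf_object__attach_skeleton() */
	if (err)
		goto cleanup;

	/* e.g. "xdp" sections have no auto-attach; attach them by hand */
	link = bpf_program__attach_xdp(skel->progs.my_xdp_prog, ifindex);
	if (IS_ERR(link))
		return PTR_ERR(link);
#endif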

void bpf_object__detach_skeleton(struct bpf_object_skeleton *s)
{
	int i;

	for (i = 0; i < s->prog_cnt; i++) {
		struct bpf_link **link = s->progs[i].link;

		bpf_link__destroy(*link);
		*link = NULL;
	}
}

void bpf_object__destroy_skeleton(struct bpf_object_skeleton *s)
{
	if (s->progs)
		bpf_object__detach_skeleton(s);
	if (s->obj)
		bpf_object__close(*s->obj);
	free(s->maps);
	free(s->progs);
	free(s);
}
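
/* Usage sketch (illustration only): the typical lifecycle, as emitted by
 * bpftool gen, ties the functions above together. The names assume a
 * hypothetical "myprog.skel.h"; the generated destroy helper detaches
 * links and then calls bpf_object__destroy_skeleton().
 */
#if 0 /* usage sketch, not compiled */
static int example_skeleton_lifecycle(void)
{
	struct myprog_bpf *skel;
	int err;

	skel = myprog_bpf__open();	/* bpf_object__open_skeleton() */
	if (!skel)
		return -1;

	err = myprog_bpf__load(skel);	/* bpf_object__load_skeleton() */
	if (err)
		goto cleanup;

	err = myprog_bpf__attach(skel);	/* bpf_object__attach_skeleton() */
	if (err)
		goto cleanup;

	/* ... run, poll maps, read globals ... */

cleanup:
	myprog_bpf__destroy(skel);	/* detach + destroy_skeleton() */
	return err;
}
#endif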