// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)

/*
 * Common eBPF ELF object loading operations.
 *
 * Copyright (C) 2013-2015 Alexei Starovoitov <ast@kernel.org>
 * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com>
 * Copyright (C) 2015 Huawei Inc.
 * Copyright (C) 2017 Nicira, Inc.
 * Copyright (C) 2019 Isovalent, Inc.
 */

#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif
#include <stdlib.h>
#include <stdio.h>
#include <stdarg.h>
#include <libgen.h>
#include <inttypes.h>
#include <limits.h>
#include <string.h>
#include <unistd.h>
#include <endian.h>
#include <fcntl.h>
#include <errno.h>
#include <ctype.h>
#include <asm/unistd.h>
#include <linux/err.h>
#include <linux/kernel.h>
#include <linux/bpf.h>
#include <linux/btf.h>
#include <linux/filter.h>
#include <linux/list.h>
#include <linux/limits.h>
#include <linux/perf_event.h>
#include <linux/ring_buffer.h>
#include <linux/version.h>
#include <sys/epoll.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/vfs.h>
#include <sys/utsname.h>
#include <sys/resource.h>
#include <libelf.h>
#include <gelf.h>
#include <zlib.h>

#include "libbpf.h"
#include "bpf.h"
#include "btf.h"
#include "str_error.h"
#include "libbpf_internal.h"
#include "hashmap.h"

#ifndef EM_BPF
#define EM_BPF 247
#endif

#ifndef BPF_FS_MAGIC
#define BPF_FS_MAGIC            0xcafe4a11
#endif

/* vfprintf() in __base_pr() uses a nonliteral format string, which may break
 * compilation if the user enables the corresponding warning. Disable it
 * explicitly.
 */
#pragma GCC diagnostic ignored "-Wformat-nonliteral"

#define __printf(a, b)  __attribute__((format(printf, a, b)))

static struct bpf_map *bpf_object__add_map(struct bpf_object *obj);
static struct bpf_program *bpf_object__find_prog_by_idx(struct bpf_object *obj,
                                                        int idx);
static const struct btf_type *
skip_mods_and_typedefs(const struct btf *btf, __u32 id, __u32 *res_id);

static int __base_pr(enum libbpf_print_level level, const char *format,
                     va_list args)
{
        if (level == LIBBPF_DEBUG)
                return 0;

        return vfprintf(stderr, format, args);
}

static libbpf_print_fn_t __libbpf_pr = __base_pr;

libbpf_print_fn_t libbpf_set_print(libbpf_print_fn_t fn)
{
        libbpf_print_fn_t old_print_fn = __libbpf_pr;

        __libbpf_pr = fn;
        return old_print_fn;
}

__printf(2, 3)
void libbpf_print(enum libbpf_print_level level, const char *format, ...)
{
        va_list args;

        if (!__libbpf_pr)
                return;

        va_start(args, format);
        __libbpf_pr(level, format, args);
        va_end(args);
}
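
/*
 * Example: a minimal sketch (hypothetical application code, not part of
 * libbpf) of overriding the default logger via libbpf_set_print():
 *
 *	static int my_print(enum libbpf_print_level level,
 *			    const char *format, va_list args)
 *	{
 *		if (level == LIBBPF_DEBUG)
 *			return 0;
 *		return vfprintf(stderr, format, args);
 *	}
 *
 *	libbpf_set_print(my_print);	// returns the previous callback
 */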

static void pr_perm_msg(int err)
{
        struct rlimit limit;
        char buf[100];

        if (err != -EPERM || geteuid() != 0)
                return;

        err = getrlimit(RLIMIT_MEMLOCK, &limit);
        if (err)
                return;

        if (limit.rlim_cur == RLIM_INFINITY)
                return;

        if (limit.rlim_cur < 1024)
                snprintf(buf, sizeof(buf), "%zu bytes", (size_t)limit.rlim_cur);
        else if (limit.rlim_cur < 1024*1024)
                snprintf(buf, sizeof(buf), "%.1f KiB", (double)limit.rlim_cur / 1024);
        else
                snprintf(buf, sizeof(buf), "%.1f MiB", (double)limit.rlim_cur / (1024*1024));

        pr_warn("permission error while running as root; try raising 'ulimit -l'? current value: %s\n",
                buf);
}
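
/*
 * Example: a minimal sketch (hypothetical application code) of raising
 * RLIMIT_MEMLOCK before loading BPF objects, which is what the warning
 * above suggests via 'ulimit -l':
 *
 *	struct rlimit r = { RLIM_INFINITY, RLIM_INFINITY };
 *
 *	if (setrlimit(RLIMIT_MEMLOCK, &r))
 *		perror("setrlimit(RLIMIT_MEMLOCK)");
 */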

#define STRERR_BUFSIZE  128

/* Copied from tools/perf/util/util.h */
#ifndef zfree
# define zfree(ptr) ({ free(*ptr); *ptr = NULL; })
#endif

#ifndef zclose
# define zclose(fd) ({                  \
        int ___err = 0;                 \
        if ((fd) >= 0)                  \
                ___err = close((fd));   \
        fd = -1;                        \
        ___err; })
#endif

static inline __u64 ptr_to_u64(const void *ptr)
{
        return (__u64) (unsigned long) ptr;
}

enum kern_feature_id {
        /* v4.14: kernel support for program & map names. */
        FEAT_PROG_NAME,
        /* v5.2: kernel support for global data sections. */
        FEAT_GLOBAL_DATA,
        /* BTF support */
        FEAT_BTF,
        /* BTF_KIND_FUNC and BTF_KIND_FUNC_PROTO support */
        FEAT_BTF_FUNC,
        /* BTF_KIND_VAR and BTF_KIND_DATASEC support */
        FEAT_BTF_DATASEC,
        /* BTF_FUNC_GLOBAL is supported */
        FEAT_BTF_GLOBAL_FUNC,
        /* BPF_F_MMAPABLE is supported for arrays */
        FEAT_ARRAY_MMAP,
        /* kernel support for expected_attach_type in BPF_PROG_LOAD */
        FEAT_EXP_ATTACH_TYPE,
        /* bpf_probe_read_{kernel,user}[_str] helpers */
        FEAT_PROBE_READ_KERN,
        __FEAT_CNT,
};

static bool kernel_supports(enum kern_feature_id feat_id);

enum reloc_type {
        RELO_LD64,
        RELO_CALL,
        RELO_DATA,
        RELO_EXTERN,
};

struct reloc_desc {
        enum reloc_type type;
        int insn_idx;
        int map_idx;
        int sym_off;
};

struct bpf_sec_def;

typedef struct bpf_link *(*attach_fn_t)(const struct bpf_sec_def *sec,
                                        struct bpf_program *prog);

struct bpf_sec_def {
        const char *sec;
        size_t len;
        enum bpf_prog_type prog_type;
        enum bpf_attach_type expected_attach_type;
        bool is_exp_attach_type_optional;
        bool is_attachable;
        bool is_attach_btf;
        attach_fn_t attach_fn;
};
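
/*
 * Example: a sketch of the kind of entry the bpf_sec_def table holds
 * (illustrative values only; the actual table lives elsewhere in this
 * file). It maps an ELF section name prefix such as "xdp" to a program
 * type and attach semantics:
 *
 *	static const struct bpf_sec_def xdp_sec_def = {
 *		.sec = "xdp",
 *		.len = sizeof("xdp") - 1,
 *		.prog_type = BPF_PROG_TYPE_XDP,
 *	};
 */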

/*
 * bpf_prog would be a better name, but it is already used in
 * linux/filter.h.
 */
struct bpf_program {
        /* Index in elf obj file, for relocation use. */
        int idx;
        char *name;
        int prog_ifindex;
        char *section_name;
        const struct bpf_sec_def *sec_def;
        /* section_name with / replaced by _; makes recursive pinning
         * in bpf_object__pin_programs easier
         */
        char *pin_name;
        struct bpf_insn *insns;
        size_t insns_cnt, main_prog_cnt;
        enum bpf_prog_type type;
        bool load;

        struct reloc_desc *reloc_desc;
        int nr_reloc;
        int log_level;

        struct {
                int nr;
                int *fds;
        } instances;
        bpf_program_prep_t preprocessor;

        struct bpf_object *obj;
        void *priv;
        bpf_program_clear_priv_t clear_priv;

        enum bpf_attach_type expected_attach_type;
        __u32 attach_btf_id;
        __u32 attach_prog_fd;
        void *func_info;
        __u32 func_info_rec_size;
        __u32 func_info_cnt;

        void *line_info;
        __u32 line_info_rec_size;
        __u32 line_info_cnt;
        __u32 prog_flags;
};

struct bpf_struct_ops {
        const char *tname;
        const struct btf_type *type;
        struct bpf_program **progs;
        __u32 *kern_func_off;
        /* e.g. struct tcp_congestion_ops in bpf_prog's btf format */
        void *data;
        /* e.g. struct bpf_struct_ops_tcp_congestion_ops in
         *      btf_vmlinux's format.
         * struct bpf_struct_ops_tcp_congestion_ops {
         *      [... some other kernel fields ...]
         *      struct tcp_congestion_ops data;
         * }
         * kern_vdata's size == sizeof(struct bpf_struct_ops_tcp_congestion_ops).
         * bpf_map__init_kern_struct_ops() will populate the "kern_vdata"
         * from "data".
         */
        void *kern_vdata;
        __u32 type_id;
};

#define DATA_SEC ".data"
#define BSS_SEC ".bss"
#define RODATA_SEC ".rodata"
#define KCONFIG_SEC ".kconfig"
#define KSYMS_SEC ".ksyms"
#define STRUCT_OPS_SEC ".struct_ops"

enum libbpf_map_type {
        LIBBPF_MAP_UNSPEC,
        LIBBPF_MAP_DATA,
        LIBBPF_MAP_BSS,
        LIBBPF_MAP_RODATA,
        LIBBPF_MAP_KCONFIG,
};

static const char * const libbpf_type_to_btf_name[] = {
        [LIBBPF_MAP_DATA]       = DATA_SEC,
        [LIBBPF_MAP_BSS]        = BSS_SEC,
        [LIBBPF_MAP_RODATA]     = RODATA_SEC,
        [LIBBPF_MAP_KCONFIG]    = KCONFIG_SEC,
};

struct bpf_map {
        char *name;
        int fd;
        int sec_idx;
        size_t sec_offset;
        int map_ifindex;
        int inner_map_fd;
        struct bpf_map_def def;
        __u32 numa_node;
        __u32 btf_var_idx;
        __u32 btf_key_type_id;
        __u32 btf_value_type_id;
        __u32 btf_vmlinux_value_type_id;
        void *priv;
        bpf_map_clear_priv_t clear_priv;
        enum libbpf_map_type libbpf_type;
        void *mmaped;
        struct bpf_struct_ops *st_ops;
        struct bpf_map *inner_map;
        void **init_slots;
        int init_slots_sz;
        char *pin_path;
        bool pinned;
        bool reused;
};

enum extern_type {
        EXT_UNKNOWN,
        EXT_KCFG,
        EXT_KSYM,
};

enum kcfg_type {
        KCFG_UNKNOWN,
        KCFG_CHAR,
        KCFG_BOOL,
        KCFG_INT,
        KCFG_TRISTATE,
        KCFG_CHAR_ARR,
};

struct extern_desc {
        enum extern_type type;
        int sym_idx;
        int btf_id;
        int sec_btf_id;
        const char *name;
        bool is_set;
        bool is_weak;
        union {
                struct {
                        enum kcfg_type type;
                        int sz;
                        int align;
                        int data_off;
                        bool is_signed;
                } kcfg;
                struct {
                        unsigned long long addr;
                } ksym;
        };
};
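
/*
 * Example: the BPF-side declarations that give rise to these extern
 * descriptions (a sketch of the usual convention; __kconfig and __ksym
 * are the section attributes from bpf_helpers.h):
 *
 *	extern unsigned long CONFIG_HZ __kconfig;	// EXT_KCFG
 *	extern const void bpf_prog_fops __ksym __weak;	// EXT_KSYM
 */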

static LIST_HEAD(bpf_objects_list);

struct bpf_object {
        char name[BPF_OBJ_NAME_LEN];
        char license[64];
        __u32 kern_version;

        struct bpf_program *programs;
        size_t nr_programs;
        struct bpf_map *maps;
        size_t nr_maps;
        size_t maps_cap;

        char *kconfig;
        struct extern_desc *externs;
        int nr_extern;
        int kconfig_map_idx;

        bool loaded;
        bool has_pseudo_calls;

        /*
         * Information used when doing ELF-related work. Only valid while
         * fd is valid.
         */
        struct {
                int fd;
                const void *obj_buf;
                size_t obj_buf_sz;
                Elf *elf;
                GElf_Ehdr ehdr;
                Elf_Data *symbols;
                Elf_Data *data;
                Elf_Data *rodata;
                Elf_Data *bss;
                Elf_Data *st_ops_data;
                size_t shstrndx; /* section index for section name strings */
                size_t strtabidx;
                struct {
                        GElf_Shdr shdr;
                        Elf_Data *data;
                } *reloc_sects;
                int nr_reloc_sects;
                int maps_shndx;
                int btf_maps_shndx;
                __u32 btf_maps_sec_btf_id;
                int text_shndx;
                int symbols_shndx;
                int data_shndx;
                int rodata_shndx;
                int bss_shndx;
                int st_ops_shndx;
        } efile;
        /*
         * All loaded bpf_objects are linked in a list, which is hidden
         * from the caller. bpf_objects__<func> handlers deal with all
         * objects.
         */
        struct list_head list;

        struct btf *btf;
        /* Parse and load BTF vmlinux if any of the programs in the object need
         * it at load time.
         */
        struct btf *btf_vmlinux;
        struct btf_ext *btf_ext;

        void *priv;
        bpf_object_clear_priv_t clear_priv;

        char path[];
};
#define obj_elf_valid(o)        ((o)->efile.elf)

static const char *elf_sym_str(const struct bpf_object *obj, size_t off);
static const char *elf_sec_str(const struct bpf_object *obj, size_t off);
static Elf_Scn *elf_sec_by_idx(const struct bpf_object *obj, size_t idx);
static Elf_Scn *elf_sec_by_name(const struct bpf_object *obj, const char *name);
static int elf_sec_hdr(const struct bpf_object *obj, Elf_Scn *scn, GElf_Shdr *hdr);
static const char *elf_sec_name(const struct bpf_object *obj, Elf_Scn *scn);
static Elf_Data *elf_sec_data(const struct bpf_object *obj, Elf_Scn *scn);

void bpf_program__unload(struct bpf_program *prog)
{
        int i;

        if (!prog)
                return;

        /*
         * If the object was opened but the program was never loaded,
         * prog->instances.nr may be -1.
         */
        if (prog->instances.nr > 0) {
                for (i = 0; i < prog->instances.nr; i++)
                        zclose(prog->instances.fds[i]);
        } else if (prog->instances.nr != -1) {
                pr_warn("Internal error: instances.nr is %d\n",
                        prog->instances.nr);
        }

        prog->instances.nr = -1;
        zfree(&prog->instances.fds);

        zfree(&prog->func_info);
        zfree(&prog->line_info);
}

static void bpf_program__exit(struct bpf_program *prog)
{
        if (!prog)
                return;

        if (prog->clear_priv)
                prog->clear_priv(prog, prog->priv);

        prog->priv = NULL;
        prog->clear_priv = NULL;

        bpf_program__unload(prog);
        zfree(&prog->name);
        zfree(&prog->section_name);
        zfree(&prog->pin_name);
        zfree(&prog->insns);
        zfree(&prog->reloc_desc);

        prog->nr_reloc = 0;
        prog->insns_cnt = 0;
        prog->idx = -1;
}

static char *__bpf_program__pin_name(struct bpf_program *prog)
{
        char *name, *p;

        name = p = strdup(prog->section_name);
        /* guard against strdup() failure before walking the string */
        while (p && (p = strchr(p, '/')))
                *p = '_';

        return name;
}
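
/*
 * Example: for a program in section "cgroup_skb/ingress", the pin name
 * becomes "cgroup_skb_ingress", so recursive pinning can create one flat
 * file per program under the pin directory.
 */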

static int
bpf_program__init(void *data, size_t size, const char *section_name, int idx,
                  struct bpf_program *prog)
{
        const size_t bpf_insn_sz = sizeof(struct bpf_insn);

        if (size == 0 || size % bpf_insn_sz) {
                pr_warn("corrupted section '%s', size: %zu\n",
                        section_name, size);
                return -EINVAL;
        }

        memset(prog, 0, sizeof(*prog));

        prog->section_name = strdup(section_name);
        if (!prog->section_name) {
                pr_warn("failed to alloc name for prog under section(%d) %s\n",
                        idx, section_name);
                goto errout;
        }

        prog->pin_name = __bpf_program__pin_name(prog);
        if (!prog->pin_name) {
                pr_warn("failed to alloc pin name for prog under section(%d) %s\n",
                        idx, section_name);
                goto errout;
        }

        prog->insns = malloc(size);
        if (!prog->insns) {
                pr_warn("failed to alloc insns for prog under section %s\n",
                        section_name);
                goto errout;
        }
        prog->insns_cnt = size / bpf_insn_sz;
        memcpy(prog->insns, data, size);
        prog->idx = idx;
        prog->instances.fds = NULL;
        prog->instances.nr = -1;
        prog->type = BPF_PROG_TYPE_UNSPEC;
        prog->load = true;

        return 0;
errout:
        bpf_program__exit(prog);
        return -ENOMEM;
}

static int
bpf_object__add_program(struct bpf_object *obj, void *data, size_t size,
                        const char *section_name, int idx)
{
        struct bpf_program prog, *progs;
        int nr_progs, err;

        err = bpf_program__init(data, size, section_name, idx, &prog);
        if (err)
                return err;

        progs = obj->programs;
        nr_progs = obj->nr_programs;

        progs = libbpf_reallocarray(progs, nr_progs + 1, sizeof(progs[0]));
        if (!progs) {
                /*
                 * In this case the original obj->programs is still valid,
                 * so it needs no special treatment in bpf_close_object().
                 */
                pr_warn("failed to alloc a new program under section '%s'\n",
                        section_name);
                bpf_program__exit(&prog);
                return -ENOMEM;
        }

        pr_debug("elf: found program '%s'\n", prog.section_name);
        obj->programs = progs;
        obj->nr_programs = nr_progs + 1;
        prog.obj = obj;
        progs[nr_progs] = prog;
        return 0;
}

static int
bpf_object__init_prog_names(struct bpf_object *obj)
{
        Elf_Data *symbols = obj->efile.symbols;
        struct bpf_program *prog;
        size_t pi, si;

        for (pi = 0; pi < obj->nr_programs; pi++) {
                const char *name = NULL;

                prog = &obj->programs[pi];

                for (si = 0; si < symbols->d_size / sizeof(GElf_Sym) && !name; si++) {
                        GElf_Sym sym;

                        if (!gelf_getsym(symbols, si, &sym))
                                continue;
                        if (sym.st_shndx != prog->idx)
                                continue;
                        if (GELF_ST_BIND(sym.st_info) != STB_GLOBAL)
                                continue;

                        name = elf_sym_str(obj, sym.st_name);
                        if (!name) {
                                pr_warn("prog '%s': failed to get symbol name\n",
                                        prog->section_name);
                                return -LIBBPF_ERRNO__LIBELF;
                        }
                }

                if (!name && prog->idx == obj->efile.text_shndx)
                        name = ".text";

                if (!name) {
                        pr_warn("prog '%s': failed to find program symbol\n",
                                prog->section_name);
                        return -EINVAL;
                }

                prog->name = strdup(name);
                if (!prog->name)
                        return -ENOMEM;
        }

        return 0;
}

static __u32 get_kernel_version(void)
{
        __u32 major, minor, patch;
        struct utsname info;

        uname(&info);
        if (sscanf(info.release, "%u.%u.%u", &major, &minor, &patch) != 3)
                return 0;
        return KERNEL_VERSION(major, minor, patch);
}
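
/*
 * Example: KERNEL_VERSION() packs the triple into a single __u32 as
 * (major << 16) + (minor << 8) + patch, so a "5.4.0" release string
 * yields KERNEL_VERSION(5, 4, 0) == 0x050400 == 328704.
 */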

static const struct btf_member *
find_member_by_offset(const struct btf_type *t, __u32 bit_offset)
{
        struct btf_member *m;
        int i;

        for (i = 0, m = btf_members(t); i < btf_vlen(t); i++, m++) {
                if (btf_member_bit_offset(t, i) == bit_offset)
                        return m;
        }

        return NULL;
}

static const struct btf_member *
find_member_by_name(const struct btf *btf, const struct btf_type *t,
                    const char *name)
{
        struct btf_member *m;
        int i;

        for (i = 0, m = btf_members(t); i < btf_vlen(t); i++, m++) {
                if (!strcmp(btf__name_by_offset(btf, m->name_off), name))
                        return m;
        }

        return NULL;
}

#define STRUCT_OPS_VALUE_PREFIX "bpf_struct_ops_"
static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix,
                                   const char *name, __u32 kind);

static int
find_struct_ops_kern_types(const struct btf *btf, const char *tname,
                           const struct btf_type **type, __u32 *type_id,
                           const struct btf_type **vtype, __u32 *vtype_id,
                           const struct btf_member **data_member)
{
        const struct btf_type *kern_type, *kern_vtype;
        const struct btf_member *kern_data_member;
        __s32 kern_vtype_id, kern_type_id;
        __u32 i;

        kern_type_id = btf__find_by_name_kind(btf, tname, BTF_KIND_STRUCT);
        if (kern_type_id < 0) {
                pr_warn("struct_ops init_kern: struct %s is not found in kernel BTF\n",
                        tname);
                return kern_type_id;
        }
        kern_type = btf__type_by_id(btf, kern_type_id);

        /* Find the corresponding "map_value" type that will be used
         * in map_update(BPF_MAP_TYPE_STRUCT_OPS).  For example,
         * find "struct bpf_struct_ops_tcp_congestion_ops" from the
         * btf_vmlinux.
         */
        kern_vtype_id = find_btf_by_prefix_kind(btf, STRUCT_OPS_VALUE_PREFIX,
                                                tname, BTF_KIND_STRUCT);
        if (kern_vtype_id < 0) {
                pr_warn("struct_ops init_kern: struct %s%s is not found in kernel BTF\n",
                        STRUCT_OPS_VALUE_PREFIX, tname);
                return kern_vtype_id;
        }
        kern_vtype = btf__type_by_id(btf, kern_vtype_id);

        /* Find "struct tcp_congestion_ops" from
         * struct bpf_struct_ops_tcp_congestion_ops {
         *      [ ... ]
         *      struct tcp_congestion_ops data;
         * }
         */
        kern_data_member = btf_members(kern_vtype);
        for (i = 0; i < btf_vlen(kern_vtype); i++, kern_data_member++) {
                if (kern_data_member->type == kern_type_id)
                        break;
        }
        if (i == btf_vlen(kern_vtype)) {
                pr_warn("struct_ops init_kern: struct %s data is not found in struct %s%s\n",
                        tname, STRUCT_OPS_VALUE_PREFIX, tname);
                return -EINVAL;
        }

        *type = kern_type;
        *type_id = kern_type_id;
        *vtype = kern_vtype;
        *vtype_id = kern_vtype_id;
        *data_member = kern_data_member;

        return 0;
}

static bool bpf_map__is_struct_ops(const struct bpf_map *map)
{
        return map->def.type == BPF_MAP_TYPE_STRUCT_OPS;
}

/* Init the map's fields that depend on kern_btf */
static int bpf_map__init_kern_struct_ops(struct bpf_map *map,
                                         const struct btf *btf,
                                         const struct btf *kern_btf)
{
        const struct btf_member *member, *kern_member, *kern_data_member;
        const struct btf_type *type, *kern_type, *kern_vtype;
        __u32 i, kern_type_id, kern_vtype_id, kern_data_off;
        struct bpf_struct_ops *st_ops;
        void *data, *kern_data;
        const char *tname;
        int err;

        st_ops = map->st_ops;
        type = st_ops->type;
        tname = st_ops->tname;
        err = find_struct_ops_kern_types(kern_btf, tname,
                                         &kern_type, &kern_type_id,
                                         &kern_vtype, &kern_vtype_id,
                                         &kern_data_member);
        if (err)
                return err;

        pr_debug("struct_ops init_kern %s: type_id:%u kern_type_id:%u kern_vtype_id:%u\n",
                 map->name, st_ops->type_id, kern_type_id, kern_vtype_id);

        map->def.value_size = kern_vtype->size;
        map->btf_vmlinux_value_type_id = kern_vtype_id;

        st_ops->kern_vdata = calloc(1, kern_vtype->size);
        if (!st_ops->kern_vdata)
                return -ENOMEM;

        data = st_ops->data;
        kern_data_off = kern_data_member->offset / 8;
        kern_data = st_ops->kern_vdata + kern_data_off;

        member = btf_members(type);
        for (i = 0; i < btf_vlen(type); i++, member++) {
                const struct btf_type *mtype, *kern_mtype;
                __u32 mtype_id, kern_mtype_id;
                void *mdata, *kern_mdata;
                __s64 msize, kern_msize;
                __u32 moff, kern_moff;
                __u32 kern_member_idx;
                const char *mname;

                mname = btf__name_by_offset(btf, member->name_off);
                kern_member = find_member_by_name(kern_btf, kern_type, mname);
                if (!kern_member) {
                        pr_warn("struct_ops init_kern %s: Cannot find member %s in kernel BTF\n",
                                map->name, mname);
                        return -ENOTSUP;
                }

                kern_member_idx = kern_member - btf_members(kern_type);
                if (btf_member_bitfield_size(type, i) ||
                    btf_member_bitfield_size(kern_type, kern_member_idx)) {
                        pr_warn("struct_ops init_kern %s: bitfield %s is not supported\n",
                                map->name, mname);
                        return -ENOTSUP;
                }

                moff = member->offset / 8;
                kern_moff = kern_member->offset / 8;

                mdata = data + moff;
                kern_mdata = kern_data + kern_moff;

                mtype = skip_mods_and_typedefs(btf, member->type, &mtype_id);
                kern_mtype = skip_mods_and_typedefs(kern_btf, kern_member->type,
                                                    &kern_mtype_id);
                if (BTF_INFO_KIND(mtype->info) !=
                    BTF_INFO_KIND(kern_mtype->info)) {
                        pr_warn("struct_ops init_kern %s: Unmatched member type %s %u != %u(kernel)\n",
                                map->name, mname, BTF_INFO_KIND(mtype->info),
                                BTF_INFO_KIND(kern_mtype->info));
                        return -ENOTSUP;
                }

                if (btf_is_ptr(mtype)) {
                        struct bpf_program *prog;

                        mtype = skip_mods_and_typedefs(btf, mtype->type, &mtype_id);
                        kern_mtype = skip_mods_and_typedefs(kern_btf,
                                                            kern_mtype->type,
                                                            &kern_mtype_id);
                        if (!btf_is_func_proto(mtype) ||
                            !btf_is_func_proto(kern_mtype)) {
                                pr_warn("struct_ops init_kern %s: non func ptr %s is not supported\n",
                                        map->name, mname);
                                return -ENOTSUP;
                        }

                        prog = st_ops->progs[i];
                        if (!prog) {
                                pr_debug("struct_ops init_kern %s: func ptr %s is not set\n",
                                         map->name, mname);
                                continue;
                        }

                        prog->attach_btf_id = kern_type_id;
                        prog->expected_attach_type = kern_member_idx;

                        st_ops->kern_func_off[i] = kern_data_off + kern_moff;

                        pr_debug("struct_ops init_kern %s: func ptr %s is set to prog %s from data(+%u) to kern_data(+%u)\n",
                                 map->name, mname, prog->name, moff,
                                 kern_moff);

                        continue;
                }

                msize = btf__resolve_size(btf, mtype_id);
                kern_msize = btf__resolve_size(kern_btf, kern_mtype_id);
                if (msize < 0 || kern_msize < 0 || msize != kern_msize) {
                        pr_warn("struct_ops init_kern %s: Error in size of member %s: %zd != %zd(kernel)\n",
                                map->name, mname, (ssize_t)msize,
                                (ssize_t)kern_msize);
                        return -ENOTSUP;
                }

                pr_debug("struct_ops init_kern %s: copy %s %u bytes from data(+%u) to kern_data(+%u)\n",
                         map->name, mname, (unsigned int)msize,
                         moff, kern_moff);
                memcpy(kern_mdata, mdata, msize);
        }

        return 0;
}

static int bpf_object__init_kern_struct_ops_maps(struct bpf_object *obj)
{
        struct bpf_map *map;
        size_t i;
        int err;

        for (i = 0; i < obj->nr_maps; i++) {
                map = &obj->maps[i];

                if (!bpf_map__is_struct_ops(map))
                        continue;

                err = bpf_map__init_kern_struct_ops(map, obj->btf,
                                                    obj->btf_vmlinux);
                if (err)
                        return err;
        }

        return 0;
}

static int bpf_object__init_struct_ops_maps(struct bpf_object *obj)
{
        const struct btf_type *type, *datasec;
        const struct btf_var_secinfo *vsi;
        struct bpf_struct_ops *st_ops;
        const char *tname, *var_name;
        __s32 type_id, datasec_id;
        const struct btf *btf;
        struct bpf_map *map;
        __u32 i;

        if (obj->efile.st_ops_shndx == -1)
                return 0;

        btf = obj->btf;
        datasec_id = btf__find_by_name_kind(btf, STRUCT_OPS_SEC,
                                            BTF_KIND_DATASEC);
        if (datasec_id < 0) {
                pr_warn("struct_ops init: DATASEC %s not found\n",
                        STRUCT_OPS_SEC);
                return -EINVAL;
        }

        datasec = btf__type_by_id(btf, datasec_id);
        vsi = btf_var_secinfos(datasec);
        for (i = 0; i < btf_vlen(datasec); i++, vsi++) {
                type = btf__type_by_id(obj->btf, vsi->type);
                var_name = btf__name_by_offset(obj->btf, type->name_off);

                type_id = btf__resolve_type(obj->btf, vsi->type);
                if (type_id < 0) {
                        pr_warn("struct_ops init: Cannot resolve var type_id %u in DATASEC %s\n",
                                vsi->type, STRUCT_OPS_SEC);
                        return -EINVAL;
                }

                type = btf__type_by_id(obj->btf, type_id);
                tname = btf__name_by_offset(obj->btf, type->name_off);
                if (!tname[0]) {
                        pr_warn("struct_ops init: anonymous type is not supported\n");
                        return -ENOTSUP;
                }
                if (!btf_is_struct(type)) {
                        pr_warn("struct_ops init: %s is not a struct\n", tname);
                        return -EINVAL;
                }

                map = bpf_object__add_map(obj);
                if (IS_ERR(map))
                        return PTR_ERR(map);

                map->sec_idx = obj->efile.st_ops_shndx;
                map->sec_offset = vsi->offset;
                map->name = strdup(var_name);
                if (!map->name)
                        return -ENOMEM;

                map->def.type = BPF_MAP_TYPE_STRUCT_OPS;
                map->def.key_size = sizeof(int);
                map->def.value_size = type->size;
                map->def.max_entries = 1;

                map->st_ops = calloc(1, sizeof(*map->st_ops));
                if (!map->st_ops)
                        return -ENOMEM;
                st_ops = map->st_ops;
                st_ops->data = malloc(type->size);
                st_ops->progs = calloc(btf_vlen(type), sizeof(*st_ops->progs));
                st_ops->kern_func_off = malloc(btf_vlen(type) *
                                               sizeof(*st_ops->kern_func_off));
                if (!st_ops->data || !st_ops->progs || !st_ops->kern_func_off)
                        return -ENOMEM;

                if (vsi->offset + type->size > obj->efile.st_ops_data->d_size) {
                        pr_warn("struct_ops init: var %s is beyond the end of DATASEC %s\n",
                                var_name, STRUCT_OPS_SEC);
                        return -EINVAL;
                }

                memcpy(st_ops->data,
                       obj->efile.st_ops_data->d_buf + vsi->offset,
                       type->size);
                st_ops->tname = tname;
                st_ops->type = type;
                st_ops->type_id = type_id;

                pr_debug("struct_ops init: struct %s(type_id=%u) %s found at offset %u\n",
                         tname, type_id, var_name, vsi->offset);
        }

        return 0;
}
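
/*
 * Example: the BPF-side definition that this parser consumes (a sketch
 * of the usual convention; field initializers are illustrative):
 *
 *	SEC(".struct_ops")
 *	struct tcp_congestion_ops dctcp = {
 *		.init		= (void *)dctcp_init,
 *		.ssthresh	= (void *)dctcp_ssthresh,
 *		.name		= "bpf_dctcp",
 *	};
 *
 * Each such variable in the ".struct_ops" DATASEC becomes one
 * BPF_MAP_TYPE_STRUCT_OPS map named after the variable ("dctcp").
 */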

static struct bpf_object *bpf_object__new(const char *path,
                                          const void *obj_buf,
                                          size_t obj_buf_sz,
                                          const char *obj_name)
{
        struct bpf_object *obj;
        char *end;

        obj = calloc(1, sizeof(struct bpf_object) + strlen(path) + 1);
        if (!obj) {
                pr_warn("alloc memory failed for %s\n", path);
                return ERR_PTR(-ENOMEM);
        }

        strcpy(obj->path, path);
        if (obj_name) {
                strncpy(obj->name, obj_name, sizeof(obj->name) - 1);
                obj->name[sizeof(obj->name) - 1] = 0;
        } else {
                /* Use the GNU version of basename(), which doesn't modify
                 * its argument.
                 */
                strncpy(obj->name, basename((void *)path),
                        sizeof(obj->name) - 1);
                end = strchr(obj->name, '.');
                if (end)
                        *end = 0;
        }

        obj->efile.fd = -1;
        /*
         * The caller of this function should also call
         * bpf_object__elf_finish() after data collection to return
         * obj_buf to the user. Otherwise we would have to duplicate the
         * buffer to avoid the user freeing it before ELF processing is
         * finished.
         */
        obj->efile.obj_buf = obj_buf;
        obj->efile.obj_buf_sz = obj_buf_sz;
        obj->efile.maps_shndx = -1;
        obj->efile.btf_maps_shndx = -1;
        obj->efile.data_shndx = -1;
        obj->efile.rodata_shndx = -1;
        obj->efile.bss_shndx = -1;
        obj->efile.st_ops_shndx = -1;
        obj->kconfig_map_idx = -1;

        obj->kern_version = get_kernel_version();
        obj->loaded = false;

        INIT_LIST_HEAD(&obj->list);
        list_add(&obj->list, &bpf_objects_list);
        return obj;
}

static void bpf_object__elf_finish(struct bpf_object *obj)
{
        if (!obj_elf_valid(obj))
                return;

        if (obj->efile.elf) {
                elf_end(obj->efile.elf);
                obj->efile.elf = NULL;
        }
        obj->efile.symbols = NULL;
        obj->efile.data = NULL;
        obj->efile.rodata = NULL;
        obj->efile.bss = NULL;
        obj->efile.st_ops_data = NULL;

        zfree(&obj->efile.reloc_sects);
        obj->efile.nr_reloc_sects = 0;
        zclose(obj->efile.fd);
        obj->efile.obj_buf = NULL;
        obj->efile.obj_buf_sz = 0;
}

/* if libelf is old and doesn't support mmap(), fall back to read() */
#ifndef ELF_C_READ_MMAP
#define ELF_C_READ_MMAP ELF_C_READ
#endif

static int bpf_object__elf_init(struct bpf_object *obj)
{
        int err = 0;
        GElf_Ehdr *ep;

        if (obj_elf_valid(obj)) {
                pr_warn("elf: init internal error\n");
                return -LIBBPF_ERRNO__LIBELF;
        }

        if (obj->efile.obj_buf_sz > 0) {
                /*
                 * obj_buf should have been validated by
                 * bpf_object__open_buffer().
                 */
                obj->efile.elf = elf_memory((char *)obj->efile.obj_buf,
                                            obj->efile.obj_buf_sz);
        } else {
                obj->efile.fd = open(obj->path, O_RDONLY);
                if (obj->efile.fd < 0) {
                        char errmsg[STRERR_BUFSIZE], *cp;

                        err = -errno;
                        cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
                        pr_warn("elf: failed to open %s: %s\n", obj->path, cp);
                        return err;
                }

                obj->efile.elf = elf_begin(obj->efile.fd, ELF_C_READ_MMAP, NULL);
        }

        if (!obj->efile.elf) {
                pr_warn("elf: failed to open %s as ELF file: %s\n", obj->path, elf_errmsg(-1));
                err = -LIBBPF_ERRNO__LIBELF;
                goto errout;
        }

        if (!gelf_getehdr(obj->efile.elf, &obj->efile.ehdr)) {
                pr_warn("elf: failed to get ELF header from %s: %s\n", obj->path, elf_errmsg(-1));
                err = -LIBBPF_ERRNO__FORMAT;
                goto errout;
        }
        ep = &obj->efile.ehdr;

        if (elf_getshdrstrndx(obj->efile.elf, &obj->efile.shstrndx)) {
                pr_warn("elf: failed to get section names section index for %s: %s\n",
                        obj->path, elf_errmsg(-1));
                err = -LIBBPF_ERRNO__FORMAT;
                goto errout;
        }

        /* ELF is corrupted/truncated; avoid calling elf_strptr. */
        if (!elf_rawdata(elf_getscn(obj->efile.elf, obj->efile.shstrndx), NULL)) {
                pr_warn("elf: failed to get section names strings from %s: %s\n",
                        obj->path, elf_errmsg(-1));
                err = -LIBBPF_ERRNO__FORMAT;
                goto errout;
        }

        /* Old LLVM sets e_machine to EM_NONE */
        if (ep->e_type != ET_REL ||
            (ep->e_machine && ep->e_machine != EM_BPF)) {
                pr_warn("elf: %s is not a valid eBPF object file\n", obj->path);
                err = -LIBBPF_ERRNO__FORMAT;
                goto errout;
        }

        return 0;
errout:
        bpf_object__elf_finish(obj);
        return err;
}

static int bpf_object__check_endianness(struct bpf_object *obj)
{
#if __BYTE_ORDER == __LITTLE_ENDIAN
        if (obj->efile.ehdr.e_ident[EI_DATA] == ELFDATA2LSB)
                return 0;
#elif __BYTE_ORDER == __BIG_ENDIAN
        if (obj->efile.ehdr.e_ident[EI_DATA] == ELFDATA2MSB)
                return 0;
#else
# error "Unrecognized __BYTE_ORDER"
#endif
        pr_warn("elf: endianness mismatch in %s.\n", obj->path);
        return -LIBBPF_ERRNO__ENDIAN;
}

static int
bpf_object__init_license(struct bpf_object *obj, void *data, size_t size)
{
        memcpy(obj->license, data, min(size, sizeof(obj->license) - 1));
        pr_debug("license of %s is %s\n", obj->path, obj->license);
        return 0;
}

static int
bpf_object__init_kversion(struct bpf_object *obj, void *data, size_t size)
{
        __u32 kver;

        if (size != sizeof(kver)) {
                pr_warn("invalid kver section in %s\n", obj->path);
                return -LIBBPF_ERRNO__FORMAT;
        }
        memcpy(&kver, data, sizeof(kver));
        obj->kern_version = kver;
        pr_debug("kernel version of %s is %x\n", obj->path, obj->kern_version);
        return 0;
}
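
/*
 * Example: the BPF-side sections these two parsers consume (the usual
 * convention; the "version" section is only required by older kernels):
 *
 *	char _license[] SEC("license") = "GPL";
 *	__u32 _version SEC("version") = LINUX_VERSION_CODE;
 */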

static bool bpf_map_type__is_map_in_map(enum bpf_map_type type)
{
        if (type == BPF_MAP_TYPE_ARRAY_OF_MAPS ||
            type == BPF_MAP_TYPE_HASH_OF_MAPS)
                return true;
        return false;
}

int bpf_object__section_size(const struct bpf_object *obj, const char *name,
                             __u32 *size)
{
        int ret = -ENOENT;

        *size = 0;
        if (!name) {
                return -EINVAL;
        } else if (!strcmp(name, DATA_SEC)) {
                if (obj->efile.data)
                        *size = obj->efile.data->d_size;
        } else if (!strcmp(name, BSS_SEC)) {
                if (obj->efile.bss)
                        *size = obj->efile.bss->d_size;
        } else if (!strcmp(name, RODATA_SEC)) {
                if (obj->efile.rodata)
                        *size = obj->efile.rodata->d_size;
        } else if (!strcmp(name, STRUCT_OPS_SEC)) {
                if (obj->efile.st_ops_data)
                        *size = obj->efile.st_ops_data->d_size;
        } else {
                Elf_Scn *scn = elf_sec_by_name(obj, name);
                Elf_Data *data = elf_sec_data(obj, scn);

                if (data) {
                        ret = 0; /* found it */
                        *size = data->d_size;
                }
        }

        return *size ? 0 : ret;
}

int bpf_object__variable_offset(const struct bpf_object *obj, const char *name,
                                __u32 *off)
{
        Elf_Data *symbols = obj->efile.symbols;
        const char *sname;
        size_t si;

        if (!name || !off)
                return -EINVAL;

        for (si = 0; si < symbols->d_size / sizeof(GElf_Sym); si++) {
                GElf_Sym sym;

                if (!gelf_getsym(symbols, si, &sym))
                        continue;
                if (GELF_ST_BIND(sym.st_info) != STB_GLOBAL ||
                    GELF_ST_TYPE(sym.st_info) != STT_OBJECT)
                        continue;

                sname = elf_sym_str(obj, sym.st_name);
                if (!sname) {
                        pr_warn("failed to get sym name string for var %s\n",
                                name);
                        return -EIO;
                }
                if (strcmp(name, sname) == 0) {
                        *off = sym.st_value;
                        return 0;
                }
        }

        return -ENOENT;
}

static struct bpf_map *bpf_object__add_map(struct bpf_object *obj)
{
        struct bpf_map *new_maps;
        size_t new_cap;
        int i;

        if (obj->nr_maps < obj->maps_cap)
                return &obj->maps[obj->nr_maps++];

        new_cap = max((size_t)4, obj->maps_cap * 3 / 2);
        new_maps = libbpf_reallocarray(obj->maps, new_cap, sizeof(*obj->maps));
        if (!new_maps) {
                pr_warn("alloc maps for object failed\n");
                return ERR_PTR(-ENOMEM);
        }

        obj->maps_cap = new_cap;
        obj->maps = new_maps;

        /* zero out new maps */
        memset(obj->maps + obj->nr_maps, 0,
               (obj->maps_cap - obj->nr_maps) * sizeof(*obj->maps));
        /*
         * Fill all fds with -1 so we won't close an incorrect fd (fd=0 is
         * stdin) on failure (zclose won't close a negative fd).
         */
        for (i = obj->nr_maps; i < obj->maps_cap; i++) {
                obj->maps[i].fd = -1;
                obj->maps[i].inner_map_fd = -1;
        }

        return &obj->maps[obj->nr_maps++];
}

static size_t bpf_map_mmap_sz(const struct bpf_map *map)
{
        long page_sz = sysconf(_SC_PAGE_SIZE);
        size_t map_sz;

        map_sz = (size_t)roundup(map->def.value_size, 8) * map->def.max_entries;
        map_sz = roundup(map_sz, page_sz);
        return map_sz;
}
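
/*
 * Example: with a 4096-byte page size, a map with value_size = 100 and
 * max_entries = 50 needs roundup(100, 8) * 50 = 104 * 50 = 5200 bytes,
 * which rounds up to an 8192-byte (two-page) mmap() region.
 */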

static char *internal_map_name(struct bpf_object *obj,
                               enum libbpf_map_type type)
{
        char map_name[BPF_OBJ_NAME_LEN], *p;
        const char *sfx = libbpf_type_to_btf_name[type];
        int sfx_len = max((size_t)7, strlen(sfx));
        int pfx_len = min((size_t)BPF_OBJ_NAME_LEN - sfx_len - 1,
                          strlen(obj->name));

        snprintf(map_name, sizeof(map_name), "%.*s%.*s", pfx_len, obj->name,
                 sfx_len, libbpf_type_to_btf_name[type]);

        /* sanitise map name to characters allowed by kernel */
        for (p = map_name; *p && p < map_name + sizeof(map_name); p++)
                if (!isalnum(*p) && *p != '_' && *p != '.')
                        *p = '_';

        return strdup(map_name);
}
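
/*
 * Example: for an object named "test_prog" and type LIBBPF_MAP_RODATA,
 * the result is "test_pro.rodata": BPF_OBJ_NAME_LEN (16) leaves 15
 * usable characters, the ".rodata" suffix takes 7 of them, so the
 * object-name prefix is truncated to the remaining 8.
 */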

static int
bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type,
                              int sec_idx, void *data, size_t data_sz)
{
        struct bpf_map_def *def;
        struct bpf_map *map;
        int err;

        map = bpf_object__add_map(obj);
        if (IS_ERR(map))
                return PTR_ERR(map);

        map->libbpf_type = type;
        map->sec_idx = sec_idx;
        map->sec_offset = 0;
        map->name = internal_map_name(obj, type);
        if (!map->name) {
                pr_warn("failed to alloc map name\n");
                return -ENOMEM;
        }

        def = &map->def;
        def->type = BPF_MAP_TYPE_ARRAY;
        def->key_size = sizeof(int);
        def->value_size = data_sz;
        def->max_entries = 1;
        def->map_flags = type == LIBBPF_MAP_RODATA || type == LIBBPF_MAP_KCONFIG
                         ? BPF_F_RDONLY_PROG : 0;
        def->map_flags |= BPF_F_MMAPABLE;

        pr_debug("map '%s' (global data): at sec_idx %d, offset %zu, flags %x.\n",
                 map->name, map->sec_idx, map->sec_offset, def->map_flags);

        map->mmaped = mmap(NULL, bpf_map_mmap_sz(map), PROT_READ | PROT_WRITE,
                           MAP_SHARED | MAP_ANONYMOUS, -1, 0);
        if (map->mmaped == MAP_FAILED) {
                err = -errno;
                map->mmaped = NULL;
                pr_warn("failed to alloc map '%s' content buffer: %d\n",
                        map->name, err);
                zfree(&map->name);
                return err;
        }

        if (data)
                memcpy(map->mmaped, data, data_sz);

        pr_debug("map %td is \"%s\"\n", map - obj->maps, map->name);
        return 0;
}

static int bpf_object__init_global_data_maps(struct bpf_object *obj)
{
        int err;

        /*
         * Populate obj->maps with libbpf internal maps.
         */
        if (obj->efile.data_shndx >= 0) {
                err = bpf_object__init_internal_map(obj, LIBBPF_MAP_DATA,
                                                    obj->efile.data_shndx,
                                                    obj->efile.data->d_buf,
                                                    obj->efile.data->d_size);
                if (err)
                        return err;
        }
        if (obj->efile.rodata_shndx >= 0) {
                err = bpf_object__init_internal_map(obj, LIBBPF_MAP_RODATA,
                                                    obj->efile.rodata_shndx,
                                                    obj->efile.rodata->d_buf,
                                                    obj->efile.rodata->d_size);
                if (err)
                        return err;
        }
        if (obj->efile.bss_shndx >= 0) {
                err = bpf_object__init_internal_map(obj, LIBBPF_MAP_BSS,
                                                    obj->efile.bss_shndx,
                                                    NULL,
                                                    obj->efile.bss->d_size);
                if (err)
                        return err;
        }
        return 0;
}

static struct extern_desc *find_extern_by_name(const struct bpf_object *obj,
                                               const void *name)
{
        int i;

        for (i = 0; i < obj->nr_extern; i++) {
                if (strcmp(obj->externs[i].name, name) == 0)
                        return &obj->externs[i];
        }
        return NULL;
}

static int set_kcfg_value_tri(struct extern_desc *ext, void *ext_val,
                              char value)
{
        switch (ext->kcfg.type) {
        case KCFG_BOOL:
                if (value == 'm') {
                        pr_warn("extern (kcfg) %s=%c should be tristate or char\n",
                                ext->name, value);
                        return -EINVAL;
                }
                *(bool *)ext_val = value == 'y' ? true : false;
                break;
        case KCFG_TRISTATE:
                if (value == 'y')
                        *(enum libbpf_tristate *)ext_val = TRI_YES;
                else if (value == 'm')
                        *(enum libbpf_tristate *)ext_val = TRI_MODULE;
                else /* value == 'n' */
                        *(enum libbpf_tristate *)ext_val = TRI_NO;
                break;
        case KCFG_CHAR:
                *(char *)ext_val = value;
                break;
        case KCFG_UNKNOWN:
        case KCFG_INT:
        case KCFG_CHAR_ARR:
        default:
                pr_warn("extern (kcfg) %s=%c should be bool, tristate, or char\n",
                        ext->name, value);
                return -EINVAL;
        }
        ext->is_set = true;
        return 0;
}

static int set_kcfg_value_str(struct extern_desc *ext, char *ext_val,
                              const char *value)
{
        size_t len;

        if (ext->kcfg.type != KCFG_CHAR_ARR) {
                pr_warn("extern (kcfg) %s=%s should be char array\n", ext->name, value);
                return -EINVAL;
        }

        len = strlen(value);
        /* require at least opening and closing quotes */
        if (len < 2 || value[len - 1] != '"') {
                pr_warn("extern (kcfg) '%s': invalid string config '%s'\n",
                        ext->name, value);
                return -EINVAL;
        }

        /* strip quotes */
        len -= 2;
        if (len >= ext->kcfg.sz) {
                pr_warn("extern (kcfg) '%s': long string config %s (%zu bytes) truncated to %d bytes\n",
                        ext->name, value, len, ext->kcfg.sz - 1);
                len = ext->kcfg.sz - 1;
        }
        memcpy(ext_val, value + 1, len);
        ext_val[len] = '\0';
        ext->is_set = true;
        return 0;
}

static int parse_u64(const char *value, __u64 *res)
{
        char *value_end;
        int err;

        errno = 0;
        *res = strtoull(value, &value_end, 0);
        if (errno) {
                err = -errno;
                pr_warn("failed to parse '%s' as integer: %d\n", value, err);
                return err;
        }
        if (*value_end) {
                pr_warn("failed to parse '%s' as integer completely\n", value);
                return -EINVAL;
        }
        return 0;
}

static bool is_kcfg_value_in_range(const struct extern_desc *ext, __u64 v)
{
        int bit_sz = ext->kcfg.sz * 8;

        if (ext->kcfg.sz == 8)
                return true;

        /* Validate that a value stored in u64 fits in an integer of
         * ext->kcfg.sz bytes without any loss of information. If the
         * target integer is signed, we rely on the following limits of
         * an integer type of Y bits and the subsequent transformation:
         *
         *     -2^(Y-1) <= X           <= 2^(Y-1) - 1
         *            0 <= X + 2^(Y-1) <= 2^Y - 1
         *            0 <= X + 2^(Y-1) <  2^Y
         *
         * For an unsigned target integer, check that all the (64 - Y)
         * high bits are zero.
         */
        if (ext->kcfg.is_signed)
                return v + (1ULL << (bit_sz - 1)) < (1ULL << bit_sz);
        else
                return (v >> bit_sz) == 0;
}
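
/*
 * Example: for a signed 1-byte target (bit_sz = 8), v = -128 stored in
 * a u64 is 0xffffffffffffff80; adding 2^7 wraps to 0, which is < 2^8,
 * so it is in range. v = 128 gives 128 + 128 = 256, which is not < 2^8,
 * so it is correctly rejected (signed char tops out at 127).
 */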
1524
1525 static int set_kcfg_value_num(struct extern_desc *ext, void *ext_val,
1526                               __u64 value)
1527 {
1528         if (ext->kcfg.type != KCFG_INT && ext->kcfg.type != KCFG_CHAR) {
1529                 pr_warn("extern (kcfg) %s=%llu should be integer\n",
1530                         ext->name, (unsigned long long)value);
1531                 return -EINVAL;
1532         }
1533         if (!is_kcfg_value_in_range(ext, value)) {
1534                 pr_warn("extern (kcfg) %s=%llu value doesn't fit in %d bytes\n",
1535                         ext->name, (unsigned long long)value, ext->kcfg.sz);
1536                 return -ERANGE;
1537         }
1538         switch (ext->kcfg.sz) {
1539         case 1: *(__u8 *)ext_val = value; break;
1540         case 2: *(__u16 *)ext_val = value; break;
1541         case 4: *(__u32 *)ext_val = value; break;
1542         case 8: *(__u64 *)ext_val = value; break;
1543         default:
1544                 return -EINVAL;
1545         }
1546         ext->is_set = true;
1547         return 0;
1548 }
1549
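/* Apply one Kconfig line, e.g. "CONFIG_BPF=y", "CONFIG_HZ=1000", or
 * "CONFIG_LOCALVERSION=\"-custom\"", to a matching extern that BPF code
 * declared along these lines:
 *
 *	extern int CONFIG_HZ __kconfig;
 *
 * (__kconfig, from bpf_helpers.h, places the extern into the ".kconfig"
 * section; CONFIG_HZ here is just an example.) Lines with no matching,
 * not-yet-set extern are silently skipped.
 */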
1550 static int bpf_object__process_kconfig_line(struct bpf_object *obj,
1551                                             char *buf, void *data)
1552 {
1553         struct extern_desc *ext;
1554         char *sep, *value;
1555         int len, err = 0;
1556         void *ext_val;
1557         __u64 num;
1558
1559         if (strncmp(buf, "CONFIG_", 7))
1560                 return 0;
1561
1562         sep = strchr(buf, '=');
1563         if (!sep) {
1564                 pr_warn("failed to parse '%s': no separator\n", buf);
1565                 return -EINVAL;
1566         }
1567
1568         /* Trim ending '\n' */
1569         len = strlen(buf);
1570         if (buf[len - 1] == '\n')
1571                 buf[len - 1] = '\0';
1572         /* Split on '=' and ensure that a value is present. */
1573         *sep = '\0';
1574         if (!sep[1]) {
1575                 *sep = '=';
1576                 pr_warn("failed to parse '%s': no value\n", buf);
1577                 return -EINVAL;
1578         }
1579
1580         ext = find_extern_by_name(obj, buf);
1581         if (!ext || ext->is_set)
1582                 return 0;
1583
1584         ext_val = data + ext->kcfg.data_off;
1585         value = sep + 1;
1586
1587         switch (*value) {
1588         case 'y': case 'n': case 'm':
1589                 err = set_kcfg_value_tri(ext, ext_val, *value);
1590                 break;
1591         case '"':
1592                 err = set_kcfg_value_str(ext, ext_val, value);
1593                 break;
1594         default:
1595                 /* assume integer */
1596                 err = parse_u64(value, &num);
1597                 if (err) {
1598                         pr_warn("extern (kcfg) %s=%s should be integer\n",
1599                                 ext->name, value);
1600                         return err;
1601                 }
1602                 err = set_kcfg_value_num(ext, ext_val, num);
1603                 break;
1604         }
1605         if (err)
1606                 return err;
1607         pr_debug("extern (kcfg) %s=%s\n", ext->name, value);
1608         return 0;
1609 }
1610
1611 static int bpf_object__read_kconfig_file(struct bpf_object *obj, void *data)
1612 {
1613         char buf[PATH_MAX];
1614         struct utsname uts;
1615         int len, err = 0;
1616         gzFile file;
1617
1618         if (uname(&uts))
1619                 return -errno;
1619         len = snprintf(buf, PATH_MAX, "/boot/config-%s", uts.release);
1620         if (len < 0)
1621                 return -EINVAL;
1622         else if (len >= PATH_MAX)
1623                 return -ENAMETOOLONG;
1624
1625         /* gzopen also accepts uncompressed files. */
1626         file = gzopen(buf, "r");
1627         if (!file)
1628                 file = gzopen("/proc/config.gz", "r");
1629
1630         if (!file) {
1631                 pr_warn("failed to open system Kconfig\n");
1632                 return -ENOENT;
1633         }
1634
1635         while (gzgets(file, buf, sizeof(buf))) {
1636                 err = bpf_object__process_kconfig_line(obj, buf, data);
1637                 if (err) {
1638                         pr_warn("error parsing system Kconfig line '%s': %d\n",
1639                                 buf, err);
1640                         goto out;
1641                 }
1642         }
1643
1644 out:
1645         gzclose(file);
1646         return err;
1647 }
1648
1649 static int bpf_object__read_kconfig_mem(struct bpf_object *obj,
1650                                         const char *config, void *data)
1651 {
1652         char buf[PATH_MAX];
1653         int err = 0;
1654         FILE *file;
1655
1656         file = fmemopen((void *)config, strlen(config), "r");
1657         if (!file) {
1658                 err = -errno;
1659                 pr_warn("failed to open in-memory Kconfig: %d\n", err);
1660                 return err;
1661         }
1662
1663         while (fgets(buf, sizeof(buf), file)) {
1664                 err = bpf_object__process_kconfig_line(obj, buf, data);
1665                 if (err) {
1666                         pr_warn("error parsing in-memory Kconfig line '%s': %d\n",
1667                                 buf, err);
1668                         break;
1669                 }
1670         }
1671
1672         fclose(file);
1673         return err;
1674 }
1675
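/* Create the internal .kconfig map backing all EXT_KCFG externs. Extern
 * descriptors are sorted by cmp_externs() and get their data_off assigned
 * in that order, so the last EXT_KCFG extern marks how much room the
 * map's value needs.
 */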
1676 static int bpf_object__init_kconfig_map(struct bpf_object *obj)
1677 {
1678         struct extern_desc *last_ext = NULL, *ext;
1679         size_t map_sz;
1680         int i, err;
1681
1682         for (i = 0; i < obj->nr_extern; i++) {
1683                 ext = &obj->externs[i];
1684                 if (ext->type == EXT_KCFG)
1685                         last_ext = ext;
1686         }
1687
1688         if (!last_ext)
1689                 return 0;
1690
1691         map_sz = last_ext->kcfg.data_off + last_ext->kcfg.sz;
1692         err = bpf_object__init_internal_map(obj, LIBBPF_MAP_KCONFIG,
1693                                             obj->efile.symbols_shndx,
1694                                             NULL, map_sz);
1695         if (err)
1696                 return err;
1697
1698         obj->kconfig_map_idx = obj->nr_maps - 1;
1699
1700         return 0;
1701 }
1702
1703 static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict)
1704 {
1705         Elf_Data *symbols = obj->efile.symbols;
1706         int i, map_def_sz = 0, nr_maps = 0, nr_syms;
1707         Elf_Data *data = NULL;
1708         Elf_Scn *scn;
1709
1710         if (obj->efile.maps_shndx < 0)
1711                 return 0;
1712
1713         if (!symbols)
1714                 return -EINVAL;
1715
1717         scn = elf_sec_by_idx(obj, obj->efile.maps_shndx);
1718         data = elf_sec_data(obj, scn);
1719         if (!scn || !data) {
1720                 pr_warn("elf: failed to get legacy map definitions for %s\n",
1721                         obj->path);
1722                 return -EINVAL;
1723         }
1724
1725         /*
1726          * Count number of maps. Each map has a name.
1727          * Array of maps is not supported: only the first element is
1728          * considered.
1729          *
1730          * TODO: Detect arrays of maps and report an error.
1731          */
1732         nr_syms = symbols->d_size / sizeof(GElf_Sym);
1733         for (i = 0; i < nr_syms; i++) {
1734                 GElf_Sym sym;
1735
1736                 if (!gelf_getsym(symbols, i, &sym))
1737                         continue;
1738                 if (sym.st_shndx != obj->efile.maps_shndx)
1739                         continue;
1740                 nr_maps++;
1741         }
1742         /* Assume equally sized map definitions */
1743         pr_debug("elf: found %d legacy map definitions (%zu bytes) in %s\n",
1744                  nr_maps, data->d_size, obj->path);
1745
1746         if (!data->d_size || nr_maps == 0 || (data->d_size % nr_maps) != 0) {
1747                 pr_warn("elf: unable to determine legacy map definition size in %s\n",
1748                         obj->path);
1749                 return -EINVAL;
1750         }
1751         map_def_sz = data->d_size / nr_maps;
1752
1753         /* Fill obj->maps using data in "maps" section.  */
1754         for (i = 0; i < nr_syms; i++) {
1755                 GElf_Sym sym;
1756                 const char *map_name;
1757                 struct bpf_map_def *def;
1758                 struct bpf_map *map;
1759
1760                 if (!gelf_getsym(symbols, i, &sym))
1761                         continue;
1762                 if (sym.st_shndx != obj->efile.maps_shndx)
1763                         continue;
1764
1765                 map = bpf_object__add_map(obj);
1766                 if (IS_ERR(map))
1767                         return PTR_ERR(map);
1768
1769                 map_name = elf_sym_str(obj, sym.st_name);
1770                 if (!map_name) {
1771                         pr_warn("failed to get map #%d name sym string for obj %s\n",
1772                                 i, obj->path);
1773                         return -LIBBPF_ERRNO__FORMAT;
1774                 }
1775
1776                 map->libbpf_type = LIBBPF_MAP_UNSPEC;
1777                 map->sec_idx = sym.st_shndx;
1778                 map->sec_offset = sym.st_value;
1779                 pr_debug("map '%s' (legacy): at sec_idx %d, offset %zu.\n",
1780                          map_name, map->sec_idx, map->sec_offset);
1781                 if (sym.st_value + map_def_sz > data->d_size) {
1782                         pr_warn("corrupted maps section in %s: last map \"%s\" too small\n",
1783                                 obj->path, map_name);
1784                         return -EINVAL;
1785                 }
1786
1787                 map->name = strdup(map_name);
1788                 if (!map->name) {
1789                         pr_warn("failed to alloc map name\n");
1790                         return -ENOMEM;
1791                 }
1792                 pr_debug("map %d is \"%s\"\n", i, map->name);
1793                 def = (struct bpf_map_def *)(data->d_buf + sym.st_value);
1794                 /*
1795                  * If the definition of the map in the object file fits in
1796                  * bpf_map_def, copy it.  Any extra fields in our version
1797                  * of bpf_map_def will default to zero as a result of the
1798                  * calloc above.
1799                  */
1800                 if (map_def_sz <= sizeof(struct bpf_map_def)) {
1801                         memcpy(&map->def, def, map_def_sz);
1802                 } else {
1803                         /*
1804                          * Here the map structure being read is bigger than what
1805                          * we expect, truncate if the excess bits are all zero.
1806                          * If they are not zero, reject this map as
1807                          * incompatible.
1808                          */
1809                         char *b;
1810
1811                         for (b = ((char *)def) + sizeof(struct bpf_map_def);
1812                              b < ((char *)def) + map_def_sz; b++) {
1813                                 if (*b != 0) {
1814                                         pr_warn("maps section in %s: \"%s\" has unrecognized, non-zero options\n",
1815                                                 obj->path, map_name);
1816                                         if (strict)
1817                                                 return -EINVAL;
1818                                 }
1819                         }
1820                         memcpy(&map->def, def, sizeof(struct bpf_map_def));
1821                 }
1822         }
1823         return 0;
1824 }
1825
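/* Follow typedef and type-modifier (const/volatile/restrict) chains down
 * to the underlying type; e.g., a `const volatile __u32` resolves through
 * the modifiers and the typedef to the plain INT type behind __u32. If
 * res_id is non-NULL, it receives the ID of the final resolved type.
 */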
1826 static const struct btf_type *
1827 skip_mods_and_typedefs(const struct btf *btf, __u32 id, __u32 *res_id)
1828 {
1829         const struct btf_type *t = btf__type_by_id(btf, id);
1830
1831         if (res_id)
1832                 *res_id = id;
1833
1834         while (btf_is_mod(t) || btf_is_typedef(t)) {
1835                 if (res_id)
1836                         *res_id = t->type;
1837                 t = btf__type_by_id(btf, t->type);
1838         }
1839
1840         return t;
1841 }
1842
1843 static const struct btf_type *
1844 resolve_func_ptr(const struct btf *btf, __u32 id, __u32 *res_id)
1845 {
1846         const struct btf_type *t;
1847
1848         t = skip_mods_and_typedefs(btf, id, NULL);
1849         if (!btf_is_ptr(t))
1850                 return NULL;
1851
1852         t = skip_mods_and_typedefs(btf, t->type, res_id);
1853
1854         return btf_is_func_proto(t) ? t : NULL;
1855 }
1856
1857 static const char *btf_kind_str(const struct btf_type *t)
1858 {
1859         switch (btf_kind(t)) {
1860         case BTF_KIND_UNKN: return "void";
1861         case BTF_KIND_INT: return "int";
1862         case BTF_KIND_PTR: return "ptr";
1863         case BTF_KIND_ARRAY: return "array";
1864         case BTF_KIND_STRUCT: return "struct";
1865         case BTF_KIND_UNION: return "union";
1866         case BTF_KIND_ENUM: return "enum";
1867         case BTF_KIND_FWD: return "fwd";
1868         case BTF_KIND_TYPEDEF: return "typedef";
1869         case BTF_KIND_VOLATILE: return "volatile";
1870         case BTF_KIND_CONST: return "const";
1871         case BTF_KIND_RESTRICT: return "restrict";
1872         case BTF_KIND_FUNC: return "func";
1873         case BTF_KIND_FUNC_PROTO: return "func_proto";
1874         case BTF_KIND_VAR: return "var";
1875         case BTF_KIND_DATASEC: return "datasec";
1876         default: return "unknown";
1877         }
1878 }
1879
1880 /*
1881  * Fetch integer attribute of BTF map definition. Such attributes are
1882  * represented using a pointer to an array, in which dimensionality of array
1883  * encodes specified integer value. E.g., int (*type)[BPF_MAP_TYPE_ARRAY];
1884  * encodes `type => BPF_MAP_TYPE_ARRAY` key/value pair completely using BTF
1885  * type definition, while using only sizeof(void *) space in ELF data section.
1886  */
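/* The __uint() helper from bpf_helpers.h, defined as
 * `#define __uint(name, val) int (*name)[val]`, produces exactly this
 * encoding; e.g., __uint(max_entries, 64) declares `int (*max_entries)[64]`.
 */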
1887 static bool get_map_field_int(const char *map_name, const struct btf *btf,
1888                               const struct btf_member *m, __u32 *res)
1889 {
1890         const struct btf_type *t = skip_mods_and_typedefs(btf, m->type, NULL);
1891         const char *name = btf__name_by_offset(btf, m->name_off);
1892         const struct btf_array *arr_info;
1893         const struct btf_type *arr_t;
1894
1895         if (!btf_is_ptr(t)) {
1896                 pr_warn("map '%s': attr '%s': expected PTR, got %s.\n",
1897                         map_name, name, btf_kind_str(t));
1898                 return false;
1899         }
1900
1901         arr_t = btf__type_by_id(btf, t->type);
1902         if (!arr_t) {
1903                 pr_warn("map '%s': attr '%s': type [%u] not found.\n",
1904                         map_name, name, t->type);
1905                 return false;
1906         }
1907         if (!btf_is_array(arr_t)) {
1908                 pr_warn("map '%s': attr '%s': expected ARRAY, got %s.\n",
1909                         map_name, name, btf_kind_str(arr_t));
1910                 return false;
1911         }
1912         arr_info = btf_array(arr_t);
1913         *res = arr_info->nelems;
1914         return true;
1915 }
1916
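/* Derive a map's pin path by appending its name to the pin root (default
 * "/sys/fs/bpf"); e.g., a map named "my_map" under the default root pins
 * at "/sys/fs/bpf/my_map".
 */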
1917 static int build_map_pin_path(struct bpf_map *map, const char *path)
1918 {
1919         char buf[PATH_MAX];
1920         int len;
1921
1922         if (!path)
1923                 path = "/sys/fs/bpf";
1924
1925         len = snprintf(buf, PATH_MAX, "%s/%s", path, bpf_map__name(map));
1926         if (len < 0)
1927                 return -EINVAL;
1928         else if (len >= PATH_MAX)
1929                 return -ENAMETOOLONG;
1930
1931         return bpf_map__set_pin_path(map, buf);
1932 }
1933
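/* Parse one BTF-defined map definition. In BPF source code such a map is
 * typically declared with helper macros from bpf_helpers.h, e.g.:
 *
 *	struct {
 *		__uint(type, BPF_MAP_TYPE_HASH);
 *		__uint(max_entries, 1024);
 *		__type(key, __u32);
 *		__type(value, struct my_value);
 *	} my_map SEC(".maps");
 *
 * (my_map and struct my_value are placeholders.) Each struct member is
 * matched by name against the fields handled below.
 */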
1935 static int parse_btf_map_def(struct bpf_object *obj,
1936                              struct bpf_map *map,
1937                              const struct btf_type *def,
1938                              bool strict, bool is_inner,
1939                              const char *pin_root_path)
1940 {
1941         const struct btf_type *t;
1942         const struct btf_member *m;
1943         int vlen, i;
1944
1945         vlen = btf_vlen(def);
1946         m = btf_members(def);
1947         for (i = 0; i < vlen; i++, m++) {
1948                 const char *name = btf__name_by_offset(obj->btf, m->name_off);
1949
1950                 if (!name) {
1951                         pr_warn("map '%s': invalid field #%d.\n", map->name, i);
1952                         return -EINVAL;
1953                 }
1954                 if (strcmp(name, "type") == 0) {
1955                         if (!get_map_field_int(map->name, obj->btf, m,
1956                                                &map->def.type))
1957                                 return -EINVAL;
1958                         pr_debug("map '%s': found type = %u.\n",
1959                                  map->name, map->def.type);
1960                 } else if (strcmp(name, "max_entries") == 0) {
1961                         if (!get_map_field_int(map->name, obj->btf, m,
1962                                                &map->def.max_entries))
1963                                 return -EINVAL;
1964                         pr_debug("map '%s': found max_entries = %u.\n",
1965                                  map->name, map->def.max_entries);
1966                 } else if (strcmp(name, "map_flags") == 0) {
1967                         if (!get_map_field_int(map->name, obj->btf, m,
1968                                                &map->def.map_flags))
1969                                 return -EINVAL;
1970                         pr_debug("map '%s': found map_flags = %u.\n",
1971                                  map->name, map->def.map_flags);
1972                 } else if (strcmp(name, "numa_node") == 0) {
1973                         if (!get_map_field_int(map->name, obj->btf, m, &map->numa_node))
1974                                 return -EINVAL;
1975                         pr_debug("map '%s': found numa_node = %u.\n", map->name, map->numa_node);
1976                 } else if (strcmp(name, "key_size") == 0) {
1977                         __u32 sz;
1978
1979                         if (!get_map_field_int(map->name, obj->btf, m, &sz))
1980                                 return -EINVAL;
1981                         pr_debug("map '%s': found key_size = %u.\n",
1982                                  map->name, sz);
1983                         if (map->def.key_size && map->def.key_size != sz) {
1984                                 pr_warn("map '%s': conflicting key size %u != %u.\n",
1985                                         map->name, map->def.key_size, sz);
1986                                 return -EINVAL;
1987                         }
1988                         map->def.key_size = sz;
1989                 } else if (strcmp(name, "key") == 0) {
1990                         __s64 sz;
1991
1992                         t = btf__type_by_id(obj->btf, m->type);
1993                         if (!t) {
1994                                 pr_warn("map '%s': key type [%d] not found.\n",
1995                                         map->name, m->type);
1996                                 return -EINVAL;
1997                         }
1998                         if (!btf_is_ptr(t)) {
1999                                 pr_warn("map '%s': key spec is not PTR: %s.\n",
2000                                         map->name, btf_kind_str(t));
2001                                 return -EINVAL;
2002                         }
2003                         sz = btf__resolve_size(obj->btf, t->type);
2004                         if (sz < 0) {
2005                                 pr_warn("map '%s': can't determine key size for type [%u]: %zd.\n",
2006                                         map->name, t->type, (ssize_t)sz);
2007                                 return sz;
2008                         }
2009                         pr_debug("map '%s': found key [%u], sz = %zd.\n",
2010                                  map->name, t->type, (ssize_t)sz);
2011                         if (map->def.key_size && map->def.key_size != sz) {
2012                                 pr_warn("map '%s': conflicting key size %u != %zd.\n",
2013                                         map->name, map->def.key_size, (ssize_t)sz);
2014                                 return -EINVAL;
2015                         }
2016                         map->def.key_size = sz;
2017                         map->btf_key_type_id = t->type;
2018                 } else if (strcmp(name, "value_size") == 0) {
2019                         __u32 sz;
2020
2021                         if (!get_map_field_int(map->name, obj->btf, m, &sz))
2022                                 return -EINVAL;
2023                         pr_debug("map '%s': found value_size = %u.\n",
2024                                  map->name, sz);
2025                         if (map->def.value_size && map->def.value_size != sz) {
2026                                 pr_warn("map '%s': conflicting value size %u != %u.\n",
2027                                         map->name, map->def.value_size, sz);
2028                                 return -EINVAL;
2029                         }
2030                         map->def.value_size = sz;
2031                 } else if (strcmp(name, "value") == 0) {
2032                         __s64 sz;
2033
2034                         t = btf__type_by_id(obj->btf, m->type);
2035                         if (!t) {
2036                                 pr_warn("map '%s': value type [%d] not found.\n",
2037                                         map->name, m->type);
2038                                 return -EINVAL;
2039                         }
2040                         if (!btf_is_ptr(t)) {
2041                                 pr_warn("map '%s': value spec is not PTR: %s.\n",
2042                                         map->name, btf_kind_str(t));
2043                                 return -EINVAL;
2044                         }
2045                         sz = btf__resolve_size(obj->btf, t->type);
2046                         if (sz < 0) {
2047                                 pr_warn("map '%s': can't determine value size for type [%u]: %zd.\n",
2048                                         map->name, t->type, (ssize_t)sz);
2049                                 return sz;
2050                         }
2051                         pr_debug("map '%s': found value [%u], sz = %zd.\n",
2052                                  map->name, t->type, (ssize_t)sz);
2053                         if (map->def.value_size && map->def.value_size != sz) {
2054                                 pr_warn("map '%s': conflicting value size %u != %zd.\n",
2055                                         map->name, map->def.value_size, (ssize_t)sz);
2056                                 return -EINVAL;
2057                         }
2058                         map->def.value_size = sz;
2059                         map->btf_value_type_id = t->type;
2060                 } else if (strcmp(name, "values") == 0) {
2062                         int err;
2063
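                        /* A map-in-map is declared with a zero-sized array of
                         * pointers to the inner map's definition, e.g. using
                         * __array() from bpf_helpers.h (names are placeholders):
                         *
                         *	struct {
                         *		__uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
                         *		__uint(max_entries, 1);
                         *		__type(key, int);
                         *		__array(values, struct inner_map);
                         *	} outer SEC(".maps");
                         */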
2064                         if (is_inner) {
2065                                 pr_warn("map '%s': multi-level inner maps not supported.\n",
2066                                         map->name);
2067                                 return -ENOTSUP;
2068                         }
2069                         if (i != vlen - 1) {
2070                                 pr_warn("map '%s': '%s' member should be last.\n",
2071                                         map->name, name);
2072                                 return -EINVAL;
2073                         }
2074                         if (!bpf_map_type__is_map_in_map(map->def.type)) {
2075                                 pr_warn("map '%s': should be map-in-map.\n",
2076                                         map->name);
2077                                 return -ENOTSUP;
2078                         }
2079                         if (map->def.value_size && map->def.value_size != 4) {
2080                                 pr_warn("map '%s': conflicting value size %u != 4.\n",
2081                                         map->name, map->def.value_size);
2082                                 return -EINVAL;
2083                         }
2084                         map->def.value_size = 4;
2085                         t = btf__type_by_id(obj->btf, m->type);
2086                         if (!t) {
2087                                 pr_warn("map '%s': map-in-map inner type [%d] not found.\n",
2088                                         map->name, m->type);
2089                                 return -EINVAL;
2090                         }
2091                         if (!btf_is_array(t) || btf_array(t)->nelems) {
2092                                 pr_warn("map '%s': map-in-map inner spec is not a zero-sized array.\n",
2093                                         map->name);
2094                                 return -EINVAL;
2095                         }
2096                         t = skip_mods_and_typedefs(obj->btf, btf_array(t)->type,
2097                                                    NULL);
2098                         if (!btf_is_ptr(t)) {
2099                                 pr_warn("map '%s': map-in-map inner def is of unexpected kind %s.\n",
2100                                         map->name, btf_kind_str(t));
2101                                 return -EINVAL;
2102                         }
2103                         t = skip_mods_and_typedefs(obj->btf, t->type, NULL);
2104                         if (!btf_is_struct(t)) {
2105                                 pr_warn("map '%s': map-in-map inner def is of unexpected kind %s.\n",
2106                                         map->name, btf_kind_str(t));
2107                                 return -EINVAL;
2108                         }
2109
2110                         map->inner_map = calloc(1, sizeof(*map->inner_map));
2111                         if (!map->inner_map)
2112                                 return -ENOMEM;
2113                         map->inner_map->sec_idx = obj->efile.btf_maps_shndx;
2114                         map->inner_map->name = malloc(strlen(map->name) +
2115                                                       sizeof(".inner") + 1);
2116                         if (!map->inner_map->name)
2117                                 return -ENOMEM;
2118                         sprintf(map->inner_map->name, "%s.inner", map->name);
2119
2120                         err = parse_btf_map_def(obj, map->inner_map, t, strict,
2121                                                 true /* is_inner */, NULL);
2122                         if (err)
2123                                 return err;
2124                 } else if (strcmp(name, "pinning") == 0) {
2125                         __u32 val;
2126                         int err;
2127
2128                         if (is_inner) {
2129                                 pr_warn("map '%s': inner def can't be pinned.\n",
2130                                          map->name);
2131                                 return -EINVAL;
2132                         }
2133                         if (!get_map_field_int(map->name, obj->btf, m, &val))
2134                                 return -EINVAL;
2135                         pr_debug("map '%s': found pinning = %u.\n",
2136                                  map->name, val);
2137
2138                         if (val != LIBBPF_PIN_NONE &&
2139                             val != LIBBPF_PIN_BY_NAME) {
2140                                 pr_warn("map '%s': invalid pinning value %u.\n",
2141                                         map->name, val);
2142                                 return -EINVAL;
2143                         }
2144                         if (val == LIBBPF_PIN_BY_NAME) {
2145                                 err = build_map_pin_path(map, pin_root_path);
2146                                 if (err) {
2147                                         pr_warn("map '%s': couldn't build pin path.\n",
2148                                                 map->name);
2149                                         return err;
2150                                 }
2151                         }
2152                 } else {
2153                         if (strict) {
2154                                 pr_warn("map '%s': unknown field '%s'.\n",
2155                                         map->name, name);
2156                                 return -ENOTSUP;
2157                         }
2158                         pr_debug("map '%s': ignoring unknown field '%s'.\n",
2159                                  map->name, name);
2160                 }
2161         }
2162
2163         if (map->def.type == BPF_MAP_TYPE_UNSPEC) {
2164                 pr_warn("map '%s': map type isn't specified.\n", map->name);
2165                 return -EINVAL;
2166         }
2167
2168         return 0;
2169 }
2170
2171 static int bpf_object__init_user_btf_map(struct bpf_object *obj,
2172                                          const struct btf_type *sec,
2173                                          int var_idx, int sec_idx,
2174                                          const Elf_Data *data, bool strict,
2175                                          const char *pin_root_path)
2176 {
2177         const struct btf_type *var, *def;
2178         const struct btf_var_secinfo *vi;
2179         const struct btf_var *var_extra;
2180         const char *map_name;
2181         struct bpf_map *map;
2182
2183         vi = btf_var_secinfos(sec) + var_idx;
2184         var = btf__type_by_id(obj->btf, vi->type);
2185         var_extra = btf_var(var);
2186         map_name = btf__name_by_offset(obj->btf, var->name_off);
2187
2188         if (map_name == NULL || map_name[0] == '\0') {
2189                 pr_warn("map #%d: empty name.\n", var_idx);
2190                 return -EINVAL;
2191         }
2192         if ((__u64)vi->offset + vi->size > data->d_size) {
2193                 pr_warn("map '%s' BTF data is corrupted.\n", map_name);
2194                 return -EINVAL;
2195         }
2196         if (!btf_is_var(var)) {
2197                 pr_warn("map '%s': unexpected var kind %s.\n",
2198                         map_name, btf_kind_str(var));
2199                 return -EINVAL;
2200         }
2201         if (var_extra->linkage != BTF_VAR_GLOBAL_ALLOCATED &&
2202             var_extra->linkage != BTF_VAR_STATIC) {
2203                 pr_warn("map '%s': unsupported var linkage %u.\n",
2204                         map_name, var_extra->linkage);
2205                 return -EOPNOTSUPP;
2206         }
2207
2208         def = skip_mods_and_typedefs(obj->btf, var->type, NULL);
2209         if (!btf_is_struct(def)) {
2210                 pr_warn("map '%s': unexpected def kind %s.\n",
2211                         map_name, btf_kind_str(def));
2212                 return -EINVAL;
2213         }
2214         if (def->size > vi->size) {
2215                 pr_warn("map '%s': invalid def size.\n", map_name);
2216                 return -EINVAL;
2217         }
2218
2219         map = bpf_object__add_map(obj);
2220         if (IS_ERR(map))
2221                 return PTR_ERR(map);
2222         map->name = strdup(map_name);
2223         if (!map->name) {
2224                 pr_warn("map '%s': failed to alloc map name.\n", map_name);
2225                 return -ENOMEM;
2226         }
2227         map->libbpf_type = LIBBPF_MAP_UNSPEC;
2228         map->def.type = BPF_MAP_TYPE_UNSPEC;
2229         map->sec_idx = sec_idx;
2230         map->sec_offset = vi->offset;
2231         map->btf_var_idx = var_idx;
2232         pr_debug("map '%s': at sec_idx %d, offset %zu.\n",
2233                  map_name, map->sec_idx, map->sec_offset);
2234
2235         return parse_btf_map_def(obj, map, def, strict, false, pin_root_path);
2236 }
2237
2238 static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict,
2239                                           const char *pin_root_path)
2240 {
2241         const struct btf_type *sec = NULL;
2242         int nr_types, i, vlen, err;
2243         const struct btf_type *t;
2244         const char *name;
2245         Elf_Data *data;
2246         Elf_Scn *scn;
2247
2248         if (obj->efile.btf_maps_shndx < 0)
2249                 return 0;
2250
2251         scn = elf_sec_by_idx(obj, obj->efile.btf_maps_shndx);
2252         data = elf_sec_data(obj, scn);
2253         if (!scn || !data) {
2254                 pr_warn("elf: failed to get %s map definitions for %s\n",
2255                         MAPS_ELF_SEC, obj->path);
2256                 return -EINVAL;
2257         }
2258
2259         nr_types = btf__get_nr_types(obj->btf);
2260         for (i = 1; i <= nr_types; i++) {
2261                 t = btf__type_by_id(obj->btf, i);
2262                 if (!btf_is_datasec(t))
2263                         continue;
2264                 name = btf__name_by_offset(obj->btf, t->name_off);
2265                 if (strcmp(name, MAPS_ELF_SEC) == 0) {
2266                         sec = t;
2267                         obj->efile.btf_maps_sec_btf_id = i;
2268                         break;
2269                 }
2270         }
2271
2272         if (!sec) {
2273                 pr_warn("DATASEC '%s' not found.\n", MAPS_ELF_SEC);
2274                 return -ENOENT;
2275         }
2276
2277         vlen = btf_vlen(sec);
2278         for (i = 0; i < vlen; i++) {
2279                 err = bpf_object__init_user_btf_map(obj, sec, i,
2280                                                     obj->efile.btf_maps_shndx,
2281                                                     data, strict,
2282                                                     pin_root_path);
2283                 if (err)
2284                         return err;
2285         }
2286
2287         return 0;
2288 }
2289
2290 static int bpf_object__init_maps(struct bpf_object *obj,
2291                                  const struct bpf_object_open_opts *opts)
2292 {
2293         const char *pin_root_path;
2294         bool strict;
2295         int err;
2296
2297         strict = !OPTS_GET(opts, relaxed_maps, false);
2298         pin_root_path = OPTS_GET(opts, pin_root_path, NULL);
2299
2300         err = bpf_object__init_user_maps(obj, strict);
2301         err = err ?: bpf_object__init_user_btf_maps(obj, strict, pin_root_path);
2302         err = err ?: bpf_object__init_global_data_maps(obj);
2303         err = err ?: bpf_object__init_kconfig_map(obj);
2304         err = err ?: bpf_object__init_struct_ops_maps(obj);
2305         if (err)
2306                 return err;
2307
2308         return 0;
2309 }
2310
2311 static bool section_have_execinstr(struct bpf_object *obj, int idx)
2312 {
2313         GElf_Shdr sh;
2314
2315         if (elf_sec_hdr(obj, elf_sec_by_idx(obj, idx), &sh))
2316                 return false;
2317
2318         return sh.sh_flags & SHF_EXECINSTR;
2319 }
2320
2321 static bool btf_needs_sanitization(struct bpf_object *obj)
2322 {
2323         bool has_func_global = kernel_supports(FEAT_BTF_GLOBAL_FUNC);
2324         bool has_datasec = kernel_supports(FEAT_BTF_DATASEC);
2325         bool has_func = kernel_supports(FEAT_BTF_FUNC);
2326
2327         return !has_func || !has_datasec || !has_func_global;
2328 }
2329
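/* Rewrite, in place, BTF constructs the running kernel does not support
 * into older ones it accepts: VAR becomes INT, DATASEC becomes STRUCT,
 * FUNC_PROTO becomes ENUM, FUNC becomes TYPEDEF, and global FUNCs are
 * downgraded to static FUNCs.
 */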
2330 static void bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf)
2331 {
2332         bool has_func_global = kernel_supports(FEAT_BTF_GLOBAL_FUNC);
2333         bool has_datasec = kernel_supports(FEAT_BTF_DATASEC);
2334         bool has_func = kernel_supports(FEAT_BTF_FUNC);
2335         struct btf_type *t;
2336         int i, j, vlen;
2337
2338         for (i = 1; i <= btf__get_nr_types(btf); i++) {
2339                 t = (struct btf_type *)btf__type_by_id(btf, i);
2340
2341                 if (!has_datasec && btf_is_var(t)) {
2342                         /* replace VAR with INT */
2343                         t->info = BTF_INFO_ENC(BTF_KIND_INT, 0, 0);
2344                         /*
2345                          * using size = 1 is the safest choice, 4 will be too
2346                          * big and cause kernel BTF validation failure if
2347                          * original variable took less than 4 bytes
2348                          */
2349                         t->size = 1;
2350                         *(int *)(t + 1) = BTF_INT_ENC(0, 0, 8);
2351                 } else if (!has_datasec && btf_is_datasec(t)) {
2352                         /* replace DATASEC with STRUCT */
2353                         const struct btf_var_secinfo *v = btf_var_secinfos(t);
2354                         struct btf_member *m = btf_members(t);
2355                         struct btf_type *vt;
2356                         char *name;
2357
2358                         name = (char *)btf__name_by_offset(btf, t->name_off);
2359                         while (*name) {
2360                                 if (*name == '.')
2361                                         *name = '_';
2362                                 name++;
2363                         }
2364
2365                         vlen = btf_vlen(t);
2366                         t->info = BTF_INFO_ENC(BTF_KIND_STRUCT, 0, vlen);
2367                         for (j = 0; j < vlen; j++, v++, m++) {
2368                                 /* order of field assignments is important */
2369                                 m->offset = v->offset * 8;
2370                                 m->type = v->type;
2371                                 /* preserve variable name as member name */
2372                                 vt = (void *)btf__type_by_id(btf, v->type);
2373                                 m->name_off = vt->name_off;
2374                         }
2375                 } else if (!has_func && btf_is_func_proto(t)) {
2376                         /* replace FUNC_PROTO with ENUM */
2377                         vlen = btf_vlen(t);
2378                         t->info = BTF_INFO_ENC(BTF_KIND_ENUM, 0, vlen);
2379                         t->size = sizeof(__u32); /* kernel enforced */
2380                 } else if (!has_func && btf_is_func(t)) {
2381                         /* replace FUNC with TYPEDEF */
2382                         t->info = BTF_INFO_ENC(BTF_KIND_TYPEDEF, 0, 0);
2383                 } else if (!has_func_global && btf_is_func(t)) {
2384                         /* replace BTF_FUNC_GLOBAL with BTF_FUNC_STATIC */
2385                         t->info = BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0);
2386                 }
2387         }
2388 }
2389
2390 static bool libbpf_needs_btf(const struct bpf_object *obj)
2391 {
2392         return obj->efile.btf_maps_shndx >= 0 ||
2393                obj->efile.st_ops_shndx >= 0 ||
2394                obj->nr_extern > 0;
2395 }
2396
2397 static bool kernel_needs_btf(const struct bpf_object *obj)
2398 {
2399         return obj->efile.st_ops_shndx >= 0;
2400 }
2401
2402 static int bpf_object__init_btf(struct bpf_object *obj,
2403                                 Elf_Data *btf_data,
2404                                 Elf_Data *btf_ext_data)
2405 {
2406         int err = -ENOENT;
2407
2408         if (btf_data) {
2409                 obj->btf = btf__new(btf_data->d_buf, btf_data->d_size);
2410                 if (IS_ERR(obj->btf)) {
2411                         err = PTR_ERR(obj->btf);
2412                         obj->btf = NULL;
2413                         pr_warn("Error loading ELF section %s: %d.\n",
2414                                 BTF_ELF_SEC, err);
2415                         goto out;
2416                 }
2417                 /* enforce 8-byte pointers for BPF-targeted BTFs */
2418                 btf__set_pointer_size(obj->btf, 8);
2419                 err = 0;
2420         }
2421         if (btf_ext_data) {
2422                 if (!obj->btf) {
2423                         pr_debug("Ignoring ELF section %s: the ELF section it depends on, %s, is missing.\n",
2424                                  BTF_EXT_ELF_SEC, BTF_ELF_SEC);
2425                         goto out;
2426                 }
2427                 obj->btf_ext = btf_ext__new(btf_ext_data->d_buf,
2428                                             btf_ext_data->d_size);
2429                 if (IS_ERR(obj->btf_ext)) {
2430                         pr_warn("Error loading ELF section %s: %ld. Ignoring it and continuing.\n",
2431                                 BTF_EXT_ELF_SEC, PTR_ERR(obj->btf_ext));
2432                         obj->btf_ext = NULL;
2433                         goto out;
2434                 }
2435         }
2436 out:
2437         if (err && libbpf_needs_btf(obj)) {
2438                 pr_warn("BTF is required, but is missing or corrupted.\n");
2439                 return err;
2440         }
2441         return 0;
2442 }
2443
2444 static int bpf_object__finalize_btf(struct bpf_object *obj)
2445 {
2446         int err;
2447
2448         if (!obj->btf)
2449                 return 0;
2450
2451         err = btf__finalize_data(obj, obj->btf);
2452         if (err) {
2453                 pr_warn("Error finalizing %s: %d.\n", BTF_ELF_SEC, err);
2454                 return err;
2455         }
2456
2457         return 0;
2458 }
2459
2460 static inline bool libbpf_prog_needs_vmlinux_btf(struct bpf_program *prog)
2461 {
2462         if (prog->type == BPF_PROG_TYPE_STRUCT_OPS ||
2463             prog->type == BPF_PROG_TYPE_LSM)
2464                 return true;
2465
2466         /* BPF_PROG_TYPE_TRACING programs which do not attach to other programs
2467          * also need vmlinux BTF
2468          */
2469         if (prog->type == BPF_PROG_TYPE_TRACING && !prog->attach_prog_fd)
2470                 return true;
2471
2472         return false;
2473 }
2474
2475 static int bpf_object__load_vmlinux_btf(struct bpf_object *obj)
2476 {
2477         bool need_vmlinux_btf = false;
2478         struct bpf_program *prog;
2479         int err;
2480
2481         /* CO-RE relocations need kernel BTF */
2482         if (obj->btf_ext && obj->btf_ext->core_relo_info.len)
2483                 need_vmlinux_btf = true;
2484
2485         bpf_object__for_each_program(prog, obj) {
2486                 if (!prog->load)
2487                         continue;
2488                 if (libbpf_prog_needs_vmlinux_btf(prog)) {
2489                         need_vmlinux_btf = true;
2490                         break;
2491                 }
2492         }
2493
2494         if (!need_vmlinux_btf)
2495                 return 0;
2496
2497         obj->btf_vmlinux = libbpf_find_kernel_btf();
2498         if (IS_ERR(obj->btf_vmlinux)) {
2499                 err = PTR_ERR(obj->btf_vmlinux);
2500                 pr_warn("Error loading vmlinux BTF: %d\n", err);
2501                 obj->btf_vmlinux = NULL;
2502                 return err;
2503         }
2504         return 0;
2505 }
2506
2507 static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj)
2508 {
2509         struct btf *kern_btf = obj->btf;
2510         bool btf_mandatory, sanitize;
2511         int err = 0;
2512
2513         if (!obj->btf)
2514                 return 0;
2515
2516         if (!kernel_supports(FEAT_BTF)) {
2517                 if (kernel_needs_btf(obj)) {
2518                         err = -EOPNOTSUPP;
2519                         goto report;
2520                 }
2521                 pr_debug("Kernel doesn't support BTF, skipping BTF upload.\n");
2522                 return 0;
2523         }
2524
2525         sanitize = btf_needs_sanitization(obj);
2526         if (sanitize) {
2527                 const void *raw_data;
2528                 __u32 sz;
2529
2530                 /* clone BTF to sanitize a copy and leave the original intact */
2531                 raw_data = btf__get_raw_data(obj->btf, &sz);
2532                 kern_btf = btf__new(raw_data, sz);
2533                 if (IS_ERR(kern_btf))
2534                         return PTR_ERR(kern_btf);
2535
2536                 /* enforce 8-byte pointers for BPF-targeted BTFs */
2537                 btf__set_pointer_size(obj->btf, 8);
2538                 bpf_object__sanitize_btf(obj, kern_btf);
2539         }
2540
2541         err = btf__load(kern_btf);
2542         if (sanitize) {
2543                 if (!err) {
2544                         /* move fd to libbpf's BTF */
2545                         btf__set_fd(obj->btf, btf__fd(kern_btf));
2546                         btf__set_fd(kern_btf, -1);
2547                 }
2548                 btf__free(kern_btf);
2549         }
2550 report:
2551         if (err) {
2552                 btf_mandatory = kernel_needs_btf(obj);
2553                 pr_warn("Error loading .BTF into kernel: %d. %s\n", err,
2554                         btf_mandatory ? "BTF is mandatory, can't proceed."
2555                                       : "BTF is optional, ignoring.");
2556                 if (!btf_mandatory)
2557                         err = 0;
2558         }
2559         return err;
2560 }
2561
2562 static const char *elf_sym_str(const struct bpf_object *obj, size_t off)
2563 {
2564         const char *name;
2565
2566         name = elf_strptr(obj->efile.elf, obj->efile.strtabidx, off);
2567         if (!name) {
2568                 pr_warn("elf: failed to get section name string at offset %zu from %s: %s\n",
2569                         off, obj->path, elf_errmsg(-1));
2570                 return NULL;
2571         }
2572
2573         return name;
2574 }
2575
2576 static const char *elf_sec_str(const struct bpf_object *obj, size_t off)
2577 {
2578         const char *name;
2579
2580         name = elf_strptr(obj->efile.elf, obj->efile.shstrndx, off);
2581         if (!name) {
2582                 pr_warn("elf: failed to get section name string at offset %zu from %s: %s\n",
2583                         off, obj->path, elf_errmsg(-1));
2584                 return NULL;
2585         }
2586
2587         return name;
2588 }
2589
2590 static Elf_Scn *elf_sec_by_idx(const struct bpf_object *obj, size_t idx)
2591 {
2592         Elf_Scn *scn;
2593
2594         scn = elf_getscn(obj->efile.elf, idx);
2595         if (!scn) {
2596                 pr_warn("elf: failed to get section(%zu) from %s: %s\n",
2597                         idx, obj->path, elf_errmsg(-1));
2598                 return NULL;
2599         }
2600         return scn;
2601 }
2602
2603 static Elf_Scn *elf_sec_by_name(const struct bpf_object *obj, const char *name)
2604 {
2605         Elf_Scn *scn = NULL;
2606         Elf *elf = obj->efile.elf;
2607         const char *sec_name;
2608
2609         while ((scn = elf_nextscn(elf, scn)) != NULL) {
2610                 sec_name = elf_sec_name(obj, scn);
2611                 if (!sec_name)
2612                         return NULL;
2613
2614                 if (strcmp(sec_name, name) != 0)
2615                         continue;
2616
2617                 return scn;
2618         }
2619         return NULL;
2620 }
2621
2622 static int elf_sec_hdr(const struct bpf_object *obj, Elf_Scn *scn, GElf_Shdr *hdr)
2623 {
2624         if (!scn)
2625                 return -EINVAL;
2626
2627         if (gelf_getshdr(scn, hdr) != hdr) {
2628                 pr_warn("elf: failed to get section(%zu) header from %s: %s\n",
2629                         elf_ndxscn(scn), obj->path, elf_errmsg(-1));
2630                 return -EINVAL;
2631         }
2632
2633         return 0;
2634 }
2635
2636 static const char *elf_sec_name(const struct bpf_object *obj, Elf_Scn *scn)
2637 {
2638         const char *name;
2639         GElf_Shdr sh;
2640
2641         if (!scn)
2642                 return NULL;
2643
2644         if (elf_sec_hdr(obj, scn, &sh))
2645                 return NULL;
2646
2647         name = elf_sec_str(obj, sh.sh_name);
2648         if (!name) {
2649                 pr_warn("elf: failed to get section(%zu) name from %s: %s\n",
2650                         elf_ndxscn(scn), obj->path, elf_errmsg(-1));
2651                 return NULL;
2652         }
2653
2654         return name;
2655 }
2656
2657 static Elf_Data *elf_sec_data(const struct bpf_object *obj, Elf_Scn *scn)
2658 {
2659         Elf_Data *data;
2660
2661         if (!scn)
2662                 return NULL;
2663
2664         data = elf_getdata(scn, 0);
2665         if (!data) {
2666                 pr_warn("elf: failed to get section(%zu) %s data from %s: %s\n",
2667                         elf_ndxscn(scn), elf_sec_name(obj, scn) ?: "<?>",
2668                         obj->path, elf_errmsg(-1));
2669                 return NULL;
2670         }
2671
2672         return data;
2673 }
2674
2675 static bool is_sec_name_dwarf(const char *name)
2676 {
2677         /* approximation, but the actual list is too long */
2678         return strncmp(name, ".debug_", sizeof(".debug_") - 1) == 0;
2679 }
2680
2681 static bool ignore_elf_section(GElf_Shdr *hdr, const char *name)
2682 {
2683         /* no special handling of .strtab */
2684         if (hdr->sh_type == SHT_STRTAB)
2685                 return true;
2686
2687         /* ignore .llvm_addrsig section as well */
2688         if (hdr->sh_type == 0x6FFF4C03 /* SHT_LLVM_ADDRSIG */)
2689                 return true;
2690
2691         /* no subprograms will lead to an empty .text section, ignore it */
2692         if (hdr->sh_type == SHT_PROGBITS && hdr->sh_size == 0 &&
2693             strcmp(name, ".text") == 0)
2694                 return true;
2695
2696         /* DWARF sections */
2697         if (is_sec_name_dwarf(name))
2698                 return true;
2699
2700         if (strncmp(name, ".rel", sizeof(".rel") - 1) == 0) {
2701                 name += sizeof(".rel") - 1;
2702                 /* DWARF section relocations */
2703                 if (is_sec_name_dwarf(name))
2704                         return true;
2705
2706                 /* .BTF and .BTF.ext don't need relocations */
2707                 if (strcmp(name, BTF_ELF_SEC) == 0 ||
2708                     strcmp(name, BTF_EXT_ELF_SEC) == 0)
2709                         return true;
2710         }
2711
2712         return false;
2713 }
2714
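/* Make a single pass over all ELF sections, stashing the recognized ones
 * (license, version, legacy maps, .maps, .BTF, .BTF.ext, symbol table,
 * executable code, .data/.rodata/.bss, struct_ops data, and relocation
 * sections) on obj->efile for later loading stages to consume.
 */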
2715 static int bpf_object__elf_collect(struct bpf_object *obj)
2716 {
2717         Elf *elf = obj->efile.elf;
2718         Elf_Data *btf_ext_data = NULL;
2719         Elf_Data *btf_data = NULL;
2720         Elf_Scn *scn = NULL;
2721         int idx = 0, err = 0;
2722
2723         while ((scn = elf_nextscn(elf, scn)) != NULL) {
2724                 const char *name;
2725                 GElf_Shdr sh;
2726                 Elf_Data *data;
2727
2728                 idx++;
2729
2730                 if (elf_sec_hdr(obj, scn, &sh))
2731                         return -LIBBPF_ERRNO__FORMAT;
2732
2733                 name = elf_sec_str(obj, sh.sh_name);
2734                 if (!name)
2735                         return -LIBBPF_ERRNO__FORMAT;
2736
2737                 if (ignore_elf_section(&sh, name))
2738                         continue;
2739
2740                 data = elf_sec_data(obj, scn);
2741                 if (!data)
2742                         return -LIBBPF_ERRNO__FORMAT;
2743
2744                 pr_debug("elf: section(%d) %s, size %zu, link %d, flags %lx, type=%d\n",
2745                          idx, name, data->d_size,
2746                          (int)sh.sh_link, (unsigned long)sh.sh_flags,
2747                          (int)sh.sh_type);
2748
2749                 if (strcmp(name, "license") == 0) {
2750                         err = bpf_object__init_license(obj, data->d_buf, data->d_size);
2751                         if (err)
2752                                 return err;
2753                 } else if (strcmp(name, "version") == 0) {
2754                         err = bpf_object__init_kversion(obj, data->d_buf, data->d_size);
2755                         if (err)
2756                                 return err;
2757                 } else if (strcmp(name, "maps") == 0) {
2758                         obj->efile.maps_shndx = idx;
2759                 } else if (strcmp(name, MAPS_ELF_SEC) == 0) {
2760                         obj->efile.btf_maps_shndx = idx;
2761                 } else if (strcmp(name, BTF_ELF_SEC) == 0) {
2762                         btf_data = data;
2763                 } else if (strcmp(name, BTF_EXT_ELF_SEC) == 0) {
2764                         btf_ext_data = data;
2765                 } else if (sh.sh_type == SHT_SYMTAB) {
2766                         if (obj->efile.symbols) {
2767                                 pr_warn("elf: multiple symbol tables in %s\n", obj->path);
2768                                 return -LIBBPF_ERRNO__FORMAT;
2769                         }
2770                         obj->efile.symbols = data;
2771                         obj->efile.symbols_shndx = idx;
2772                         obj->efile.strtabidx = sh.sh_link;
2773                 } else if (sh.sh_type == SHT_PROGBITS && data->d_size > 0) {
2774                         if (sh.sh_flags & SHF_EXECINSTR) {
2775                                 if (strcmp(name, ".text") == 0)
2776                                         obj->efile.text_shndx = idx;
2777                                 err = bpf_object__add_program(obj, data->d_buf,
2778                                                               data->d_size,
2779                                                               name, idx);
2780                                 if (err)
2781                                         return err;
2782                         } else if (strcmp(name, DATA_SEC) == 0) {
2783                                 obj->efile.data = data;
2784                                 obj->efile.data_shndx = idx;
2785                         } else if (strcmp(name, RODATA_SEC) == 0) {
2786                                 obj->efile.rodata = data;
2787                                 obj->efile.rodata_shndx = idx;
2788                         } else if (strcmp(name, STRUCT_OPS_SEC) == 0) {
2789                                 obj->efile.st_ops_data = data;
2790                                 obj->efile.st_ops_shndx = idx;
2791                         } else {
2792                                 pr_info("elf: skipping unrecognized data section(%d) %s\n",
2793                                         idx, name);
2794                         }
2795                 } else if (sh.sh_type == SHT_REL) {
2796                         int nr_sects = obj->efile.nr_reloc_sects;
2797                         void *sects = obj->efile.reloc_sects;
2798                         int sec = sh.sh_info; /* points to other section */
2799
2800                         /* Only do relo for section with exec instructions */
2801                         if (!section_have_execinstr(obj, sec) &&
2802                             strcmp(name, ".rel" STRUCT_OPS_SEC) &&
2803                             strcmp(name, ".rel" MAPS_ELF_SEC)) {
2804                                 pr_info("elf: skipping relo section(%d) %s for section(%d) %s\n",
2805                                         idx, name, sec,
2806                                         elf_sec_name(obj, elf_sec_by_idx(obj, sec)) ?: "<?>");
2807                                 continue;
2808                         }
2809
2810                         sects = libbpf_reallocarray(sects, nr_sects + 1,
2811                                                     sizeof(*obj->efile.reloc_sects));
2812                         if (!sects)
2813                                 return -ENOMEM;
2814
2815                         obj->efile.reloc_sects = sects;
2816                         obj->efile.nr_reloc_sects++;
2817
2818                         obj->efile.reloc_sects[nr_sects].shdr = sh;
2819                         obj->efile.reloc_sects[nr_sects].data = data;
2820                 } else if (sh.sh_type == SHT_NOBITS && strcmp(name, BSS_SEC) == 0) {
2821                         obj->efile.bss = data;
2822                         obj->efile.bss_shndx = idx;
2823                 } else {
2824                         pr_info("elf: skipping section(%d) %s (size %zu)\n", idx, name, (size_t)sh.sh_size);
2825                 }
2826         }
2827
2828         if (!obj->efile.strtabidx || obj->efile.strtabidx > idx) {
2829                 pr_warn("elf: symbol strings section missing or invalid in %s\n", obj->path);
2830                 return -LIBBPF_ERRNO__FORMAT;
2831         }
2832         return bpf_object__init_btf(obj, btf_data, btf_ext_data);
2833 }
2834
2835 static bool sym_is_extern(const GElf_Sym *sym)
2836 {
2837         int bind = GELF_ST_BIND(sym->st_info);
2838         /* externs are symbols w/ type=NOTYPE, bind=GLOBAL|WEAK, section=UND */
2839         return sym->st_shndx == SHN_UNDEF &&
2840                (bind == STB_GLOBAL || bind == STB_WEAK) &&
2841                GELF_ST_TYPE(sym->st_info) == STT_NOTYPE;
2842 }
2843
2844 static int find_extern_btf_id(const struct btf *btf, const char *ext_name)
2845 {
2846         const struct btf_type *t;
2847         const char *var_name;
2848         int i, n;
2849
2850         if (!btf)
2851                 return -ESRCH;
2852
2853         n = btf__get_nr_types(btf);
2854         for (i = 1; i <= n; i++) {
2855                 t = btf__type_by_id(btf, i);
2856
2857                 if (!btf_is_var(t))
2858                         continue;
2859
2860                 var_name = btf__name_by_offset(btf, t->name_off);
2861                 if (strcmp(var_name, ext_name))
2862                         continue;
2863
2864                 if (btf_var(t)->linkage != BTF_VAR_GLOBAL_EXTERN)
2865                         return -EINVAL;
2866
2867                 return i;
2868         }
2869
2870         return -ENOENT;
2871 }
2872
2873 static int find_extern_sec_btf_id(struct btf *btf, int ext_btf_id)
{
2874         const struct btf_var_secinfo *vs;
2875         const struct btf_type *t;
2876         int i, j, n;
2877
2878         if (!btf)
2879                 return -ESRCH;
2880
2881         n = btf__get_nr_types(btf);
2882         for (i = 1; i <= n; i++) {
2883                 t = btf__type_by_id(btf, i);
2884
2885                 if (!btf_is_datasec(t))
2886                         continue;
2887
2888                 vs = btf_var_secinfos(t);
2889                 for (j = 0; j < btf_vlen(t); j++, vs++) {
2890                         if (vs->type == ext_btf_id)
2891                                 return i;
2892                 }
2893         }
2894
2895         return -ENOENT;
2896 }
2897
2898 static enum kcfg_type find_kcfg_type(const struct btf *btf, int id,
2899                                      bool *is_signed)
2900 {
2901         const struct btf_type *t;
2902         const char *name;
2903
2904         t = skip_mods_and_typedefs(btf, id, NULL);
2905         name = btf__name_by_offset(btf, t->name_off);
2906
2907         if (is_signed)
2908                 *is_signed = false;
2909         switch (btf_kind(t)) {
2910         case BTF_KIND_INT: {
2911                 int enc = btf_int_encoding(t);
2912
2913                 if (enc & BTF_INT_BOOL)
2914                         return t->size == 1 ? KCFG_BOOL : KCFG_UNKNOWN;
2915                 if (is_signed)
2916                         *is_signed = enc & BTF_INT_SIGNED;
2917                 if (t->size == 1)
2918                         return KCFG_CHAR;
2919                 if (t->size < 1 || t->size > 8 || (t->size & (t->size - 1)))
2920                         return KCFG_UNKNOWN;
2921                 return KCFG_INT;
2922         }
2923         case BTF_KIND_ENUM:
2924                 if (t->size != 4)
2925                         return KCFG_UNKNOWN;
2926                 if (strcmp(name, "libbpf_tristate"))
2927                         return KCFG_UNKNOWN;
2928                 return KCFG_TRISTATE;
2929         case BTF_KIND_ARRAY:
2930                 if (btf_array(t)->nelems == 0)
2931                         return KCFG_UNKNOWN;
2932                 if (find_kcfg_type(btf, btf_array(t)->type, NULL) != KCFG_CHAR)
2933                         return KCFG_UNKNOWN;
2934                 return KCFG_CHAR_ARR;
2935         default:
2936                 return KCFG_UNKNOWN;
2937         }
2938 }
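
/* A sketch of how the classification above maps onto BPF-side extern
 * declarations (config names are made up for illustration):
 *
 *	extern _Bool CONFIG_A __kconfig;			=> KCFG_BOOL
 *	extern char CONFIG_B __kconfig;				=> KCFG_CHAR
 *	extern unsigned long CONFIG_C __kconfig;		=> KCFG_INT
 *	extern enum libbpf_tristate CONFIG_D __kconfig;		=> KCFG_TRISTATE
 *	extern const char CONFIG_E[8] __kconfig;		=> KCFG_CHAR_ARR
 *
 * Anything else (e.g., an array of non-char elements, or an integer
 * whose size is not a power of two between 1 and 8) classifies as
 * KCFG_UNKNOWN and is rejected at collection time.
 */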
2939
2940 static int cmp_externs(const void *_a, const void *_b)
2941 {
2942         const struct extern_desc *a = _a;
2943         const struct extern_desc *b = _b;
2944
2945         if (a->type != b->type)
2946                 return a->type < b->type ? -1 : 1;
2947
2948         if (a->type == EXT_KCFG) {
2949                 /* descending order by alignment requirements */
2950                 if (a->kcfg.align != b->kcfg.align)
2951                         return a->kcfg.align > b->kcfg.align ? -1 : 1;
2952                 /* ascending order by size, within same alignment class */
2953                 if (a->kcfg.sz != b->kcfg.sz)
2954                         return a->kcfg.sz < b->kcfg.sz ? -1 : 1;
2955         }
2956
2957         /* resolve ties by name */
2958         return strcmp(a->name, b->name);
2959 }
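
/* Worked example of this ordering (hypothetical kcfg externs): given a
 * 'u64 C' (align 8), an 'int A' (align 4) and a 'char B' (align 1),
 * the sort yields C, A, B; the offsets later assigned via
 * roundup(off, align) become 0, 8 and 12. Sorting by descending
 * alignment first avoids padding holes in the .kconfig map image.
 */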
2960
2961 static int find_int_btf_id(const struct btf *btf)
2962 {
2963         const struct btf_type *t;
2964         int i, n;
2965
2966         n = btf__get_nr_types(btf);
2967         for (i = 1; i <= n; i++) {
2968                 t = btf__type_by_id(btf, i);
2969
2970                 if (btf_is_int(t) && btf_int_bits(t) == 32)
2971                         return i;
2972         }
2973
2974         return 0;
2975 }
2976
2977 static int bpf_object__collect_externs(struct bpf_object *obj)
2978 {
2979         struct btf_type *sec, *kcfg_sec = NULL, *ksym_sec = NULL;
2980         const struct btf_type *t;
2981         struct extern_desc *ext;
2982         int i, n, off;
2983         const char *ext_name, *sec_name;
2984         Elf_Scn *scn;
2985         GElf_Shdr sh;
2986
2987         if (!obj->efile.symbols)
2988                 return 0;
2989
2990         scn = elf_sec_by_idx(obj, obj->efile.symbols_shndx);
2991         if (elf_sec_hdr(obj, scn, &sh))
2992                 return -LIBBPF_ERRNO__FORMAT;
2993
2994         n = sh.sh_size / sh.sh_entsize;
2995         pr_debug("looking for externs among %d symbols...\n", n);
2996
2997         for (i = 0; i < n; i++) {
2998                 GElf_Sym sym;
2999
3000                 if (!gelf_getsym(obj->efile.symbols, i, &sym))
3001                         return -LIBBPF_ERRNO__FORMAT;
3002                 if (!sym_is_extern(&sym))
3003                         continue;
3004                 ext_name = elf_sym_str(obj, sym.st_name);
3005                 if (!ext_name || !ext_name[0])
3006                         continue;
3007
3008                 ext = obj->externs;
3009                 ext = libbpf_reallocarray(ext, obj->nr_extern + 1, sizeof(*ext));
3010                 if (!ext)
3011                         return -ENOMEM;
3012                 obj->externs = ext;
3013                 ext = &ext[obj->nr_extern];
3014                 memset(ext, 0, sizeof(*ext));
3015                 obj->nr_extern++;
3016
3017                 ext->btf_id = find_extern_btf_id(obj->btf, ext_name);
3018                 if (ext->btf_id <= 0) {
3019                         pr_warn("failed to find BTF for extern '%s': %d\n",
3020                                 ext_name, ext->btf_id);
3021                         return ext->btf_id;
3022                 }
3023                 t = btf__type_by_id(obj->btf, ext->btf_id);
3024                 ext->name = btf__name_by_offset(obj->btf, t->name_off);
3025                 ext->sym_idx = i;
3026                 ext->is_weak = GELF_ST_BIND(sym.st_info) == STB_WEAK;
3027
3028                 ext->sec_btf_id = find_extern_sec_btf_id(obj->btf, ext->btf_id);
3029                 if (ext->sec_btf_id <= 0) {
3030                         pr_warn("failed to find BTF for extern '%s' [%d] section: %d\n",
3031                                 ext_name, ext->btf_id, ext->sec_btf_id);
3032                         return ext->sec_btf_id;
3033                 }
3034                 sec = (void *)btf__type_by_id(obj->btf, ext->sec_btf_id);
3035                 sec_name = btf__name_by_offset(obj->btf, sec->name_off);
3036
3037                 if (strcmp(sec_name, KCONFIG_SEC) == 0) {
3038                         kcfg_sec = sec;
3039                         ext->type = EXT_KCFG;
3040                         ext->kcfg.sz = btf__resolve_size(obj->btf, t->type);
3041                         if (ext->kcfg.sz <= 0) {
3042                                 pr_warn("failed to resolve size of extern (kcfg) '%s': %d\n",
3043                                         ext_name, ext->kcfg.sz);
3044                                 return ext->kcfg.sz;
3045                         }
3046                         ext->kcfg.align = btf__align_of(obj->btf, t->type);
3047                         if (ext->kcfg.align <= 0) {
3048                                 pr_warn("failed to determine alignment of extern (kcfg) '%s': %d\n",
3049                                         ext_name, ext->kcfg.align);
3050                                 return -EINVAL;
3051                         }
3052                         ext->kcfg.type = find_kcfg_type(obj->btf, t->type,
3053                                                         &ext->kcfg.is_signed);
3054                         if (ext->kcfg.type == KCFG_UNKNOWN) {
3055                                 pr_warn("extern (kcfg) '%s' type is unsupported\n", ext_name);
3056                                 return -ENOTSUP;
3057                         }
3058                 } else if (strcmp(sec_name, KSYMS_SEC) == 0) {
3059                         const struct btf_type *vt;
3060
3061                         ksym_sec = sec;
3062                         ext->type = EXT_KSYM;
3063
3064                         vt = skip_mods_and_typedefs(obj->btf, t->type, NULL);
3065                         if (!btf_is_void(vt)) {
3066                                 pr_warn("extern (ksym) '%s' is not typeless (void)\n", ext_name);
3067                                 return -ENOTSUP;
3068                         }
3069                 } else {
3070                         pr_warn("unrecognized extern section '%s'\n", sec_name);
3071                         return -ENOTSUP;
3072                 }
3073         }
3074         pr_debug("collected %d externs total\n", obj->nr_extern);
3075
3076         if (!obj->nr_extern)
3077                 return 0;
3078
3079         /* sort externs by type, for kcfg ones also by (align, size, name) */
3080         qsort(obj->externs, obj->nr_extern, sizeof(*ext), cmp_externs);
3081
3082         /* for .ksyms section, we need to turn all externs into allocated
3083          * variables in BTF to pass kernel verification; we do this by
3084          * pretending that each extern is an int-sized (4-byte) variable
3085          */
3086         if (ksym_sec) {
3087                 /* find existing 4-byte integer type in BTF to use for fake
3088                  * extern variables in DATASEC
3089                  */
3090                 int int_btf_id = find_int_btf_id(obj->btf);
3091
3092                 for (i = 0; i < obj->nr_extern; i++) {
3093                         ext = &obj->externs[i];
3094                         if (ext->type != EXT_KSYM)
3095                                 continue;
3096                         pr_debug("extern (ksym) #%d: symbol %d, name %s\n",
3097                                  i, ext->sym_idx, ext->name);
3098                 }
3099
3100                 sec = ksym_sec;
3101                 n = btf_vlen(sec);
3102                 for (i = 0, off = 0; i < n; i++, off += sizeof(int)) {
3103                         struct btf_var_secinfo *vs = btf_var_secinfos(sec) + i;
3104                         struct btf_type *vt;
3105
3106                         vt = (void *)btf__type_by_id(obj->btf, vs->type);
3107                         ext_name = btf__name_by_offset(obj->btf, vt->name_off);
3108                         ext = find_extern_by_name(obj, ext_name);
3109                         if (!ext) {
3110                                 pr_warn("failed to find extern definition for BTF var '%s'\n",
3111                                         ext_name);
3112                                 return -ESRCH;
3113                         }
3114                         btf_var(vt)->linkage = BTF_VAR_GLOBAL_ALLOCATED;
3115                         vt->type = int_btf_id;
3116                         vs->offset = off;
3117                         vs->size = sizeof(int);
3118                 }
3119                 sec->size = off;
3120         }
3121
3122         if (kcfg_sec) {
3123                 sec = kcfg_sec;
3124                 /* for kcfg externs calculate their offsets within a .kconfig map */
3125                 off = 0;
3126                 for (i = 0; i < obj->nr_extern; i++) {
3127                         ext = &obj->externs[i];
3128                         if (ext->type != EXT_KCFG)
3129                                 continue;
3130
3131                         ext->kcfg.data_off = roundup(off, ext->kcfg.align);
3132                         off = ext->kcfg.data_off + ext->kcfg.sz;
3133                         pr_debug("extern (kcfg) #%d: symbol %d, off %u, name %s\n",
3134                                  i, ext->sym_idx, ext->kcfg.data_off, ext->name);
3135                 }
3136                 sec->size = off;
3137                 n = btf_vlen(sec);
3138                 for (i = 0; i < n; i++) {
3139                         struct btf_var_secinfo *vs = btf_var_secinfos(sec) + i;
3140
3141                         t = btf__type_by_id(obj->btf, vs->type);
3142                         ext_name = btf__name_by_offset(obj->btf, t->name_off);
3143                         ext = find_extern_by_name(obj, ext_name);
3144                         if (!ext) {
3145                                 pr_warn("failed to find extern definition for BTF var '%s'\n",
3146                                         ext_name);
3147                                 return -ESRCH;
3148                         }
3149                         btf_var(t)->linkage = BTF_VAR_GLOBAL_ALLOCATED;
3150                         vs->offset = ext->kcfg.data_off;
3151                 }
3152         }
3153         return 0;
3154 }
3155
3156 static struct bpf_program *
3157 bpf_object__find_prog_by_idx(struct bpf_object *obj, int idx)
3158 {
3159         struct bpf_program *prog;
3160         size_t i;
3161
3162         for (i = 0; i < obj->nr_programs; i++) {
3163                 prog = &obj->programs[i];
3164                 if (prog->idx == idx)
3165                         return prog;
3166         }
3167         return NULL;
3168 }
3169
3170 struct bpf_program *
3171 bpf_object__find_program_by_title(const struct bpf_object *obj,
3172                                   const char *title)
3173 {
3174         struct bpf_program *pos;
3175
3176         bpf_object__for_each_program(pos, obj) {
3177                 if (pos->section_name && !strcmp(pos->section_name, title))
3178                         return pos;
3179         }
3180         return NULL;
3181 }
3182
3183 struct bpf_program *
3184 bpf_object__find_program_by_name(const struct bpf_object *obj,
3185                                  const char *name)
3186 {
3187         struct bpf_program *prog;
3188
3189         bpf_object__for_each_program(prog, obj) {
3190                 if (!strcmp(prog->name, name))
3191                         return prog;
3192         }
3193         return NULL;
3194 }
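
/* Minimal user-space lookup sketch (object and program names are
 * hypothetical; error handling omitted):
 *
 *	struct bpf_object *obj = bpf_object__open("prog.bpf.o");
 *	struct bpf_program *prog;
 *
 *	prog = bpf_object__find_program_by_name(obj, "handle_tp");
 *
 * Unlike the _by_title() variant above, this matches the C function
 * name of the program, not its ELF section name.
 */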
3195
3196 static bool bpf_object__shndx_is_data(const struct bpf_object *obj,
3197                                       int shndx)
3198 {
3199         return shndx == obj->efile.data_shndx ||
3200                shndx == obj->efile.bss_shndx ||
3201                shndx == obj->efile.rodata_shndx;
3202 }
3203
3204 static bool bpf_object__shndx_is_maps(const struct bpf_object *obj,
3205                                       int shndx)
3206 {
3207         return shndx == obj->efile.maps_shndx ||
3208                shndx == obj->efile.btf_maps_shndx;
3209 }
3210
3211 static enum libbpf_map_type
3212 bpf_object__section_to_libbpf_map_type(const struct bpf_object *obj, int shndx)
3213 {
3214         if (shndx == obj->efile.data_shndx)
3215                 return LIBBPF_MAP_DATA;
3216         else if (shndx == obj->efile.bss_shndx)
3217                 return LIBBPF_MAP_BSS;
3218         else if (shndx == obj->efile.rodata_shndx)
3219                 return LIBBPF_MAP_RODATA;
3220         else if (shndx == obj->efile.symbols_shndx)
3221                 return LIBBPF_MAP_KCONFIG;
3222         else
3223                 return LIBBPF_MAP_UNSPEC;
3224 }
3225
3226 static int bpf_program__record_reloc(struct bpf_program *prog,
3227                                      struct reloc_desc *reloc_desc,
3228                                      __u32 insn_idx, const char *name,
3229                                      const GElf_Sym *sym, const GElf_Rel *rel)
3230 {
3231         struct bpf_insn *insn = &prog->insns[insn_idx];
3232         size_t map_idx, nr_maps = prog->obj->nr_maps;
3233         struct bpf_object *obj = prog->obj;
3234         __u32 shdr_idx = sym->st_shndx;
3235         enum libbpf_map_type type;
3236         struct bpf_map *map;
3237
3238         /* sub-program call relocation */
3239         if (insn->code == (BPF_JMP | BPF_CALL)) {
3240                 if (insn->src_reg != BPF_PSEUDO_CALL) {
3241                         pr_warn("incorrect bpf_call opcode\n");
3242                         return -LIBBPF_ERRNO__RELOC;
3243                 }
3244                 /* text_shndx can be 0, if no default "main" program exists */
3245                 if (!shdr_idx || shdr_idx != obj->efile.text_shndx) {
3246                         pr_warn("bad call relo against section %u\n", shdr_idx);
3247                         return -LIBBPF_ERRNO__RELOC;
3248                 }
3249                 if (sym->st_value % 8) {
3250                         pr_warn("bad call relo offset: %zu\n",
3251                                 (size_t)sym->st_value);
3252                         return -LIBBPF_ERRNO__RELOC;
3253                 }
3254                 reloc_desc->type = RELO_CALL;
3255                 reloc_desc->insn_idx = insn_idx;
3256                 reloc_desc->sym_off = sym->st_value;
3257                 obj->has_pseudo_calls = true;
3258                 return 0;
3259         }
3260
3261         if (insn->code != (BPF_LD | BPF_IMM | BPF_DW)) {
3262                 pr_warn("invalid relo for insns[%d].code 0x%x\n",
3263                         insn_idx, insn->code);
3264                 return -LIBBPF_ERRNO__RELOC;
3265         }
3266
3267         if (sym_is_extern(sym)) {
3268                 int sym_idx = GELF_R_SYM(rel->r_info);
3269                 int i, n = obj->nr_extern;
3270                 struct extern_desc *ext;
3271
3272                 for (i = 0; i < n; i++) {
3273                         ext = &obj->externs[i];
3274                         if (ext->sym_idx == sym_idx)
3275                                 break;
3276                 }
3277                 if (i >= n) {
3278                         pr_warn("extern relo failed to find extern for sym %d\n",
3279                                 sym_idx);
3280                         return -LIBBPF_ERRNO__RELOC;
3281                 }
3282                 pr_debug("found extern #%d '%s' (sym %d) for insn %u\n",
3283                          i, ext->name, ext->sym_idx, insn_idx);
3284                 reloc_desc->type = RELO_EXTERN;
3285                 reloc_desc->insn_idx = insn_idx;
3286                 reloc_desc->sym_off = i; /* sym_off stores extern index */
3287                 return 0;
3288         }
3289
3290         if (!shdr_idx || shdr_idx >= SHN_LORESERVE) {
3291                 pr_warn("invalid relo for \'%s\' in special section 0x%x; forgot to initialize global var?..\n",
3292                         name, shdr_idx);
3293                 return -LIBBPF_ERRNO__RELOC;
3294         }
3295
3296         type = bpf_object__section_to_libbpf_map_type(obj, shdr_idx);
3297
3298         /* generic map reference relocation */
3299         if (type == LIBBPF_MAP_UNSPEC) {
3300                 if (!bpf_object__shndx_is_maps(obj, shdr_idx)) {
3301                         pr_warn("bad map relo against section %u\n",
3302                                 shdr_idx);
3303                         return -LIBBPF_ERRNO__RELOC;
3304                 }
3305                 for (map_idx = 0; map_idx < nr_maps; map_idx++) {
3306                         map = &obj->maps[map_idx];
3307                         if (map->libbpf_type != type ||
3308                             map->sec_idx != sym->st_shndx ||
3309                             map->sec_offset != sym->st_value)
3310                                 continue;
3311                         pr_debug("found map %zd (%s, sec %d, off %zu) for insn %u\n",
3312                                  map_idx, map->name, map->sec_idx,
3313                                  map->sec_offset, insn_idx);
3314                         break;
3315                 }
3316                 if (map_idx >= nr_maps) {
3317                         pr_warn("map relo failed to find map for sec %u, off %zu\n",
3318                                 shdr_idx, (size_t)sym->st_value);
3319                         return -LIBBPF_ERRNO__RELOC;
3320                 }
3321                 reloc_desc->type = RELO_LD64;
3322                 reloc_desc->insn_idx = insn_idx;
3323                 reloc_desc->map_idx = map_idx;
3324                 reloc_desc->sym_off = 0; /* sym->st_value determines map_idx */
3325                 return 0;
3326         }
3327
3328         /* global data map relocation */
3329         if (!bpf_object__shndx_is_data(obj, shdr_idx)) {
3330                 pr_warn("bad data relo against section %u\n", shdr_idx);
3331                 return -LIBBPF_ERRNO__RELOC;
3332         }
3333         for (map_idx = 0; map_idx < nr_maps; map_idx++) {
3334                 map = &obj->maps[map_idx];
3335                 if (map->libbpf_type != type)
3336                         continue;
3337                 pr_debug("found data map %zd (%s, sec %d, off %zu) for insn %u\n",
3338                          map_idx, map->name, map->sec_idx, map->sec_offset,
3339                          insn_idx);
3340                 break;
3341         }
3342         if (map_idx >= nr_maps) {
3343                 pr_warn("data relo failed to find map for sec %u\n",
3344                         shdr_idx);
3345                 return -LIBBPF_ERRNO__RELOC;
3346         }
3347
3348         reloc_desc->type = RELO_DATA;
3349         reloc_desc->insn_idx = insn_idx;
3350         reloc_desc->map_idx = map_idx;
3351         reloc_desc->sym_off = sym->st_value;
3352         return 0;
3353 }
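
/* For reference, a BPF-side map access like (map name hypothetical)
 *
 *	bpf_map_lookup_elem(&my_map, &key);
 *
 * compiles to a double-word ld_imm64 (BPF_LD | BPF_IMM | BPF_DW)
 * instruction loading &my_map, with an ELF relocation against the
 * map's symbol; the code above matches it by (sec_idx, sec_offset) and
 * records it as RELO_LD64. Reads of global variables instead relocate
 * against .data/.bss/.rodata and become RELO_DATA, with sym_off
 * preserving the variable's offset within its section.
 */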
3354
3355 static int
3356 bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr,
3357                            Elf_Data *data, struct bpf_object *obj)
3358 {
3359         Elf_Data *symbols = obj->efile.symbols;
3360         int err, i, nrels;
3361
3362         pr_debug("collecting relocation info for: '%s'\n", prog->section_name);
3363         nrels = shdr->sh_size / shdr->sh_entsize;
3364
3365         prog->reloc_desc = malloc(sizeof(*prog->reloc_desc) * nrels);
3366         if (!prog->reloc_desc) {
3367                 pr_warn("failed to alloc memory in relocation\n");
3368                 return -ENOMEM;
3369         }
3370         prog->nr_reloc = nrels;
3371
3372         for (i = 0; i < nrels; i++) {
3373                 const char *name;
3374                 __u32 insn_idx;
3375                 GElf_Sym sym;
3376                 GElf_Rel rel;
3377
3378                 if (!gelf_getrel(data, i, &rel)) {
3379                         pr_warn("relocation: failed to get %d reloc\n", i);
3380                         return -LIBBPF_ERRNO__FORMAT;
3381                 }
3382                 if (!gelf_getsym(symbols, GELF_R_SYM(rel.r_info), &sym)) {
3383                         pr_warn("relocation: symbol %zx not found\n",
3384                                 (size_t)GELF_R_SYM(rel.r_info));
3385                         return -LIBBPF_ERRNO__FORMAT;
3386                 }
3387                 if (rel.r_offset % sizeof(struct bpf_insn))
3388                         return -LIBBPF_ERRNO__FORMAT;
3389
3390                 insn_idx = rel.r_offset / sizeof(struct bpf_insn);
3391                 name = elf_sym_str(obj, sym.st_name) ?: "<?>";
3392
3393                 pr_debug("relo for shdr %u, symb %zu, value %zu, type %d, bind %d, name %d (\'%s\'), insn %u\n",
3394                          (__u32)sym.st_shndx, (size_t)GELF_R_SYM(rel.r_info),
3395                          (size_t)sym.st_value, GELF_ST_TYPE(sym.st_info),
3396                          GELF_ST_BIND(sym.st_info), sym.st_name, name,
3397                          insn_idx);
3398
3399                 err = bpf_program__record_reloc(prog, &prog->reloc_desc[i],
3400                                                 insn_idx, name, &sym, &rel);
3401                 if (err)
3402                         return err;
3403         }
3404         return 0;
3405 }
3406
3407 static int bpf_map_find_btf_info(struct bpf_object *obj, struct bpf_map *map)
3408 {
3409         struct bpf_map_def *def = &map->def;
3410         __u32 key_type_id = 0, value_type_id = 0;
3411         int ret;
3412
3413         /* if it's a BTF-defined map, we don't need to search for type IDs.
3414          * A struct_ops map does not need btf_key_type_id or
3415          * btf_value_type_id either.
3416          */
3417         if (map->sec_idx == obj->efile.btf_maps_shndx ||
3418             bpf_map__is_struct_ops(map))
3419                 return 0;
3420
3421         if (!bpf_map__is_internal(map)) {
3422                 ret = btf__get_map_kv_tids(obj->btf, map->name, def->key_size,
3423                                            def->value_size, &key_type_id,
3424                                            &value_type_id);
3425         } else {
3426                 /*
3427                  * LLVM annotates global data differently in BTF, that is,
3428                  * only as '.data', '.bss' or '.rodata'.
3429                  */
3430                 ret = btf__find_by_name(obj->btf,
3431                                 libbpf_type_to_btf_name[map->libbpf_type]);
3432         }
3433         if (ret < 0)
3434                 return ret;
3435
3436         map->btf_key_type_id = key_type_id;
3437         map->btf_value_type_id = bpf_map__is_internal(map) ?
3438                                  ret : value_type_id;
3439         return 0;
3440 }
3441
3442 int bpf_map__reuse_fd(struct bpf_map *map, int fd)
3443 {
3444         struct bpf_map_info info = {};
3445         __u32 len = sizeof(info);
3446         int new_fd, err;
3447         char *new_name;
3448
3449         err = bpf_obj_get_info_by_fd(fd, &info, &len);
3450         if (err)
3451                 return err;
3452
3453         new_name = strdup(info.name);
3454         if (!new_name)
3455                 return -errno;
3456
3457         new_fd = open("/", O_RDONLY | O_CLOEXEC);
3458         if (new_fd < 0) {
3459                 err = -errno;
3460                 goto err_free_new_name;
3461         }
3462
3463         new_fd = dup3(fd, new_fd, O_CLOEXEC);
3464         if (new_fd < 0) {
3465                 err = -errno;
3466                 goto err_close_new_fd;
3467         }
3468
3469         err = zclose(map->fd);
3470         if (err) {
3471                 err = -errno;
3472                 goto err_close_new_fd;
3473         }
3474         free(map->name);
3475
3476         map->fd = new_fd;
3477         map->name = new_name;
3478         map->def.type = info.type;
3479         map->def.key_size = info.key_size;
3480         map->def.value_size = info.value_size;
3481         map->def.max_entries = info.max_entries;
3482         map->def.map_flags = info.map_flags;
3483         map->btf_key_type_id = info.btf_key_type_id;
3484         map->btf_value_type_id = info.btf_value_type_id;
3485         map->reused = true;
3486
3487         return 0;
3488
3489 err_close_new_fd:
3490         close(new_fd);
3491 err_free_new_name:
3492         free(new_name);
3493         return err;
3494 }
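
/* Usage sketch (pin path hypothetical): reusing a map pinned by some
 * other process instead of creating a fresh one, done before the
 * object is loaded:
 *
 *	int pin_fd = bpf_obj_get("/sys/fs/bpf/my_map");
 *
 *	if (pin_fd >= 0)
 *		err = bpf_map__reuse_fd(map, pin_fd);
 *
 * bpf_object__reuse_map() further below automates exactly this for
 * any map with a pin_path set.
 */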
3495
3496 __u32 bpf_map__max_entries(const struct bpf_map *map)
3497 {
3498         return map->def.max_entries;
3499 }
3500
3501 int bpf_map__set_max_entries(struct bpf_map *map, __u32 max_entries)
3502 {
3503         if (map->fd >= 0)
3504                 return -EBUSY;
3505         map->def.max_entries = max_entries;
3506         return 0;
3507 }
3508
3509 int bpf_map__resize(struct bpf_map *map, __u32 max_entries)
3510 {
3511         if (!map || !max_entries)
3512                 return -EINVAL;
3513
3514         return bpf_map__set_max_entries(map, max_entries);
3515 }
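
/* Usage sketch: resizing must happen after open but before load, since
 * max_entries is fixed once the map fd exists (hence the -EBUSY check
 * above). Object and map names are hypothetical; error handling
 * omitted:
 *
 *	struct bpf_object *obj = bpf_object__open("prog.bpf.o");
 *	struct bpf_map *map;
 *
 *	map = bpf_object__find_map_by_name(obj, "events");
 *	bpf_map__set_max_entries(map, 4096);
 *	bpf_object__load(obj);
 */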
3516
3517 static int
3518 bpf_object__probe_loading(struct bpf_object *obj)
3519 {
3520         struct bpf_load_program_attr attr;
3521         char *cp, errmsg[STRERR_BUFSIZE];
3522         struct bpf_insn insns[] = {
3523                 BPF_MOV64_IMM(BPF_REG_0, 0),
3524                 BPF_EXIT_INSN(),
3525         };
3526         int ret;
3527
3528         /* make sure basic loading works */
3529
3530         memset(&attr, 0, sizeof(attr));
3531         attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
3532         attr.insns = insns;
3533         attr.insns_cnt = ARRAY_SIZE(insns);
3534         attr.license = "GPL";
3535
3536         ret = bpf_load_program_xattr(&attr, NULL, 0);
3537         if (ret < 0) {
3538                 ret = errno;
3539                 cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg));
3540                 pr_warn("Error in %s():%s(%d). Couldn't load trivial BPF "
3541                         "program. Make sure your kernel supports BPF "
3542                         "(CONFIG_BPF_SYSCALL=y) and/or that RLIMIT_MEMLOCK is "
3543                         "set to a big enough value.\n", __func__, cp, ret);
3544                 return -ret;
3545         }
3546         close(ret);
3547
3548         return 0;
3549 }
3550
3551 static int probe_fd(int fd)
3552 {
3553         if (fd >= 0)
3554                 close(fd);
3555         return fd >= 0;
3556 }
3557
3558 static int probe_kern_prog_name(void)
3559 {
3560         struct bpf_load_program_attr attr;
3561         struct bpf_insn insns[] = {
3562                 BPF_MOV64_IMM(BPF_REG_0, 0),
3563                 BPF_EXIT_INSN(),
3564         };
3565         int ret;
3566
3567         /* make sure loading with name works */
3568
3569         memset(&attr, 0, sizeof(attr));
3570         attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
3571         attr.insns = insns;
3572         attr.insns_cnt = ARRAY_SIZE(insns);
3573         attr.license = "GPL";
3574         attr.name = "test";
3575         ret = bpf_load_program_xattr(&attr, NULL, 0);
3576         return probe_fd(ret);
3577 }
3578
3579 static int probe_kern_global_data(void)
3580 {
3581         struct bpf_load_program_attr prg_attr;
3582         struct bpf_create_map_attr map_attr;
3583         char *cp, errmsg[STRERR_BUFSIZE];
3584         struct bpf_insn insns[] = {
3585                 BPF_LD_MAP_VALUE(BPF_REG_1, 0, 16),
3586                 BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 42),
3587                 BPF_MOV64_IMM(BPF_REG_0, 0),
3588                 BPF_EXIT_INSN(),
3589         };
3590         int ret, map;
3591
3592         memset(&map_attr, 0, sizeof(map_attr));
3593         map_attr.map_type = BPF_MAP_TYPE_ARRAY;
3594         map_attr.key_size = sizeof(int);
3595         map_attr.value_size = 32;
3596         map_attr.max_entries = 1;
3597
3598         map = bpf_create_map_xattr(&map_attr);
3599         if (map < 0) {
3600                 ret = -errno;
3601                 cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg));
3602                 pr_warn("Error in %s():%s(%d). Couldn't create simple array map.\n",
3603                         __func__, cp, -ret);
3604                 return ret;
3605         }
3606
3607         insns[0].imm = map;
3608
3609         memset(&prg_attr, 0, sizeof(prg_attr));
3610         prg_attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
3611         prg_attr.insns = insns;
3612         prg_attr.insns_cnt = ARRAY_SIZE(insns);
3613         prg_attr.license = "GPL";
3614
3615         ret = bpf_load_program_xattr(&prg_attr, NULL, 0);
3616         close(map);
3617         return probe_fd(ret);
3618 }
3619
3620 static int probe_kern_btf(void)
3621 {
3622         static const char strs[] = "\0int";
3623         __u32 types[] = {
3624                 /* int */
3625                 BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4),
3626         };
3627
3628         return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
3629                                              strs, sizeof(strs)));
3630 }
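
/* A sketch of what the raw words above encode: BTF_TYPE_INT_ENC()
 * expands to the common type header (name_off = 1, pointing at the
 * "int" string; info = BTF_KIND_INT; size = 4), followed by one
 * int-specific __u32 packing encoding/bit-offset/bit-size. Feeding
 * this minimal single-type BTF to the kernel is enough to learn
 * whether BTF is supported at all; the probes below extend the same
 * idea with FUNC_PROTO, FUNC, VAR and DATASEC kinds.
 */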
3631
3632 static int probe_kern_btf_func(void)
3633 {
3634         static const char strs[] = "\0int\0x\0a";
3635         /* void x(int a) {} */
3636         __u32 types[] = {
3637                 /* int */
3638                 BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4),  /* [1] */
3639                 /* FUNC_PROTO */                                /* [2] */
3640                 BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 0),
3641                 BTF_PARAM_ENC(7, 1),
3642                 /* FUNC x */                                    /* [3] */
3643                 BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0), 2),
3644         };
3645
3646         return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
3647                                              strs, sizeof(strs)));
3648 }
3649
3650 static int probe_kern_btf_func_global(void)
3651 {
3652         static const char strs[] = "\0int\0x\0a";
3653         /* void x(int a) {} with global linkage */
3654         __u32 types[] = {
3655                 /* int */
3656                 BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4),  /* [1] */
3657                 /* FUNC_PROTO */                                /* [2] */
3658                 BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 0),
3659                 BTF_PARAM_ENC(7, 1),
3660                 /* FUNC x BTF_FUNC_GLOBAL */                    /* [3] */
3661                 BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, BTF_FUNC_GLOBAL), 2),
3662         };
3663
3664         return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
3665                                              strs, sizeof(strs)));
3666 }
3667
3668 static int probe_kern_btf_datasec(void)
3669 {
3670         static const char strs[] = "\0x\0.data";
3671         /* static int x; */
3672         __u32 types[] = {
3673                 /* int */
3674                 BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),  /* [1] */
3675                 /* VAR x */                                     /* [2] */
3676                 BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1),
3677                 BTF_VAR_STATIC,
3678                 /* DATASEC val */                               /* [3] */
3679                 BTF_TYPE_ENC(3, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4),
3680                 BTF_VAR_SECINFO_ENC(2, 0, 4),
3681         };
3682
3683         return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
3684                                              strs, sizeof(strs)));
3685 }
3686
3687 static int probe_kern_array_mmap(void)
3688 {
3689         struct bpf_create_map_attr attr = {
3690                 .map_type = BPF_MAP_TYPE_ARRAY,
3691                 .map_flags = BPF_F_MMAPABLE,
3692                 .key_size = sizeof(int),
3693                 .value_size = sizeof(int),
3694                 .max_entries = 1,
3695         };
3696
3697         return probe_fd(bpf_create_map_xattr(&attr));
3698 }
3699
3700 static int probe_kern_exp_attach_type(void)
3701 {
3702         struct bpf_load_program_attr attr;
3703         struct bpf_insn insns[] = {
3704                 BPF_MOV64_IMM(BPF_REG_0, 0),
3705                 BPF_EXIT_INSN(),
3706         };
3707
3708         memset(&attr, 0, sizeof(attr));
3709                 /* use any valid combination of program type and (optional)
3710                  * non-zero expected attach type (i.e., not BPF_CGROUP_INET_INGRESS,
3711                  * which is zero) to see if the kernel supports the
3712                  * expected_attach_type field for the BPF_PROG_LOAD command
3713                  */
3714         attr.prog_type = BPF_PROG_TYPE_CGROUP_SOCK;
3715         attr.expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE;
3716         attr.insns = insns;
3717         attr.insns_cnt = ARRAY_SIZE(insns);
3718         attr.license = "GPL";
3719
3720         return probe_fd(bpf_load_program_xattr(&attr, NULL, 0));
3721 }
3722
3723 static int probe_kern_probe_read_kernel(void)
3724 {
3725         struct bpf_load_program_attr attr;
3726         struct bpf_insn insns[] = {
3727                 BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),   /* r1 = r10 (fp) */
3728                 BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),  /* r1 += -8 */
3729                 BPF_MOV64_IMM(BPF_REG_2, 8),            /* r2 = 8 */
3730                 BPF_MOV64_IMM(BPF_REG_3, 0),            /* r3 = 0 */
3731                 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_probe_read_kernel),
3732                 BPF_EXIT_INSN(),
3733         };
3734
3735         memset(&attr, 0, sizeof(attr));
3736         attr.prog_type = BPF_PROG_TYPE_KPROBE;
3737         attr.insns = insns;
3738         attr.insns_cnt = ARRAY_SIZE(insns);
3739         attr.license = "GPL";
3740
3741         return probe_fd(bpf_load_program_xattr(&attr, NULL, 0));
3742 }
3743
3744 enum kern_feature_result {
3745         FEAT_UNKNOWN = 0,
3746         FEAT_SUPPORTED = 1,
3747         FEAT_MISSING = 2,
3748 };
3749
3750 typedef int (*feature_probe_fn)(void);
3751
3752 static struct kern_feature_desc {
3753         const char *desc;
3754         feature_probe_fn probe;
3755         enum kern_feature_result res;
3756 } feature_probes[__FEAT_CNT] = {
3757         [FEAT_PROG_NAME] = {
3758                 "BPF program name", probe_kern_prog_name,
3759         },
3760         [FEAT_GLOBAL_DATA] = {
3761                 "global variables", probe_kern_global_data,
3762         },
3763         [FEAT_BTF] = {
3764                 "minimal BTF", probe_kern_btf,
3765         },
3766         [FEAT_BTF_FUNC] = {
3767                 "BTF functions", probe_kern_btf_func,
3768         },
3769         [FEAT_BTF_GLOBAL_FUNC] = {
3770                 "BTF global function", probe_kern_btf_func_global,
3771         },
3772         [FEAT_BTF_DATASEC] = {
3773                 "BTF data section and variable", probe_kern_btf_datasec,
3774         },
3775         [FEAT_ARRAY_MMAP] = {
3776                 "ARRAY map mmap()", probe_kern_array_mmap,
3777         },
3778         [FEAT_EXP_ATTACH_TYPE] = {
3779                 "BPF_PROG_LOAD expected_attach_type attribute",
3780                 probe_kern_exp_attach_type,
3781         },
3782         [FEAT_PROBE_READ_KERN] = {
3783                 "bpf_probe_read_kernel() helper", probe_kern_probe_read_kernel,
3784         }
3785 };
3786
3787 static bool kernel_supports(enum kern_feature_id feat_id)
3788 {
3789         struct kern_feature_desc *feat = &feature_probes[feat_id];
3790         int ret;
3791
3792         if (READ_ONCE(feat->res) == FEAT_UNKNOWN) {
3793                 ret = feat->probe();
3794                 if (ret > 0) {
3795                         WRITE_ONCE(feat->res, FEAT_SUPPORTED);
3796                 } else if (ret == 0) {
3797                         WRITE_ONCE(feat->res, FEAT_MISSING);
3798                 } else {
3799                         pr_warn("Detection of kernel %s support failed: %d\n", feat->desc, ret);
3800                         WRITE_ONCE(feat->res, FEAT_MISSING);
3801                 }
3802         }
3803
3804         return READ_ONCE(feat->res) == FEAT_SUPPORTED;
3805 }
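
/* Callers use this as a simple guard, e.g. (sketch):
 *
 *	if (kernel_supports(FEAT_BTF_FUNC))
 *		... emit func_info along with the program ...
 *
 * Probe results are cached in feature_probes[], so each feature is
 * detected once per process, and a probe error is conservatively
 * treated as the feature being missing.
 */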
3806
3807 static bool map_is_reuse_compat(const struct bpf_map *map, int map_fd)
3808 {
3809         struct bpf_map_info map_info = {};
3810         char msg[STRERR_BUFSIZE];
3811         __u32 map_info_len;
3812
3813         map_info_len = sizeof(map_info);
3814
3815         if (bpf_obj_get_info_by_fd(map_fd, &map_info, &map_info_len)) {
3816                 pr_warn("failed to get map info for map FD %d: %s\n",
3817                         map_fd, libbpf_strerror_r(errno, msg, sizeof(msg)));
3818                 return false;
3819         }
3820
3821         return (map_info.type == map->def.type &&
3822                 map_info.key_size == map->def.key_size &&
3823                 map_info.value_size == map->def.value_size &&
3824                 map_info.max_entries == map->def.max_entries &&
3825                 map_info.map_flags == map->def.map_flags);
3826 }
3827
3828 static int
3829 bpf_object__reuse_map(struct bpf_map *map)
3830 {
3831         char *cp, errmsg[STRERR_BUFSIZE];
3832         int err, pin_fd;
3833
3834         pin_fd = bpf_obj_get(map->pin_path);
3835         if (pin_fd < 0) {
3836                 err = -errno;
3837                 if (err == -ENOENT) {
3838                         pr_debug("found no pinned map to reuse at '%s'\n",
3839                                  map->pin_path);
3840                         return 0;
3841                 }
3842
3843                 cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg));
3844                 pr_warn("couldn't retrieve pinned map '%s': %s\n",
3845                         map->pin_path, cp);
3846                 return err;
3847         }
3848
3849         if (!map_is_reuse_compat(map, pin_fd)) {
3850                 pr_warn("couldn't reuse pinned map at '%s': parameter mismatch\n",
3851                         map->pin_path);
3852                 close(pin_fd);
3853                 return -EINVAL;
3854         }
3855
3856         err = bpf_map__reuse_fd(map, pin_fd);
3857         if (err) {
3858                 close(pin_fd);
3859                 return err;
3860         }
3861         map->pinned = true;
3862         pr_debug("reused pinned map at '%s'\n", map->pin_path);
3863
3864         return 0;
3865 }
3866
3867 static int
3868 bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map)
3869 {
3870         enum libbpf_map_type map_type = map->libbpf_type;
3871         char *cp, errmsg[STRERR_BUFSIZE];
3872         int err, zero = 0;
3873
3874         err = bpf_map_update_elem(map->fd, &zero, map->mmaped, 0);
3875         if (err) {
3876                 err = -errno;
3877                 cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
3878                 pr_warn("Error setting initial map(%s) contents: %s\n",
3879                         map->name, cp);
3880                 return err;
3881         }
3882
3883         /* Freeze .rodata and .kconfig map as read-only from syscall side. */
3884         if (map_type == LIBBPF_MAP_RODATA || map_type == LIBBPF_MAP_KCONFIG) {
3885                 err = bpf_map_freeze(map->fd);
3886                 if (err) {
3887                         err = -errno;
3888                         cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
3889                         pr_warn("Error freezing map(%s) as read-only: %s\n",
3890                                 map->name, cp);
3891                         return err;
3892                 }
3893         }
3894         return 0;
3895 }
3896
3897 static void bpf_map__destroy(struct bpf_map *map);
3898
3899 static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map)
3900 {
3901         struct bpf_create_map_attr create_attr;
3902         struct bpf_map_def *def = &map->def;
3903
3904         memset(&create_attr, 0, sizeof(create_attr));
3905
3906         if (kernel_supports(FEAT_PROG_NAME))
3907                 create_attr.name = map->name;
3908         create_attr.map_ifindex = map->map_ifindex;
3909         create_attr.map_type = def->type;
3910         create_attr.map_flags = def->map_flags;
3911         create_attr.key_size = def->key_size;
3912         create_attr.value_size = def->value_size;
3913         create_attr.numa_node = map->numa_node;
3914
3915         if (def->type == BPF_MAP_TYPE_PERF_EVENT_ARRAY && !def->max_entries) {
3916                 int nr_cpus;
3917
3918                 nr_cpus = libbpf_num_possible_cpus();
3919                 if (nr_cpus < 0) {
3920                         pr_warn("map '%s': failed to determine number of system CPUs: %d\n",
3921                                 map->name, nr_cpus);
3922                         return nr_cpus;
3923                 }
3924                 pr_debug("map '%s': setting size to %d\n", map->name, nr_cpus);
3925                 create_attr.max_entries = nr_cpus;
3926         } else {
3927                 create_attr.max_entries = def->max_entries;
3928         }
3929
3930         if (bpf_map__is_struct_ops(map))
3931                 create_attr.btf_vmlinux_value_type_id =
3932                         map->btf_vmlinux_value_type_id;
3933
3934         create_attr.btf_fd = 0;
3935         create_attr.btf_key_type_id = 0;
3936         create_attr.btf_value_type_id = 0;
3937         if (obj->btf && btf__fd(obj->btf) >= 0 && !bpf_map_find_btf_info(obj, map)) {
3938                 create_attr.btf_fd = btf__fd(obj->btf);
3939                 create_attr.btf_key_type_id = map->btf_key_type_id;
3940                 create_attr.btf_value_type_id = map->btf_value_type_id;
3941         }
3942
3943         if (bpf_map_type__is_map_in_map(def->type)) {
3944                 if (map->inner_map) {
3945                         int err;
3946
3947                         err = bpf_object__create_map(obj, map->inner_map);
3948                         if (err) {
3949                                 pr_warn("map '%s': failed to create inner map: %d\n",
3950                                         map->name, err);
3951                                 return err;
3952                         }
3953                         map->inner_map_fd = bpf_map__fd(map->inner_map);
3954                 }
3955                 if (map->inner_map_fd >= 0)
3956                         create_attr.inner_map_fd = map->inner_map_fd;
3957         }
3958
3959         map->fd = bpf_create_map_xattr(&create_attr);
3960         if (map->fd < 0 && (create_attr.btf_key_type_id ||
3961                             create_attr.btf_value_type_id)) {
3962                 char *cp, errmsg[STRERR_BUFSIZE];
3963                 int err = -errno;
3964
3965                 cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
3966                 pr_warn("Error in bpf_create_map_xattr(%s):%s(%d). Retrying without BTF.\n",
3967                         map->name, cp, err);
3968                 create_attr.btf_fd = 0;
3969                 create_attr.btf_key_type_id = 0;
3970                 create_attr.btf_value_type_id = 0;
3971                 map->btf_key_type_id = 0;
3972                 map->btf_value_type_id = 0;
3973                 map->fd = bpf_create_map_xattr(&create_attr);
3974         }
3975
3976         if (map->fd < 0)
3977                 return -errno;
3978
3979         if (bpf_map_type__is_map_in_map(def->type) && map->inner_map) {
3980                 bpf_map__destroy(map->inner_map);
3981                 zfree(&map->inner_map);
3982         }
3983
3984         return 0;
3985 }
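
/* For the map-in-map path above, the inner map prototype typically
 * comes from a BTF-defined declaration on the BPF side, e.g. (all
 * names illustrative):
 *
 *	struct inner {
 *		__uint(type, BPF_MAP_TYPE_ARRAY);
 *		__uint(max_entries, 1);
 *		__type(key, int);
 *		__type(value, int);
 *	};
 *
 *	struct {
 *		__uint(type, BPF_MAP_TYPE_HASH_OF_MAPS);
 *		__uint(max_entries, 8);
 *		__type(key, int);
 *		__array(values, struct inner);
 *	} outer SEC(".maps");
 *
 * map->inner_map holds that parsed prototype; it is created first only
 * to obtain a valid inner_map_fd for the outer map's creation, then
 * destroyed again at the end of this function.
 */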
3986
3987 static int
3988 bpf_object__create_maps(struct bpf_object *obj)
3989 {
3990         struct bpf_map *map;
3991         char *cp, errmsg[STRERR_BUFSIZE];
3992         unsigned int i, j;
3993         int err;
3994
3995         for (i = 0; i < obj->nr_maps; i++) {
3996                 map = &obj->maps[i];
3997
3998                 if (map->pin_path) {
3999                         err = bpf_object__reuse_map(map);
4000                         if (err) {
4001                                 pr_warn("map '%s': error reusing pinned map\n",
4002                                         map->name);
4003                                 goto err_out;
4004                         }
4005                 }
4006
4007                 if (map->fd >= 0) {
4008                         pr_debug("map '%s': skipping creation (preset fd=%d)\n",
4009                                  map->name, map->fd);
4010                         continue;
4011                 }
4012
4013                 err = bpf_object__create_map(obj, map);
4014                 if (err)
4015                         goto err_out;
4016
4017                 pr_debug("map '%s': created successfully, fd=%d\n", map->name,
4018                          map->fd);
4019
4020                 if (bpf_map__is_internal(map)) {
4021                         err = bpf_object__populate_internal_map(obj, map);
4022                         if (err < 0) {
4023                                 zclose(map->fd);
4024                                 goto err_out;
4025                         }
4026                 }
4027
4028                 if (map->init_slots_sz) {
4029                         for (j = 0; j < map->init_slots_sz; j++) {
4030                                 const struct bpf_map *targ_map;
4031                                 int fd;
4032
4033                                 if (!map->init_slots[j])
4034                                         continue;
4035
4036                                 targ_map = map->init_slots[j];
4037                                 fd = bpf_map__fd(targ_map);
4038                                 err = bpf_map_update_elem(map->fd, &j, &fd, 0);
4039                                 if (err) {
4040                                         err = -errno;
4041                                         pr_warn("map '%s': failed to initialize slot [%d] to map '%s' fd=%d: %d\n",
4042                                                 map->name, j, targ_map->name,
4043                                                 fd, err);
4044                                         goto err_out;
4045                                 }
4046                                 pr_debug("map '%s': slot [%d] set to map '%s' fd=%d\n",
4047                                          map->name, j, targ_map->name, fd);
4048                         }
4049                         zfree(&map->init_slots);
4050                         map->init_slots_sz = 0;
4051                 }
4052
4053                 if (map->pin_path && !map->pinned) {
4054                         err = bpf_map__pin(map, NULL);
4055                         if (err) {
4056                                 pr_warn("map '%s': failed to auto-pin at '%s': %d\n",
4057                                         map->name, map->pin_path, err);
4058                                 zclose(map->fd);
4059                                 goto err_out;
4060                         }
4061                 }
4062         }
4063
4064         return 0;
4065
4066 err_out:
4067         cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
4068         pr_warn("map '%s': failed to create: %s(%d)\n", map->name, cp, err);
4069         pr_perm_msg(err);
4070         for (j = 0; j < i; j++)
4071                 zclose(obj->maps[j].fd);
4072         return err;
4073 }
4074
4075 static int
4076 check_btf_ext_reloc_err(struct bpf_program *prog, int err,
4077                         void *btf_prog_info, const char *info_name)
4078 {
4079         if (err != -ENOENT) {
4080                 pr_warn("Error in loading %s for sec %s.\n",
4081                         info_name, prog->section_name);
4082                 return err;
4083         }
4084
4085         /* err == -ENOENT (i.e. prog->section_name not found in btf_ext) */
4086
4087         if (btf_prog_info) {
4088                 /*
4089                  * Some info has already been found, but the last btf_ext
4090                  * reloc had a problem, so we must error out.
4091                  */
4092                 pr_warn("Error in relocating %s for sec %s.\n",
4093                         info_name, prog->section_name);
4094                 return err;
4095         }
4096
4097         /* There was a problem loading the very first info; ignore the rest. */
4098         pr_warn("Cannot find %s for main program sec %s. Ignore all %s.\n",
4099                 info_name, prog->section_name, info_name);
4100         return 0;
4101 }
4102
4103 static int
4104 bpf_program_reloc_btf_ext(struct bpf_program *prog, struct bpf_object *obj,
4105                           const char *section_name,  __u32 insn_offset)
4106 {
4107         int err;
4108
4109         if (!insn_offset || prog->func_info) {
4110                 /*
4111                  * !insn_offset => main program
4112                  *
4113                  * For sub prog, the main program's func_info has to
4114                  * be loaded first (i.e. prog->func_info != NULL)
4115                  */
4116                 err = btf_ext__reloc_func_info(obj->btf, obj->btf_ext,
4117                                                section_name, insn_offset,
4118                                                &prog->func_info,
4119                                                &prog->func_info_cnt);
4120                 if (err)
4121                         return check_btf_ext_reloc_err(prog, err,
4122                                                        prog->func_info,
4123                                                        "bpf_func_info");
4124
4125                 prog->func_info_rec_size = btf_ext__func_info_rec_size(obj->btf_ext);
4126         }
4127
4128         if (!insn_offset || prog->line_info) {
4129                 err = btf_ext__reloc_line_info(obj->btf, obj->btf_ext,
4130                                                section_name, insn_offset,
4131                                                &prog->line_info,
4132                                                &prog->line_info_cnt);
4133                 if (err)
4134                         return check_btf_ext_reloc_err(prog, err,
4135                                                        prog->line_info,
4136                                                        "bpf_line_info");
4137
4138                 prog->line_info_rec_size = btf_ext__line_info_rec_size(obj->btf_ext);
4139         }
4140
4141         return 0;
4142 }
4143
4144 #define BPF_CORE_SPEC_MAX_LEN 64
4145
4146 /* represents BPF CO-RE field or array element accessor */
4147 struct bpf_core_accessor {
4148         __u32 type_id;          /* struct/union type or array element type */
4149         __u32 idx;              /* field index or array index */
4150         const char *name;       /* field name or NULL for array accessor */
4151 };
4152
4153 struct bpf_core_spec {
4154         const struct btf *btf;
4155         /* high-level spec: named fields and array indices only */
4156         struct bpf_core_accessor spec[BPF_CORE_SPEC_MAX_LEN];
4157         /* original unresolved (no skip_mods_or_typedefs) root type ID */
4158         __u32 root_type_id;
4159         /* CO-RE relocation kind */
4160         enum bpf_core_relo_kind relo_kind;
4161         /* high-level spec length */
4162         int len;
4163         /* raw, low-level spec: 1-to-1 with accessor spec string */
4164         int raw_spec[BPF_CORE_SPEC_MAX_LEN];
4165         /* raw spec length */
4166         int raw_len;
4167         /* field bit offset represented by spec */
4168         __u32 bit_offset;
4169 };
4170
4171 static bool str_is_empty(const char *s)
4172 {
4173         return !s || !s[0];
4174 }
4175
4176 static bool is_flex_arr(const struct btf *btf,
4177                         const struct bpf_core_accessor *acc,
4178                         const struct btf_array *arr)
4179 {
4180         const struct btf_type *t;
4181
4182         /* not a flexible array if it's not inside a struct or has a non-zero size */
4183         if (!acc->name || arr->nelems > 0)
4184                 return false;
4185
4186         /* has to be the last member of enclosing struct */
4187         t = btf__type_by_id(btf, acc->type_id);
4188         return acc->idx == btf_vlen(t) - 1;
4189 }
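
/* E.g., given (struct name illustrative):
 *
 *	struct sample {
 *		int cnt;
 *		int data[];
 *	};
 *
 * 'data' is a flexible array: zero nelems, a named member, and the
 * last field of its enclosing struct.
 */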
4190
4191 static const char *core_relo_kind_str(enum bpf_core_relo_kind kind)
4192 {
4193         switch (kind) {
4194         case BPF_FIELD_BYTE_OFFSET: return "byte_off";
4195         case BPF_FIELD_BYTE_SIZE: return "byte_sz";
4196         case BPF_FIELD_EXISTS: return "field_exists";
4197         case BPF_FIELD_SIGNED: return "signed";
4198         case BPF_FIELD_LSHIFT_U64: return "lshift_u64";
4199         case BPF_FIELD_RSHIFT_U64: return "rshift_u64";
4200         case BPF_TYPE_ID_LOCAL: return "local_type_id";
4201         case BPF_TYPE_ID_TARGET: return "target_type_id";
4202         case BPF_TYPE_EXISTS: return "type_exists";
4203         case BPF_TYPE_SIZE: return "type_size";
4204         case BPF_ENUMVAL_EXISTS: return "enumval_exists";
4205         case BPF_ENUMVAL_VALUE: return "enumval_value";
4206         default: return "unknown";
4207         }
4208 }
4209
4210 static bool core_relo_is_field_based(enum bpf_core_relo_kind kind)
4211 {
4212         switch (kind) {
4213         case BPF_FIELD_BYTE_OFFSET:
4214         case BPF_FIELD_BYTE_SIZE:
4215         case BPF_FIELD_EXISTS:
4216         case BPF_FIELD_SIGNED:
4217         case BPF_FIELD_LSHIFT_U64:
4218         case BPF_FIELD_RSHIFT_U64:
4219                 return true;
4220         default:
4221                 return false;
4222         }
4223 }
4224
4225 static bool core_relo_is_type_based(enum bpf_core_relo_kind kind)
4226 {
4227         switch (kind) {
4228         case BPF_TYPE_ID_LOCAL:
4229         case BPF_TYPE_ID_TARGET:
4230         case BPF_TYPE_EXISTS:
4231         case BPF_TYPE_SIZE:
4232                 return true;
4233         default:
4234                 return false;
4235         }
4236 }
4237
4238 static bool core_relo_is_enumval_based(enum bpf_core_relo_kind kind)
4239 {
4240         switch (kind) {
4241         case BPF_ENUMVAL_EXISTS:
4242         case BPF_ENUMVAL_VALUE:
4243                 return true;
4244         default:
4245                 return false;
4246         }
4247 }
4248
4249 /*
4250  * Turn bpf_core_relo into a low- and high-level spec representation,
4251  * validating correctness along the way, as well as calculating resulting
4252  * field bit offset, specified by accessor string. Low-level spec captures
4253  * every single level of nestedness, including traversing anonymous
4254  * struct/union members. High-level one only captures semantically meaningful
 * "turning points": named fields and array indices.
4256  * E.g., for this case:
4257  *
4258  *   struct sample {
4259  *       int __unimportant;
4260  *       struct {
4261  *           int __1;
4262  *           int __2;
4263  *           int a[7];
4264  *       };
4265  *   };
4266  *
4267  *   struct sample *s = ...;
4268  *
 *   int *x = &s->a[3]; // access string = '0:1:2:3'
4270  *
4271  * Low-level spec has 1:1 mapping with each element of access string (it's
4272  * just a parsed access string representation): [0, 1, 2, 3].
4273  *
4274  * High-level spec will capture only 3 points:
 *   - initial zero-index access by pointer (&s->... is the same as &s[0]...);
4276  *   - field 'a' access (corresponds to '2' in low-level spec);
4277  *   - array element #3 access (corresponds to '3' in low-level spec).
4278  *
4279  * Type-based relocations (TYPE_EXISTS/TYPE_SIZE,
4280  * TYPE_ID_LOCAL/TYPE_ID_TARGET) don't capture any field information. Their
4281  * spec and raw_spec are kept empty.
4282  *
4283  * Enum value-based relocations (ENUMVAL_EXISTS/ENUMVAL_VALUE) use access
 * string to specify the enumerator's value index that needs to be relocated.
4285  */
4286 static int bpf_core_parse_spec(const struct btf *btf,
4287                                __u32 type_id,
4288                                const char *spec_str,
4289                                enum bpf_core_relo_kind relo_kind,
4290                                struct bpf_core_spec *spec)
4291 {
4292         int access_idx, parsed_len, i;
4293         struct bpf_core_accessor *acc;
4294         const struct btf_type *t;
4295         const char *name;
4296         __u32 id;
4297         __s64 sz;
4298
4299         if (str_is_empty(spec_str) || *spec_str == ':')
4300                 return -EINVAL;
4301
4302         memset(spec, 0, sizeof(*spec));
4303         spec->btf = btf;
4304         spec->root_type_id = type_id;
4305         spec->relo_kind = relo_kind;
4306
4307         /* type-based relocations don't have a field access string */
4308         if (core_relo_is_type_based(relo_kind)) {
4309                 if (strcmp(spec_str, "0"))
4310                         return -EINVAL;
4311                 return 0;
4312         }
4313
4314         /* parse spec_str="0:1:2:3:4" into array raw_spec=[0, 1, 2, 3, 4] */
4315         while (*spec_str) {
4316                 if (*spec_str == ':')
4317                         ++spec_str;
4318                 if (sscanf(spec_str, "%d%n", &access_idx, &parsed_len) != 1)
4319                         return -EINVAL;
4320                 if (spec->raw_len == BPF_CORE_SPEC_MAX_LEN)
4321                         return -E2BIG;
4322                 spec_str += parsed_len;
4323                 spec->raw_spec[spec->raw_len++] = access_idx;
4324         }
4325
4326         if (spec->raw_len == 0)
4327                 return -EINVAL;
4328
4329         t = skip_mods_and_typedefs(btf, type_id, &id);
4330         if (!t)
4331                 return -EINVAL;
4332
4333         access_idx = spec->raw_spec[0];
4334         acc = &spec->spec[0];
4335         acc->type_id = id;
4336         acc->idx = access_idx;
4337         spec->len++;
4338
4339         if (core_relo_is_enumval_based(relo_kind)) {
4340                 if (!btf_is_enum(t) || spec->raw_len > 1 || access_idx >= btf_vlen(t))
4341                         return -EINVAL;
4342
                /* record enumerator name in the first accessor */
4344                 acc->name = btf__name_by_offset(btf, btf_enum(t)[access_idx].name_off);
4345                 return 0;
4346         }
4347
4348         if (!core_relo_is_field_based(relo_kind))
4349                 return -EINVAL;
4350
4351         sz = btf__resolve_size(btf, id);
4352         if (sz < 0)
4353                 return sz;
4354         spec->bit_offset = access_idx * sz * 8;
4355
4356         for (i = 1; i < spec->raw_len; i++) {
4357                 t = skip_mods_and_typedefs(btf, id, &id);
4358                 if (!t)
4359                         return -EINVAL;
4360
4361                 access_idx = spec->raw_spec[i];
4362                 acc = &spec->spec[spec->len];
4363
4364                 if (btf_is_composite(t)) {
4365                         const struct btf_member *m;
4366                         __u32 bit_offset;
4367
4368                         if (access_idx >= btf_vlen(t))
4369                                 return -EINVAL;
4370
4371                         bit_offset = btf_member_bit_offset(t, access_idx);
4372                         spec->bit_offset += bit_offset;
4373
4374                         m = btf_members(t) + access_idx;
4375                         if (m->name_off) {
4376                                 name = btf__name_by_offset(btf, m->name_off);
4377                                 if (str_is_empty(name))
4378                                         return -EINVAL;
4379
4380                                 acc->type_id = id;
4381                                 acc->idx = access_idx;
4382                                 acc->name = name;
4383                                 spec->len++;
4384                         }
4385
4386                         id = m->type;
4387                 } else if (btf_is_array(t)) {
4388                         const struct btf_array *a = btf_array(t);
4389                         bool flex;
4390
4391                         t = skip_mods_and_typedefs(btf, a->type, &id);
4392                         if (!t)
4393                                 return -EINVAL;
4394
4395                         flex = is_flex_arr(btf, acc - 1, a);
4396                         if (!flex && access_idx >= a->nelems)
4397                                 return -EINVAL;
4398
4399                         spec->spec[spec->len].type_id = id;
4400                         spec->spec[spec->len].idx = access_idx;
4401                         spec->len++;
4402
4403                         sz = btf__resolve_size(btf, id);
4404                         if (sz < 0)
4405                                 return sz;
4406                         spec->bit_offset += access_idx * sz * 8;
4407                 } else {
4408                         pr_warn("relo for [%u] %s (at idx %d) captures type [%d] of unexpected kind %s\n",
4409                                 type_id, spec_str, i, id, btf_kind_str(t));
4410                         return -EINVAL;
4411                 }
4412         }
4413
4414         return 0;
4415 }
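
/* A worked example, with values derived from the 'struct sample' case in the
 * comment above: for spec_str = "0:1:2:3" and root type 'struct sample',
 * bpf_core_parse_spec() fills in:
 *
 *   raw_spec   = [0, 1, 2, 3], raw_len = 4;
 *   spec       = [initial deref (idx 0), field 'a', array idx 3], len = 3;
 *   bit_offset = 0 + 32 (anon struct) + 64 (field 'a') + 3 * 32 (a[3])
 *              = 192, i.e. byte offset 24.
 */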
4416
4417 static bool bpf_core_is_flavor_sep(const char *s)
4418 {
4419         /* check X___Y name pattern, where X and Y are not underscores */
4420         return s[0] != '_' &&                                 /* X */
4421                s[1] == '_' && s[2] == '_' && s[3] == '_' &&   /* ___ */
4422                s[4] != '_';                                   /* Y */
4423 }
4424
4425 /* Given 'some_struct_name___with_flavor' return the length of a name prefix
 * before the last triple underscore. The struct name part after the last
 * triple underscore is ignored by BPF CO-RE during relocation matching.
4428  */
4429 static size_t bpf_core_essential_name_len(const char *name)
4430 {
4431         size_t n = strlen(name);
4432         int i;
4433
4434         for (i = n - 5; i >= 0; i--) {
4435                 if (bpf_core_is_flavor_sep(name + i))
4436                         return i + 1;
4437         }
4438         return n;
4439 }
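
/* E.g., values following directly from the definition above:
 *
 *   bpf_core_essential_name_len("task_struct___2") == 11  ("task_struct")
 *   bpf_core_essential_name_len("task_struct")     == 11  (no flavor suffix)
 *   bpf_core_essential_name_len("x___y___z")       == 5   ("x___y"; only the
 *                                                          last ___ counts)
 */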
4440
4441 /* dynamically sized list of type IDs */
4442 struct ids_vec {
4443         __u32 *data;
4444         int len;
4445 };
4446
4447 static void bpf_core_free_cands(struct ids_vec *cand_ids)
4448 {
4449         free(cand_ids->data);
4450         free(cand_ids);
4451 }
4452
4453 static struct ids_vec *bpf_core_find_cands(const struct btf *local_btf,
4454                                            __u32 local_type_id,
4455                                            const struct btf *targ_btf)
4456 {
4457         size_t local_essent_len, targ_essent_len;
4458         const char *local_name, *targ_name;
4459         const struct btf_type *t, *local_t;
4460         struct ids_vec *cand_ids;
4461         __u32 *new_ids;
4462         int i, err, n;
4463
4464         local_t = btf__type_by_id(local_btf, local_type_id);
4465         if (!local_t)
4466                 return ERR_PTR(-EINVAL);
4467
4468         local_name = btf__name_by_offset(local_btf, local_t->name_off);
4469         if (str_is_empty(local_name))
4470                 return ERR_PTR(-EINVAL);
4471         local_essent_len = bpf_core_essential_name_len(local_name);
4472
4473         cand_ids = calloc(1, sizeof(*cand_ids));
4474         if (!cand_ids)
4475                 return ERR_PTR(-ENOMEM);
4476
4477         n = btf__get_nr_types(targ_btf);
4478         for (i = 1; i <= n; i++) {
4479                 t = btf__type_by_id(targ_btf, i);
4480                 if (btf_kind(t) != btf_kind(local_t))
4481                         continue;
4482
4483                 targ_name = btf__name_by_offset(targ_btf, t->name_off);
4484                 if (str_is_empty(targ_name))
4485                         continue;
4486
4487                 targ_essent_len = bpf_core_essential_name_len(targ_name);
4488                 if (targ_essent_len != local_essent_len)
4489                         continue;
4490
4491                 if (strncmp(local_name, targ_name, local_essent_len) == 0) {
4492                         pr_debug("CO-RE relocating [%d] %s %s: found target candidate [%d] %s %s\n",
4493                                  local_type_id, btf_kind_str(local_t),
4494                                  local_name, i, btf_kind_str(t), targ_name);
4495                         new_ids = libbpf_reallocarray(cand_ids->data,
4496                                                       cand_ids->len + 1,
4497                                                       sizeof(*cand_ids->data));
4498                         if (!new_ids) {
4499                                 err = -ENOMEM;
4500                                 goto err_out;
4501                         }
4502                         cand_ids->data = new_ids;
4503                         cand_ids->data[cand_ids->len++] = i;
4504                 }
4505         }
4506         return cand_ids;
4507 err_out:
4508         bpf_core_free_cands(cand_ids);
4509         return ERR_PTR(err);
4510 }
4511
4512 /* Check two types for compatibility for the purpose of field access
4513  * relocation. const/volatile/restrict and typedefs are skipped to ensure we
4514  * are relocating semantically compatible entities:
4515  *   - any two STRUCTs/UNIONs are compatible and can be mixed;
4516  *   - any two FWDs are compatible, if their names match (modulo flavor suffix);
4517  *   - any two PTRs are always compatible;
 *   - for ENUMs, names should be the same (ignoring flavor suffix) or at
 *     least one of the enums should be anonymous; sizes are ignored;
4521  *   - for INT, size and signedness are ignored;
4522  *   - for ARRAY, dimensionality is ignored, element types are checked for
4523  *     compatibility recursively;
4524  *   - everything else shouldn't be ever a target of relocation.
4525  * These rules are not set in stone and probably will be adjusted as we get
4526  * more experience with using BPF CO-RE relocations.
4527  */
4528 static int bpf_core_fields_are_compat(const struct btf *local_btf,
4529                                       __u32 local_id,
4530                                       const struct btf *targ_btf,
4531                                       __u32 targ_id)
4532 {
4533         const struct btf_type *local_type, *targ_type;
4534
4535 recur:
4536         local_type = skip_mods_and_typedefs(local_btf, local_id, &local_id);
4537         targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id);
4538         if (!local_type || !targ_type)
4539                 return -EINVAL;
4540
4541         if (btf_is_composite(local_type) && btf_is_composite(targ_type))
4542                 return 1;
4543         if (btf_kind(local_type) != btf_kind(targ_type))
4544                 return 0;
4545
4546         switch (btf_kind(local_type)) {
4547         case BTF_KIND_PTR:
4548                 return 1;
4549         case BTF_KIND_FWD:
4550         case BTF_KIND_ENUM: {
4551                 const char *local_name, *targ_name;
4552                 size_t local_len, targ_len;
4553
4554                 local_name = btf__name_by_offset(local_btf,
4555                                                  local_type->name_off);
4556                 targ_name = btf__name_by_offset(targ_btf, targ_type->name_off);
4557                 local_len = bpf_core_essential_name_len(local_name);
4558                 targ_len = bpf_core_essential_name_len(targ_name);
4559                 /* one of them is anonymous or both w/ same flavor-less names */
4560                 return local_len == 0 || targ_len == 0 ||
4561                        (local_len == targ_len &&
4562                         strncmp(local_name, targ_name, local_len) == 0);
4563         }
4564         case BTF_KIND_INT:
4565                 /* just reject deprecated bitfield-like integers; all other
4566                  * integers are by default compatible between each other
4567                  */
4568                 return btf_int_offset(local_type) == 0 &&
4569                        btf_int_offset(targ_type) == 0;
4570         case BTF_KIND_ARRAY:
4571                 local_id = btf_array(local_type)->type;
4572                 targ_id = btf_array(targ_type)->type;
4573                 goto recur;
4574         default:
4575                 pr_warn("unexpected kind %d relocated, local [%d], target [%d]\n",
4576                         btf_kind(local_type), local_id, targ_id);
4577                 return 0;
4578         }
4579 }
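
/* A few consequences of the rules above (hypothetical field types): a local
 * 'int' field matches a target 'long' field (INT size/signedness ignored);
 * a local 'int a[4]' matches a target 'short a[16]' (dimensionality ignored,
 * element INTs compatible); but a local 'int' field does not match a target
 * 'int *' field (INT vs PTR kind mismatch).
 */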
4580
4581 /*
4582  * Given single high-level named field accessor in local type, find
4583  * corresponding high-level accessor for a target type. Along the way,
4584  * maintain low-level spec for target as well. Also keep updating target
4585  * bit offset.
4586  *
4587  * Searching is performed through recursive exhaustive enumeration of all
4588  * fields of a struct/union. If there are any anonymous (embedded)
4589  * structs/unions, they are recursively searched as well. If field with
4590  * desired name is found, check compatibility between local and target types,
4591  * before returning result.
4592  *
 * 1 is returned if the field is found.
4594  * 0 is returned if no compatible field is found.
4595  * <0 is returned on error.
4596  */
4597 static int bpf_core_match_member(const struct btf *local_btf,
4598                                  const struct bpf_core_accessor *local_acc,
4599                                  const struct btf *targ_btf,
4600                                  __u32 targ_id,
4601                                  struct bpf_core_spec *spec,
4602                                  __u32 *next_targ_id)
4603 {
4604         const struct btf_type *local_type, *targ_type;
4605         const struct btf_member *local_member, *m;
4606         const char *local_name, *targ_name;
4607         __u32 local_id;
4608         int i, n, found;
4609
4610         targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id);
4611         if (!targ_type)
4612                 return -EINVAL;
4613         if (!btf_is_composite(targ_type))
4614                 return 0;
4615
4616         local_id = local_acc->type_id;
4617         local_type = btf__type_by_id(local_btf, local_id);
4618         local_member = btf_members(local_type) + local_acc->idx;
4619         local_name = btf__name_by_offset(local_btf, local_member->name_off);
4620
4621         n = btf_vlen(targ_type);
4622         m = btf_members(targ_type);
4623         for (i = 0; i < n; i++, m++) {
4624                 __u32 bit_offset;
4625
4626                 bit_offset = btf_member_bit_offset(targ_type, i);
4627
4628                 /* too deep struct/union/array nesting */
4629                 if (spec->raw_len == BPF_CORE_SPEC_MAX_LEN)
4630                         return -E2BIG;
4631
4632                 /* speculate this member will be the good one */
4633                 spec->bit_offset += bit_offset;
4634                 spec->raw_spec[spec->raw_len++] = i;
4635
4636                 targ_name = btf__name_by_offset(targ_btf, m->name_off);
4637                 if (str_is_empty(targ_name)) {
4638                         /* embedded struct/union, we need to go deeper */
4639                         found = bpf_core_match_member(local_btf, local_acc,
4640                                                       targ_btf, m->type,
4641                                                       spec, next_targ_id);
4642                         if (found) /* either found or error */
4643                                 return found;
4644                 } else if (strcmp(local_name, targ_name) == 0) {
4645                         /* matching named field */
4646                         struct bpf_core_accessor *targ_acc;
4647
4648                         targ_acc = &spec->spec[spec->len++];
4649                         targ_acc->type_id = targ_id;
4650                         targ_acc->idx = i;
4651                         targ_acc->name = targ_name;
4652
4653                         *next_targ_id = m->type;
4654                         found = bpf_core_fields_are_compat(local_btf,
4655                                                            local_member->type,
4656                                                            targ_btf, m->type);
4657                         if (!found)
4658                                 spec->len--; /* pop accessor */
4659                         return found;
4660                 }
4661                 /* member turned out not to be what we looked for */
4662                 spec->bit_offset -= bit_offset;
4663                 spec->raw_len--;
4664         }
4665
4666         return 0;
4667 }
4668
4669 /* Check local and target types for compatibility. This check is used for
 * type-based CO-RE relocations and follows slightly different rules than
4671  * field-based relocations. This function assumes that root types were already
4672  * checked for name match. Beyond that initial root-level name check, names
4673  * are completely ignored. Compatibility rules are as follows:
4674  *   - any two STRUCTs/UNIONs/FWDs/ENUMs/INTs are considered compatible, but
4675  *     kind should match for local and target types (i.e., STRUCT is not
4676  *     compatible with UNION);
4677  *   - for ENUMs, the size is ignored;
4678  *   - for INT, size and signedness are ignored;
4679  *   - for ARRAY, dimensionality is ignored, element types are checked for
4680  *     compatibility recursively;
4681  *   - CONST/VOLATILE/RESTRICT modifiers are ignored;
 *   - TYPEDEFs/PTRs are compatible if the types they point to are compatible;
4683  *   - FUNC_PROTOs are compatible if they have compatible signature: same
4684  *     number of input args and compatible return and argument types.
4685  * These rules are not set in stone and probably will be adjusted as we get
4686  * more experience with using BPF CO-RE relocations.
4687  */
4688 static int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id,
4689                                      const struct btf *targ_btf, __u32 targ_id)
4690 {
4691         const struct btf_type *local_type, *targ_type;
4692         int depth = 32; /* max recursion depth */
4693
4694         /* caller made sure that names match (ignoring flavor suffix) */
4695         local_type = btf__type_by_id(local_btf, local_id);
        targ_type = btf__type_by_id(targ_btf, targ_id);
4697         if (btf_kind(local_type) != btf_kind(targ_type))
4698                 return 0;
4699
4700 recur:
4701         depth--;
4702         if (depth < 0)
4703                 return -EINVAL;
4704
4705         local_type = skip_mods_and_typedefs(local_btf, local_id, &local_id);
4706         targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id);
4707         if (!local_type || !targ_type)
4708                 return -EINVAL;
4709
4710         if (btf_kind(local_type) != btf_kind(targ_type))
4711                 return 0;
4712
4713         switch (btf_kind(local_type)) {
4714         case BTF_KIND_UNKN:
4715         case BTF_KIND_STRUCT:
4716         case BTF_KIND_UNION:
4717         case BTF_KIND_ENUM:
4718         case BTF_KIND_FWD:
4719                 return 1;
4720         case BTF_KIND_INT:
4721                 /* just reject deprecated bitfield-like integers; all other
4722                  * integers are by default compatible between each other
4723                  */
4724                 return btf_int_offset(local_type) == 0 && btf_int_offset(targ_type) == 0;
4725         case BTF_KIND_PTR:
4726                 local_id = local_type->type;
4727                 targ_id = targ_type->type;
4728                 goto recur;
4729         case BTF_KIND_ARRAY:
4730                 local_id = btf_array(local_type)->type;
4731                 targ_id = btf_array(targ_type)->type;
4732                 goto recur;
4733         case BTF_KIND_FUNC_PROTO: {
4734                 struct btf_param *local_p = btf_params(local_type);
4735                 struct btf_param *targ_p = btf_params(targ_type);
4736                 __u16 local_vlen = btf_vlen(local_type);
4737                 __u16 targ_vlen = btf_vlen(targ_type);
4738                 int i, err;
4739
4740                 if (local_vlen != targ_vlen)
4741                         return 0;
4742
4743                 for (i = 0; i < local_vlen; i++, local_p++, targ_p++) {
4744                         skip_mods_and_typedefs(local_btf, local_p->type, &local_id);
4745                         skip_mods_and_typedefs(targ_btf, targ_p->type, &targ_id);
4746                         err = bpf_core_types_are_compat(local_btf, local_id, targ_btf, targ_id);
4747                         if (err <= 0)
4748                                 return err;
4749                 }
4750
4751                 /* tail recurse for return type check */
4752                 skip_mods_and_typedefs(local_btf, local_type->type, &local_id);
4753                 skip_mods_and_typedefs(targ_btf, targ_type->type, &targ_id);
4754                 goto recur;
4755         }
4756         default:
4757                 pr_warn("unexpected kind %s relocated, local [%d], target [%d]\n",
4758                         btf_kind_str(local_type), local_id, targ_id);
4759                 return 0;
4760         }
4761 }
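
/* For example (hypothetical signatures): pointers to 'int (int)' and
 * 'long (unsigned long)' FUNC_PROTOs are compatible (same arg count, INT
 * size/signedness ignored), while pointers to 'int (int)' and
 * 'int (int, int)' are not (argument count differs).
 */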
4762
4763 /*
4764  * Try to match local spec to a target type and, if successful, produce full
4765  * target spec (high-level, low-level + bit offset).
4766  */
4767 static int bpf_core_spec_match(struct bpf_core_spec *local_spec,
4768                                const struct btf *targ_btf, __u32 targ_id,
4769                                struct bpf_core_spec *targ_spec)
4770 {
4771         const struct btf_type *targ_type;
4772         const struct bpf_core_accessor *local_acc;
4773         struct bpf_core_accessor *targ_acc;
4774         int i, sz, matched;
4775
4776         memset(targ_spec, 0, sizeof(*targ_spec));
4777         targ_spec->btf = targ_btf;
4778         targ_spec->root_type_id = targ_id;
4779         targ_spec->relo_kind = local_spec->relo_kind;
4780
4781         if (core_relo_is_type_based(local_spec->relo_kind)) {
4782                 return bpf_core_types_are_compat(local_spec->btf,
4783                                                  local_spec->root_type_id,
4784                                                  targ_btf, targ_id);
4785         }
4786
4787         local_acc = &local_spec->spec[0];
4788         targ_acc = &targ_spec->spec[0];
4789
4790         if (core_relo_is_enumval_based(local_spec->relo_kind)) {
4791                 size_t local_essent_len, targ_essent_len;
4792                 const struct btf_enum *e;
4793                 const char *targ_name;
4794
4795                 /* has to resolve to an enum */
4796                 targ_type = skip_mods_and_typedefs(targ_spec->btf, targ_id, &targ_id);
4797                 if (!btf_is_enum(targ_type))
4798                         return 0;
4799
4800                 local_essent_len = bpf_core_essential_name_len(local_acc->name);
4801
4802                 for (i = 0, e = btf_enum(targ_type); i < btf_vlen(targ_type); i++, e++) {
4803                         targ_name = btf__name_by_offset(targ_spec->btf, e->name_off);
4804                         targ_essent_len = bpf_core_essential_name_len(targ_name);
4805                         if (targ_essent_len != local_essent_len)
4806                                 continue;
4807                         if (strncmp(local_acc->name, targ_name, local_essent_len) == 0) {
4808                                 targ_acc->type_id = targ_id;
4809                                 targ_acc->idx = i;
4810                                 targ_acc->name = targ_name;
4811                                 targ_spec->len++;
4812                                 targ_spec->raw_spec[targ_spec->raw_len] = targ_acc->idx;
4813                                 targ_spec->raw_len++;
4814                                 return 1;
4815                         }
4816                 }
4817                 return 0;
4818         }
4819
4820         if (!core_relo_is_field_based(local_spec->relo_kind))
4821                 return -EINVAL;
4822
4823         for (i = 0; i < local_spec->len; i++, local_acc++, targ_acc++) {
4824                 targ_type = skip_mods_and_typedefs(targ_spec->btf, targ_id,
4825                                                    &targ_id);
4826                 if (!targ_type)
4827                         return -EINVAL;
4828
4829                 if (local_acc->name) {
4830                         matched = bpf_core_match_member(local_spec->btf,
4831                                                         local_acc,
4832                                                         targ_btf, targ_id,
4833                                                         targ_spec, &targ_id);
4834                         if (matched <= 0)
4835                                 return matched;
4836                 } else {
4837                         /* for i=0, targ_id is already treated as array element
4838                          * type (because it's the original struct), for others
4839                          * we should find array element type first
4840                          */
4841                         if (i > 0) {
4842                                 const struct btf_array *a;
4843                                 bool flex;
4844
4845                                 if (!btf_is_array(targ_type))
4846                                         return 0;
4847
4848                                 a = btf_array(targ_type);
4849                                 flex = is_flex_arr(targ_btf, targ_acc - 1, a);
4850                                 if (!flex && local_acc->idx >= a->nelems)
4851                                         return 0;
4852                                 if (!skip_mods_and_typedefs(targ_btf, a->type,
4853                                                             &targ_id))
4854                                         return -EINVAL;
4855                         }
4856
4857                         /* too deep struct/union/array nesting */
4858                         if (targ_spec->raw_len == BPF_CORE_SPEC_MAX_LEN)
4859                                 return -E2BIG;
4860
4861                         targ_acc->type_id = targ_id;
4862                         targ_acc->idx = local_acc->idx;
4863                         targ_acc->name = NULL;
4864                         targ_spec->len++;
4865                         targ_spec->raw_spec[targ_spec->raw_len] = targ_acc->idx;
4866                         targ_spec->raw_len++;
4867
4868                         sz = btf__resolve_size(targ_btf, targ_id);
4869                         if (sz < 0)
4870                                 return sz;
4871                         targ_spec->bit_offset += local_acc->idx * sz * 8;
4872                 }
4873         }
4874
4875         return 1;
4876 }
4877
4878 static int bpf_core_calc_field_relo(const struct bpf_program *prog,
4879                                     const struct bpf_core_relo *relo,
4880                                     const struct bpf_core_spec *spec,
4881                                     __u32 *val, bool *validate)
4882 {
4883         const struct bpf_core_accessor *acc;
4884         const struct btf_type *t;
4885         __u32 byte_off, byte_sz, bit_off, bit_sz;
4886         const struct btf_member *m;
4887         const struct btf_type *mt;
4888         bool bitfield;
4889         __s64 sz;
4890
4891         if (relo->kind == BPF_FIELD_EXISTS) {
4892                 *val = spec ? 1 : 0;
4893                 return 0;
4894         }
4895
4896         if (!spec)
4897                 return -EUCLEAN; /* request instruction poisoning */
4898
4899         acc = &spec->spec[spec->len - 1];
4900         t = btf__type_by_id(spec->btf, acc->type_id);
4901
4902         /* a[n] accessor needs special handling */
4903         if (!acc->name) {
4904                 if (relo->kind == BPF_FIELD_BYTE_OFFSET) {
4905                         *val = spec->bit_offset / 8;
4906                 } else if (relo->kind == BPF_FIELD_BYTE_SIZE) {
4907                         sz = btf__resolve_size(spec->btf, acc->type_id);
4908                         if (sz < 0)
4909                                 return -EINVAL;
4910                         *val = sz;
4911                 } else {
4912                         pr_warn("prog '%s': relo %d at insn #%d can't be applied to array access\n",
4913                                 bpf_program__title(prog, false),
4914                                 relo->kind, relo->insn_off / 8);
4915                         return -EINVAL;
4916                 }
4917                 if (validate)
4918                         *validate = true;
4919                 return 0;
4920         }
4921
4922         m = btf_members(t) + acc->idx;
4923         mt = skip_mods_and_typedefs(spec->btf, m->type, NULL);
4924         bit_off = spec->bit_offset;
4925         bit_sz = btf_member_bitfield_size(t, acc->idx);
4926
4927         bitfield = bit_sz > 0;
4928         if (bitfield) {
4929                 byte_sz = mt->size;
4930                 byte_off = bit_off / 8 / byte_sz * byte_sz;
4931                 /* figure out smallest int size necessary for bitfield load */
4932                 while (bit_off + bit_sz - byte_off * 8 > byte_sz * 8) {
4933                         if (byte_sz >= 8) {
4934                                 /* bitfield can't be read with 64-bit read */
4935                                 pr_warn("prog '%s': relo %d at insn #%d can't be satisfied for bitfield\n",
4936                                         bpf_program__title(prog, false),
4937                                         relo->kind, relo->insn_off / 8);
4938                                 return -E2BIG;
4939                         }
4940                         byte_sz *= 2;
4941                         byte_off = bit_off / 8 / byte_sz * byte_sz;
4942                 }
4943         } else {
4944                 sz = btf__resolve_size(spec->btf, m->type);
4945                 if (sz < 0)
4946                         return -EINVAL;
4947                 byte_sz = sz;
4948                 byte_off = spec->bit_offset / 8;
4949                 bit_sz = byte_sz * 8;
4950         }
4951
4952         /* for bitfields, all the relocatable aspects are ambiguous and we
4953          * might disagree with compiler, so turn off validation of expected
4954          * value, except for signedness
4955          */
4956         if (validate)
4957                 *validate = !bitfield;
4958
4959         switch (relo->kind) {
4960         case BPF_FIELD_BYTE_OFFSET:
4961                 *val = byte_off;
4962                 break;
4963         case BPF_FIELD_BYTE_SIZE:
4964                 *val = byte_sz;
4965                 break;
4966         case BPF_FIELD_SIGNED:
4967                 /* enums will be assumed unsigned */
4968                 *val = btf_is_enum(mt) ||
4969                        (btf_int_encoding(mt) & BTF_INT_SIGNED);
4970                 if (validate)
4971                         *validate = true; /* signedness is never ambiguous */
4972                 break;
4973         case BPF_FIELD_LSHIFT_U64:
4974 #if __BYTE_ORDER == __LITTLE_ENDIAN
                *val = 64 - (bit_off + bit_sz - byte_off * 8);
4976 #else
4977                 *val = (8 - byte_sz) * 8 + (bit_off - byte_off * 8);
4978 #endif
4979                 break;
4980         case BPF_FIELD_RSHIFT_U64:
4981                 *val = 64 - bit_sz;
4982                 if (validate)
4983                         *validate = true; /* right shift is never ambiguous */
4984                 break;
4985         case BPF_FIELD_EXISTS:
4986         default:
4987                 return -EOPNOTSUPP;
4988         }
4989
4990         return 0;
4991 }
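
/* A worked bitfield example (hypothetical layout, values follow from the code
 * above): for a bitfield 'int b : 6' with spec->bit_offset == 30 and
 * mt->size == 4:
 *   - byte_sz starts at 4 with byte_off = 30 / 8 / 4 * 4 = 0, but
 *     30 + 6 > 32 bits, so byte_sz doubles to 8 (byte_off stays 0) and the
 *     bitfield is loaded with a single 8-byte read;
 *   - BPF_FIELD_LSHIFT_U64 (little-endian) = 64 - (30 + 6) = 28;
 *   - BPF_FIELD_RSHIFT_U64 = 64 - 6 = 58.
 */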
4992
4993 static int bpf_core_calc_type_relo(const struct bpf_core_relo *relo,
4994                                    const struct bpf_core_spec *spec,
4995                                    __u32 *val)
4996 {
4997         __s64 sz;
4998
4999         /* type-based relos return zero when target type is not found */
5000         if (!spec) {
5001                 *val = 0;
5002                 return 0;
5003         }
5004
5005         switch (relo->kind) {
5006         case BPF_TYPE_ID_TARGET:
5007                 *val = spec->root_type_id;
5008                 break;
5009         case BPF_TYPE_EXISTS:
5010                 *val = 1;
5011                 break;
5012         case BPF_TYPE_SIZE:
5013                 sz = btf__resolve_size(spec->btf, spec->root_type_id);
5014                 if (sz < 0)
5015                         return -EINVAL;
5016                 *val = sz;
5017                 break;
5018         case BPF_TYPE_ID_LOCAL:
5019         /* BPF_TYPE_ID_LOCAL is handled specially and shouldn't get here */
5020         default:
5021                 return -EOPNOTSUPP;
5022         }
5023
5024         return 0;
5025 }
5026
5027 static int bpf_core_calc_enumval_relo(const struct bpf_core_relo *relo,
5028                                       const struct bpf_core_spec *spec,
5029                                       __u32 *val)
5030 {
5031         const struct btf_type *t;
5032         const struct btf_enum *e;
5033
5034         switch (relo->kind) {
5035         case BPF_ENUMVAL_EXISTS:
5036                 *val = spec ? 1 : 0;
5037                 break;
5038         case BPF_ENUMVAL_VALUE:
5039                 if (!spec)
5040                         return -EUCLEAN; /* request instruction poisoning */
5041                 t = btf__type_by_id(spec->btf, spec->spec[0].type_id);
5042                 e = btf_enum(t) + spec->spec[0].idx;
5043                 *val = e->val;
5044                 break;
5045         default:
5046                 return -EOPNOTSUPP;
5047         }
5048
5049         return 0;
5050 }
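
/* E.g., for a hypothetical 'enum state { RUNNING = 2 }' found in target BTF,
 * BPF_ENUMVAL_VALUE yields 2 and BPF_ENUMVAL_EXISTS yields 1; if no matching
 * enumerator was found (spec == NULL), EXISTS yields 0, while VALUE requests
 * instruction poisoning via -EUCLEAN.
 */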
5051
struct bpf_core_relo_res {
5054         /* expected value in the instruction, unless validate == false */
5055         __u32 orig_val;
5056         /* new value that needs to be patched up to */
5057         __u32 new_val;
5058         /* relocation unsuccessful, poison instruction, but don't fail load */
5059         bool poison;
5060         /* some relocations can't be validated against orig_val */
5061         bool validate;
5062 };
5063
5064 /* Calculate original and target relocation values, given local and target
5065  * specs and relocation kind. These values are calculated for each candidate.
5066  * If there are multiple candidates, resulting values should all be consistent
5067  * with each other. Otherwise, libbpf will refuse to proceed due to ambiguity.
5068  * If instruction has to be poisoned, *poison will be set to true.
5069  */
5070 static int bpf_core_calc_relo(const struct bpf_program *prog,
5071                               const struct bpf_core_relo *relo,
5072                               int relo_idx,
5073                               const struct bpf_core_spec *local_spec,
5074                               const struct bpf_core_spec *targ_spec,
5075                               struct bpf_core_relo_res *res)
5076 {
5077         int err = -EOPNOTSUPP;
5078
5079         res->orig_val = 0;
5080         res->new_val = 0;
5081         res->poison = false;
5082         res->validate = true;
5083
5084         if (core_relo_is_field_based(relo->kind)) {
5085                 err = bpf_core_calc_field_relo(prog, relo, local_spec, &res->orig_val, &res->validate);
5086                 err = err ?: bpf_core_calc_field_relo(prog, relo, targ_spec, &res->new_val, NULL);
5087         } else if (core_relo_is_type_based(relo->kind)) {
5088                 err = bpf_core_calc_type_relo(relo, local_spec, &res->orig_val);
5089                 err = err ?: bpf_core_calc_type_relo(relo, targ_spec, &res->new_val);
5090         } else if (core_relo_is_enumval_based(relo->kind)) {
5091                 err = bpf_core_calc_enumval_relo(relo, local_spec, &res->orig_val);
5092                 err = err ?: bpf_core_calc_enumval_relo(relo, targ_spec, &res->new_val);
5093         }
5094
5095         if (err == -EUCLEAN) {
5096                 /* EUCLEAN is used to signal instruction poisoning request */
5097                 res->poison = true;
5098                 err = 0;
5099         } else if (err == -EOPNOTSUPP) {
5100                 /* EOPNOTSUPP means unknown/unsupported relocation */
5101                 pr_warn("prog '%s': relo #%d: unrecognized CO-RE relocation %s (%d) at insn #%d\n",
5102                         bpf_program__title(prog, false), relo_idx,
5103                         core_relo_kind_str(relo->kind), relo->kind, relo->insn_off / 8);
5104         }
5105
5106         return err;
5107 }
5108
5109 /*
 * Turn an instruction for which CO-RE relocation failed into an invalid one
 * with a distinct signature.
5112  */
5113 static void bpf_core_poison_insn(struct bpf_program *prog, int relo_idx,
5114                                  int insn_idx, struct bpf_insn *insn)
5115 {
5116         pr_debug("prog '%s': relo #%d: substituting insn #%d w/ invalid insn\n",
5117                  bpf_program__title(prog, false), relo_idx, insn_idx);
5118         insn->code = BPF_JMP | BPF_CALL;
5119         insn->dst_reg = 0;
5120         insn->src_reg = 0;
5121         insn->off = 0;
        /* if this instruction is reachable (not dead code),
5123          * verifier will complain with the following message:
5124          * invalid func unknown#195896080
5125          */
5126         insn->imm = 195896080; /* => 0xbad2310 => "bad relo" */
5127 }
5128
5129 static bool is_ldimm64(struct bpf_insn *insn)
5130 {
5131         return insn->code == (BPF_LD | BPF_IMM | BPF_DW);
5132 }
5133
5134 /*
5135  * Patch relocatable BPF instruction.
5136  *
5137  * Patched value is determined by relocation kind and target specification.
 * For existence relocations, the target spec will be NULL if the field/type
 * is not found.
5139  * Expected insn->imm value is determined using relocation kind and local
5140  * spec, and is checked before patching instruction. If actual insn->imm value
5141  * is wrong, bail out with error.
5142  *
 * Currently four classes of BPF instructions are supported:
 * 1. rX = <imm> (assignment with immediate operand);
 * 2. rX += <imm> (arithmetic operations with immediate operand);
 * 3. rX = <imm64> (load with 64-bit immediate value);
 * 4. memory loads/stores (BPF_LDX/BPF_ST/BPF_STX), where the 16-bit memory
 *    access offset <off> is patched.
5147  */
5148 static int bpf_core_patch_insn(struct bpf_program *prog,
5149                                const struct bpf_core_relo *relo,
5150                                int relo_idx,
5151                                const struct bpf_core_relo_res *res)
5152 {
5153         __u32 orig_val, new_val;
5154         struct bpf_insn *insn;
5155         int insn_idx;
5156         __u8 class;
5157
5158         if (relo->insn_off % sizeof(struct bpf_insn))
5159                 return -EINVAL;
5160         insn_idx = relo->insn_off / sizeof(struct bpf_insn);
5161         insn = &prog->insns[insn_idx];
5162         class = BPF_CLASS(insn->code);
5163
5164         if (res->poison) {
5165                 /* poison second part of ldimm64 to avoid confusing error from
5166                  * verifier about "unknown opcode 00"
5167                  */
5168                 if (is_ldimm64(insn))
5169                         bpf_core_poison_insn(prog, relo_idx, insn_idx + 1, insn + 1);
5170                 bpf_core_poison_insn(prog, relo_idx, insn_idx, insn);
5171                 return 0;
5172         }
5173
5174         orig_val = res->orig_val;
5175         new_val = res->new_val;
5176
5177         switch (class) {
5178         case BPF_ALU:
5179         case BPF_ALU64:
5180                 if (BPF_SRC(insn->code) != BPF_K)
5181                         return -EINVAL;
5182                 if (res->validate && insn->imm != orig_val) {
5183                         pr_warn("prog '%s': relo #%d: unexpected insn #%d (ALU/ALU64) value: got %u, exp %u -> %u\n",
5184                                 bpf_program__title(prog, false), relo_idx,
5185                                 insn_idx, insn->imm, orig_val, new_val);
5186                         return -EINVAL;
5187                 }
5188                 orig_val = insn->imm;
5189                 insn->imm = new_val;
5190                 pr_debug("prog '%s': relo #%d: patched insn #%d (ALU/ALU64) imm %u -> %u\n",
5191                          bpf_program__title(prog, false), relo_idx, insn_idx,
5192                          orig_val, new_val);
5193                 break;
5194         case BPF_LDX:
5195         case BPF_ST:
5196         case BPF_STX:
5197                 if (res->validate && insn->off != orig_val) {
5198                         pr_warn("prog '%s': relo #%d: unexpected insn #%d (LDX/ST/STX) value: got %u, exp %u -> %u\n",
5199                                 bpf_program__title(prog, false), relo_idx,
5200                                 insn_idx, insn->off, orig_val, new_val);
5201                         return -EINVAL;
5202                 }
5203                 if (new_val > SHRT_MAX) {
5204                         pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) value too big: %u\n",
5205                                 bpf_program__title(prog, false), relo_idx,
5206                                 insn_idx, new_val);
5207                         return -ERANGE;
5208                 }
5209                 orig_val = insn->off;
5210                 insn->off = new_val;
5211                 pr_debug("prog '%s': relo #%d: patched insn #%d (LDX/ST/STX) off %u -> %u\n",
5212                          bpf_program__title(prog, false), relo_idx, insn_idx,
5213                          orig_val, new_val);
5214                 break;
5215         case BPF_LD: {
5216                 __u64 imm;
5217
5218                 if (!is_ldimm64(insn) ||
5219                     insn[0].src_reg != 0 || insn[0].off != 0 ||
5220                     insn_idx + 1 >= prog->insns_cnt ||
5221                     insn[1].code != 0 || insn[1].dst_reg != 0 ||
5222                     insn[1].src_reg != 0 || insn[1].off != 0) {
5223                         pr_warn("prog '%s': relo #%d: insn #%d (LDIMM64) has unexpected form\n",
5224                                 bpf_program__title(prog, false), relo_idx, insn_idx);
5225                         return -EINVAL;
5226                 }
5227
5228                 imm = insn[0].imm + ((__u64)insn[1].imm << 32);
5229                 if (res->validate && imm != orig_val) {
5230                         pr_warn("prog '%s': relo #%d: unexpected insn #%d (LDIMM64) value: got %llu, exp %u -> %u\n",
5231                                 bpf_program__title(prog, false), relo_idx,
5232                                 insn_idx, imm, orig_val, new_val);
5233                         return -EINVAL;
5234                 }
5235
5236                 insn[0].imm = new_val;
5237                 insn[1].imm = 0; /* currently only 32-bit values are supported */
5238                 pr_debug("prog '%s': relo #%d: patched insn #%d (LDIMM64) imm64 %llu -> %u\n",
5239                          bpf_program__title(prog, false), relo_idx, insn_idx,
5240                          imm, new_val);
5241                 break;
5242         }
5243         default:
5244                 pr_warn("prog '%s': relo #%d: trying to relocate unrecognized insn #%d, code:0x%x, src:0x%x, dst:0x%x, off:0x%x, imm:0x%x\n",
5245                         bpf_program__title(prog, false), relo_idx,
5246                         insn_idx, insn->code, insn->src_reg, insn->dst_reg,
5247                         insn->off, insn->imm);
5248                 return -EINVAL;
5249         }
5250
5251         return 0;
5252 }
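
/* For illustration (hypothetical offsets): if a target kernel moved a field
 * from byte offset 8 to 16, a byte-offset relocation would patch, e.g.:
 *
 *   r1 = *(u32 *)(r2 + 8);  -->  r1 = *(u32 *)(r2 + 16);  (BPF_LDX, off)
 *   r3 += 8;                -->  r3 += 16;                (BPF_ALU64, imm)
 *   r4 = 8 ll;              -->  r4 = 16 ll;              (BPF_LD, imm64)
 */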
5253
5254 /* Output spec definition in the format:
 * [<type-id>] <kind> <type-name><spec> (<raw-spec> @ offset <byte-offset>),
 * where <spec> is a C-syntax view of recorded field access, e.g.: .a[3].b
5257  */
5258 static void bpf_core_dump_spec(int level, const struct bpf_core_spec *spec)
5259 {
5260         const struct btf_type *t;
5261         const struct btf_enum *e;
5262         const char *s;
5263         __u32 type_id;
5264         int i;
5265
5266         type_id = spec->root_type_id;
5267         t = btf__type_by_id(spec->btf, type_id);
5268         s = btf__name_by_offset(spec->btf, t->name_off);
5269
5270         libbpf_print(level, "[%u] %s %s", type_id, btf_kind_str(t), str_is_empty(s) ? "<anon>" : s);
5271
5272         if (core_relo_is_type_based(spec->relo_kind))
5273                 return;
5274
5275         if (core_relo_is_enumval_based(spec->relo_kind)) {
5276                 t = skip_mods_and_typedefs(spec->btf, type_id, NULL);
5277                 e = btf_enum(t) + spec->raw_spec[0];
5278                 s = btf__name_by_offset(spec->btf, e->name_off);
5279
5280                 libbpf_print(level, "::%s = %u", s, e->val);
5281                 return;
5282         }
5283
5284         if (core_relo_is_field_based(spec->relo_kind)) {
5285                 for (i = 0; i < spec->len; i++) {
5286                         if (spec->spec[i].name)
5287                                 libbpf_print(level, ".%s", spec->spec[i].name);
5288                         else if (i > 0 || spec->spec[i].idx > 0)
5289                                 libbpf_print(level, "[%u]", spec->spec[i].idx);
5290                 }
5291
5292                 libbpf_print(level, " (");
5293                 for (i = 0; i < spec->raw_len; i++)
5294                         libbpf_print(level, "%s%d", i == 0 ? "" : ":", spec->raw_spec[i]);
5295
5296                 if (spec->bit_offset % 8)
5297                         libbpf_print(level, " @ offset %u.%u)",
5298                                      spec->bit_offset / 8, spec->bit_offset % 8);
5299                 else
5300                         libbpf_print(level, " @ offset %u)", spec->bit_offset / 8);
5301                 return;
5302         }
5303 }
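
/* E.g., for the 'struct sample' case used earlier, assuming a hypothetical
 * type ID 42, the dumped spec would read:
 *
 *   [42] struct sample.a[3] (0:1:2:3 @ offset 24)
 */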
5304
5305 static size_t bpf_core_hash_fn(const void *key, void *ctx)
5306 {
5307         return (size_t)key;
5308 }
5309
5310 static bool bpf_core_equal_fn(const void *k1, const void *k2, void *ctx)
5311 {
5312         return k1 == k2;
5313 }
5314
5315 static void *u32_as_hash_key(__u32 x)
5316 {
5317         return (void *)(uintptr_t)x;
5318 }
5319
5320 /*
5321  * CO-RE relocate single instruction.
5322  *
5323  * The outline and important points of the algorithm:
5324  * 1. For given local type, find corresponding candidate target types.
5325  *    Candidate type is a type with the same "essential" name, ignoring
5326  *    everything after last triple underscore (___). E.g., `sample`,
5327  *    `sample___flavor_one`, `sample___flavor_another_one`, are all candidates
5328  *    for each other. Names with triple underscore are referred to as
 *    "flavors" and are useful, among other things, for specifying/supporting
 *    incompatible variations of the same kernel struct, which
5331  *    might differ between different kernel versions and/or build
5332  *    configurations.
5333  *
5334  *    N.B. Struct "flavors" could be generated by bpftool's BTF-to-C
 *    converter, when deduplicated BTF of a kernel still contains more than
 *    one different type with the same name. In that case, ___2, ___3, etc.
 *    are appended starting from the second name conflict. But struct flavors
 *    are also useful when defined "locally", in a BPF program, to extract the
 *    same data from incompatible changes between different kernel
5340  *    versions/configurations. For instance, to handle field renames between
5341  *    kernel versions, one can use two flavors of the struct name with the
5342  *    same common name and use conditional relocations to extract that field,
 *    depending on target kernel version (see the illustrative example
 *    following this comment).
5344  * 2. For each candidate type, try to match local specification to this
5345  *    candidate target type. Matching involves finding corresponding
5346  *    high-level spec accessors, meaning that all named fields should match,
5347  *    as well as all array accesses should be within the actual bounds. Also,
5348  *    types should be compatible (see bpf_core_fields_are_compat for details).
5349  * 3. It is supported and expected that there might be multiple flavors
5350  *    matching the spec. As long as all the specs resolve to the same set of
5351  *    offsets across all candidates, there is no error. If there is any
 *    ambiguity, CO-RE relocation will fail. This is necessary to accommodate
 *    imperfections of BTF deduplication, which can cause slight duplication of
5354  *    the same BTF type, if some directly or indirectly referenced (by
5355  *    pointer) type gets resolved to different actual types in different
5356  *    object files. If such situation occurs, deduplicated BTF will end up
5357  *    with two (or more) structurally identical types, which differ only in
5358  *    types they refer to through pointer. This should be OK in most cases and
5359  *    is not an error.
5360  * 4. Candidate types search is performed by linearly scanning through all
5361  *    types in target BTF. It is anticipated that this is overall more
5362  *    efficient memory-wise and not significantly worse (if not better)
5363  *    CPU-wise compared to prebuilding a map from all local type names to
5364  *    a list of candidate type names. It's also sped up by caching resolved
5365  *    list of matching candidates per each local "root" type ID, that has at
5366  *    least one bpf_core_relo associated with it. This list is shared
5367  *    between multiple relocations for the same type ID and is updated as some
5368  *    of the candidates are pruned due to structural incompatibility.
5369  */
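/* An illustrative use of flavors (hypothetical struct/field names), as
 * referenced in point 1 above:
 *
 *   struct thread_struct___old { unsigned long fs; };      // older layout
 *   struct thread_struct___new { unsigned long fsbase; };  // newer layout
 *
 * Both share the essential name "thread_struct" and are thus matched against
 * the kernel's struct thread_struct; whichever field exists in target BTF
 * relocates successfully, and the access to the other one can be guarded by
 * a field-existence check so that its poisoned instruction stays unreachable.
 */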
5370 static int bpf_core_apply_relo(struct bpf_program *prog,
5371                                const struct bpf_core_relo *relo,
5372                                int relo_idx,
5373                                const struct btf *local_btf,
5374                                const struct btf *targ_btf,
5375                                struct hashmap *cand_cache)
5376 {
5377         const char *prog_name = bpf_program__title(prog, false);
5378         struct bpf_core_spec local_spec, cand_spec, targ_spec;
5379         const void *type_key = u32_as_hash_key(relo->type_id);
5380         struct bpf_core_relo_res cand_res, targ_res;
5381         const struct btf_type *local_type;
5382         const char *local_name;
5383         struct ids_vec *cand_ids;
5384         __u32 local_id, cand_id;
5385         const char *spec_str;
5386         int i, j, err;
5387
5388         local_id = relo->type_id;
5389         local_type = btf__type_by_id(local_btf, local_id);
5390         if (!local_type)
5391                 return -EINVAL;
5392
5393         local_name = btf__name_by_offset(local_btf, local_type->name_off);
5394         if (!local_name)
5395                 return -EINVAL;
5396
5397         spec_str = btf__name_by_offset(local_btf, relo->access_str_off);
5398         if (str_is_empty(spec_str))
5399                 return -EINVAL;
5400
5401         err = bpf_core_parse_spec(local_btf, local_id, spec_str, relo->kind, &local_spec);
5402         if (err) {
5403                 pr_warn("prog '%s': relo #%d: parsing [%d] %s %s + %s failed: %d\n",
5404                         prog_name, relo_idx, local_id, btf_kind_str(local_type),
5405                         str_is_empty(local_name) ? "<anon>" : local_name,
5406                         spec_str, err);
5407                 return -EINVAL;
5408         }
5409
5410         pr_debug("prog '%s': relo #%d: kind <%s> (%d), spec is ", prog_name,
5411                  relo_idx, core_relo_kind_str(relo->kind), relo->kind);
5412         bpf_core_dump_spec(LIBBPF_DEBUG, &local_spec);
5413         libbpf_print(LIBBPF_DEBUG, "\n");
5414
5415         /* TYPE_ID_LOCAL relo is special and doesn't need candidate search */
5416         if (relo->kind == BPF_TYPE_ID_LOCAL) {
5417                 targ_res.validate = true;
5418                 targ_res.poison = false;
5419                 targ_res.orig_val = local_spec.root_type_id;
5420                 targ_res.new_val = local_spec.root_type_id;
5421                 goto patch_insn;
5422         }
5423
5424         /* libbpf doesn't support candidate search for anonymous types */
        if (str_is_empty(local_name)) {
5426                 pr_warn("prog '%s': relo #%d: <%s> (%d) relocation doesn't support anonymous types\n",
5427                         prog_name, relo_idx, core_relo_kind_str(relo->kind), relo->kind);
5428                 return -EOPNOTSUPP;
5429         }
5430
5431         if (!hashmap__find(cand_cache, type_key, (void **)&cand_ids)) {
5432                 cand_ids = bpf_core_find_cands(local_btf, local_id, targ_btf);
5433                 if (IS_ERR(cand_ids)) {
                        pr_warn("prog '%s': relo #%d: target candidate search failed for [%d] %s %s: %ld\n",
5435                                 prog_name, relo_idx, local_id, btf_kind_str(local_type),
5436                                 local_name, PTR_ERR(cand_ids));
5437                         return PTR_ERR(cand_ids);
5438                 }
5439                 err = hashmap__set(cand_cache, type_key, cand_ids, NULL, NULL);
5440                 if (err) {
5441                         bpf_core_free_cands(cand_ids);
5442                         return err;
5443                 }
5444         }
5445
5446         for (i = 0, j = 0; i < cand_ids->len; i++) {
5447                 cand_id = cand_ids->data[i];
5448                 err = bpf_core_spec_match(&local_spec, targ_btf, cand_id, &cand_spec);
5449                 if (err < 0) {
5450                         pr_warn("prog '%s': relo #%d: error matching candidate #%d ",
5451                                 prog_name, relo_idx, i);
5452                         bpf_core_dump_spec(LIBBPF_WARN, &cand_spec);
5453                         libbpf_print(LIBBPF_WARN, ": %d\n", err);
5454                         return err;
5455                 }
5456
5457                 pr_debug("prog '%s': relo #%d: %s candidate #%d ", prog_name,
5458                          relo_idx, err == 0 ? "non-matching" : "matching", i);
5459                 bpf_core_dump_spec(LIBBPF_DEBUG, &cand_spec);
5460                 libbpf_print(LIBBPF_DEBUG, "\n");
5461
5462                 if (err == 0)
5463                         continue;
5464
5465                 err = bpf_core_calc_relo(prog, relo, relo_idx, &local_spec, &cand_spec, &cand_res);
5466                 if (err)
5467                         return err;
5468
5469                 if (j == 0) {
5470                         targ_res = cand_res;
5471                         targ_spec = cand_spec;
5472                 } else if (cand_spec.bit_offset != targ_spec.bit_offset) {
5473                         /* if there are many field relo candidates, they
5474                          * should all resolve to the same bit offset
5475                          */
5476                         pr_warn("prog '%s': relo #%d: field offset ambiguity: %u != %u\n",
5477                                 prog_name, relo_idx, cand_spec.bit_offset,
5478                                 targ_spec.bit_offset);
5479                         return -EINVAL;
5480                 } else if (cand_res.poison != targ_res.poison || cand_res.new_val != targ_res.new_val) {
5481                         /* all candidates should result in the same relocation
5482                          * decision and value, otherwise it's dangerous to
5483                          * proceed due to ambiguity
5484                          */
5485                         pr_warn("prog '%s': relo #%d: relocation decision ambiguity: %s %u != %s %u\n",
5486                                 prog_name, relo_idx,
5487                                 cand_res.poison ? "failure" : "success", cand_res.new_val,
5488                                 targ_res.poison ? "failure" : "success", targ_res.new_val);
5489                         return -EINVAL;
5490                 }
5491
5492                 cand_ids->data[j++] = cand_spec.root_type_id;
5493         }
5494
5495         /*
5496          * For a BPF_FIELD_EXISTS relo, or when the BPF program performs
5497          * field existence or kernel version/config checks, it's expected
5498          * that we might not find any candidates. In that case, if the field
5499          * wasn't found in any candidate, the list of candidates shouldn't
5500          * change at all; we'll just handle the relocation appropriately,
5501          * depending on the relo's kind.
5502          */
5503         if (j > 0)
5504                 cand_ids->len = j;
5505
5506         /*
5507          * If no candidates were found, it could be either a programmer
5508          * error or an expected case, depending on whether the instruction
5509          * with the relocation is guarded in a way that makes it unreachable
5510          * (dead code) when the relocation can't be resolved. This is handled
5511          * uniformly in bpf_core_patch_insn() by replacing that instruction
5512          * with a BPF helper call insn (using an invalid helper ID). If the
5513          * instruction is indeed unreachable, it will be ignored and
5514          * eliminated by the verifier. If it was an error, the verifier will
5515          * complain and point to a specific instruction number in its log.
5516          */
5517         if (j == 0) {
5518                 pr_debug("prog '%s': relo #%d: no matching targets found\n",
5519                          prog_name, relo_idx);
5520
5521                 /* calculate single target relo result explicitly */
5522                 err = bpf_core_calc_relo(prog, relo, relo_idx, &local_spec, NULL, &targ_res);
5523                 if (err)
5524                         return err;
5525         }
5526
5527 patch_insn:
5528         /* bpf_core_patch_insn() should know how to handle missing targ_spec */
5529         err = bpf_core_patch_insn(prog, relo, relo_idx, &targ_res);
5530         if (err) {
5531                 pr_warn("prog '%s': relo #%d: failed to patch insn at offset %d: %d\n",
5532                         prog_name, relo_idx, relo->insn_off, err);
5533                 return -EINVAL;
5534         }
5535
5536         return 0;
5537 }
5538
5539 static int
5540 bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path)
5541 {
5542         const struct btf_ext_info_sec *sec;
5543         const struct bpf_core_relo *rec;
5544         const struct btf_ext_info *seg;
5545         struct hashmap_entry *entry;
5546         struct hashmap *cand_cache = NULL;
5547         struct bpf_program *prog;
5548         struct btf *targ_btf;
5549         const char *sec_name;
5550         int i, err = 0;
5551
5552         if (obj->btf_ext->core_relo_info.len == 0)
5553                 return 0;
5554
5555         if (targ_btf_path)
5556                 targ_btf = btf__parse_elf(targ_btf_path, NULL);
5557         else
5558                 targ_btf = obj->btf_vmlinux;
5559         if (IS_ERR_OR_NULL(targ_btf)) {
5560                 pr_warn("failed to get target BTF: %ld\n", PTR_ERR(targ_btf));
5561                 return targ_btf ? PTR_ERR(targ_btf) : -ENOENT;
5562         }
5563
5564         cand_cache = hashmap__new(bpf_core_hash_fn, bpf_core_equal_fn, NULL);
5565         if (IS_ERR(cand_cache)) {
5566                 err = PTR_ERR(cand_cache);
5567                 goto out;
5568         }
5569
5570         seg = &obj->btf_ext->core_relo_info;
5571         for_each_btf_ext_sec(seg, sec) {
5572                 sec_name = btf__name_by_offset(obj->btf, sec->sec_name_off);
5573                 if (str_is_empty(sec_name)) {
5574                         err = -EINVAL;
5575                         goto out;
5576                 }
5577                 prog = NULL;
5578                 for (i = 0; i < obj->nr_programs; i++) {
5579                         if (!strcmp(obj->programs[i].section_name, sec_name)) {
5580                                 prog = &obj->programs[i];
5581                                 break;
5582                         }
5583                 }
5584                 if (!prog) {
5585                         pr_warn("failed to find program '%s' for CO-RE offset relocation\n",
5586                                 sec_name);
5587                         err = -EINVAL;
5588                         goto out;
5589                 }
5590
5591                 pr_debug("prog '%s': performing %d CO-RE offset relocs\n",
5592                          sec_name, sec->num_info);
5593
5594                 for_each_btf_ext_rec(seg, sec, i, rec) {
5595                         err = bpf_core_apply_relo(prog, rec, i, obj->btf,
5596                                                   targ_btf, cand_cache);
5597                         if (err) {
5598                                 pr_warn("prog '%s': relo #%d: failed to relocate: %d\n",
5599                                         sec_name, i, err);
5600                                 goto out;
5601                         }
5602                 }
5603         }
5604
5605 out:
5606         /* obj->btf_vmlinux is freed at the end of object load phase */
5607         if (targ_btf != obj->btf_vmlinux)
5608                 btf__free(targ_btf);
5609         if (!IS_ERR_OR_NULL(cand_cache)) {
5610                 hashmap__for_each_entry(cand_cache, entry, i) {
5611                         bpf_core_free_cands(entry->value);
5612                 }
5613                 hashmap__free(cand_cache);
5614         }
5615         return err;
5616 }
5617
5618 static int
5619 bpf_program__reloc_text(struct bpf_program *prog, struct bpf_object *obj,
5620                         struct reloc_desc *relo)
5621 {
5622         struct bpf_insn *insn, *new_insn;
5623         struct bpf_program *text;
5624         size_t new_cnt;
5625         int err;
5626
5627         if (prog->idx != obj->efile.text_shndx && prog->main_prog_cnt == 0) {
5628                 text = bpf_object__find_prog_by_idx(obj, obj->efile.text_shndx);
5629                 if (!text) {
5630                         pr_warn("no .text section found, yet relocations into .text exist\n");
5631                         return -LIBBPF_ERRNO__RELOC;
5632                 }
5633                 new_cnt = prog->insns_cnt + text->insns_cnt;
5634                 new_insn = libbpf_reallocarray(prog->insns, new_cnt, sizeof(*insn));
5635                 if (!new_insn) {
5636                         pr_warn("oom in prog realloc\n");
5637                         return -ENOMEM;
5638                 }
5639                 prog->insns = new_insn;
5640
5641                 if (obj->btf_ext) {
5642                         err = bpf_program_reloc_btf_ext(prog, obj,
5643                                                         text->section_name,
5644                                                         prog->insns_cnt);
5645                         if (err)
5646                                 return err;
5647                 }
5648
5649                 memcpy(new_insn + prog->insns_cnt, text->insns,
5650                        text->insns_cnt * sizeof(*insn));
5651                 prog->main_prog_cnt = prog->insns_cnt;
5652                 prog->insns_cnt = new_cnt;
5653                 pr_debug("added %zd insns from %s to prog %s\n",
5654                          text->insns_cnt, text->section_name,
5655                          prog->section_name);
5656         }
5657
5658         insn = &prog->insns[relo->insn_idx];
5659         insn->imm += relo->sym_off / 8 + prog->main_prog_cnt - relo->insn_idx;
5660         return 0;
5661 }
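
/*
 * A worked example of the imm fixup above (all numbers are hypothetical):
 * suppose the calling program has 10 insns and calls a .text subprogram
 * whose first insn sits at byte offset 16 (insn #2) within .text. After
 * .text is appended, main_prog_cnt = 10 and the subprog entry lands at
 * insn index 10 + 16/8 = 12. For a call at insn_idx = 4 carrying the
 * usual -1 placeholder imm that LLVM leaves for relocated calls, the
 * fixup computes
 *
 *	imm = -1 + 16/8 + 10 - 4 = 7,
 *
 * which is exactly the pc-relative distance from the insn following the
 * call to the subprog entry (12 - 4 - 1 = 7).
 */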
5662
5663 static int
5664 bpf_program__relocate(struct bpf_program *prog, struct bpf_object *obj)
5665 {
5666         int i, err;
5667
5668         if (!prog)
5669                 return 0;
5670
5671         if (obj->btf_ext) {
5672                 err = bpf_program_reloc_btf_ext(prog, obj,
5673                                                 prog->section_name, 0);
5674                 if (err)
5675                         return err;
5676         }
5677
5678         if (!prog->reloc_desc)
5679                 return 0;
5680
5681         for (i = 0; i < prog->nr_reloc; i++) {
5682                 struct reloc_desc *relo = &prog->reloc_desc[i];
5683                 struct bpf_insn *insn = &prog->insns[relo->insn_idx];
5684                 struct extern_desc *ext;
5685
5686                 if (relo->insn_idx + 1 >= (int)prog->insns_cnt) {
5687                         pr_warn("relocation out of range: '%s'\n",
5688                                 prog->section_name);
5689                         return -LIBBPF_ERRNO__RELOC;
5690                 }
5691
5692                 switch (relo->type) {
5693                 case RELO_LD64:
5694                         insn[0].src_reg = BPF_PSEUDO_MAP_FD;
5695                         insn[0].imm = obj->maps[relo->map_idx].fd;
5696                         break;
5697                 case RELO_DATA:
5698                         insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
5699                         insn[1].imm = insn[0].imm + relo->sym_off;
5700                         insn[0].imm = obj->maps[relo->map_idx].fd;
5701                         break;
5702                 case RELO_EXTERN:
5703                         ext = &obj->externs[relo->sym_off];
5704                         if (ext->type == EXT_KCFG) {
5705                                 insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
5706                                 insn[0].imm = obj->maps[obj->kconfig_map_idx].fd;
5707                                 insn[1].imm = ext->kcfg.data_off;
5708                         } else /* EXT_KSYM */ {
5709                                 insn[0].imm = (__u32)ext->ksym.addr;
5710                                 insn[1].imm = ext->ksym.addr >> 32;
5711                         }
5712                         break;
5713                 case RELO_CALL:
5714                         err = bpf_program__reloc_text(prog, obj, relo);
5715                         if (err)
5716                                 return err;
5717                         break;
5718                 default:
5719                         pr_warn("relo #%d: bad relo type %d\n", i, relo->type);
5720                         return -EINVAL;
5721                 }
5722         }
5723
5724         zfree(&prog->reloc_desc);
5725         prog->nr_reloc = 0;
5726         return 0;
5727 }
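
/*
 * Illustration of the RELO_DATA rewrite above (values are made up): an
 * access to a global at offset 4 within a .data symbol compiles to a
 * two-insn ldimm64, with insn[0].imm holding the in-symbol offset (4)
 * and relo->sym_off holding the symbol's offset within .data (say, 8).
 * With the .data map's fd being 5, the rewrite produces:
 *
 *	insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
 *	insn[1].imm = 4 + 8;	(offset into the map's value)
 *	insn[0].imm = 5;	(map fd)
 *
 * Note that insn[1].imm must be computed first, before insn[0].imm is
 * overwritten with the fd, which is why the code orders it that way.
 */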
5728
5729 static int
5730 bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path)
5731 {
5732         struct bpf_program *prog;
5733         size_t i;
5734         int err;
5735
5736         if (obj->btf_ext) {
5737                 err = bpf_object__relocate_core(obj, targ_btf_path);
5738                 if (err) {
5739                         pr_warn("failed to perform CO-RE relocations: %d\n",
5740                                 err);
5741                         return err;
5742                 }
5743         }
5744         /* ensure .text is relocated first, as it's going to be copied as-is
5745          * later for sub-program calls
5746          */
5747         for (i = 0; i < obj->nr_programs; i++) {
5748                 prog = &obj->programs[i];
5749                 if (prog->idx != obj->efile.text_shndx)
5750                         continue;
5751
5752                 err = bpf_program__relocate(prog, obj);
5753                 if (err) {
5754                         pr_warn("failed to relocate '%s'\n", prog->section_name);
5755                         return err;
5756                 }
5757                 break;
5758         }
5759         /* now relocate everything but .text, which by now is relocated
5760          * properly, so raw sub-program instructions can safely be copied
5761          * as-is
5762          */
5762         for (i = 0; i < obj->nr_programs; i++) {
5763                 prog = &obj->programs[i];
5764                 if (prog->idx == obj->efile.text_shndx)
5765                         continue;
5766
5767                 err = bpf_program__relocate(prog, obj);
5768                 if (err) {
5769                         pr_warn("failed to relocate '%s'\n", prog->section_name);
5770                         return err;
5771                 }
5772         }
5773         return 0;
5774 }
5775
5776 static int bpf_object__collect_st_ops_relos(struct bpf_object *obj,
5777                                             GElf_Shdr *shdr, Elf_Data *data);
5778
5779 static int bpf_object__collect_map_relos(struct bpf_object *obj,
5780                                          GElf_Shdr *shdr, Elf_Data *data)
5781 {
5782         const int bpf_ptr_sz = 8, host_ptr_sz = sizeof(void *);
5783         int i, j, nrels, new_sz;
5784         const struct btf_var_secinfo *vi = NULL;
5785         const struct btf_type *sec, *var, *def;
5786         const struct btf_member *member;
5787         struct bpf_map *map, *targ_map;
5788         const char *name, *mname;
5789         Elf_Data *symbols;
5790         unsigned int moff;
5791         GElf_Sym sym;
5792         GElf_Rel rel;
5793         void *tmp;
5794
5795         if (!obj->efile.btf_maps_sec_btf_id || !obj->btf)
5796                 return -EINVAL;
5797         sec = btf__type_by_id(obj->btf, obj->efile.btf_maps_sec_btf_id);
5798         if (!sec)
5799                 return -EINVAL;
5800
5801         symbols = obj->efile.symbols;
5802         nrels = shdr->sh_size / shdr->sh_entsize;
5803         for (i = 0; i < nrels; i++) {
5804                 if (!gelf_getrel(data, i, &rel)) {
5805                         pr_warn(".maps relo #%d: failed to get ELF relo\n", i);
5806                         return -LIBBPF_ERRNO__FORMAT;
5807                 }
5808                 if (!gelf_getsym(symbols, GELF_R_SYM(rel.r_info), &sym)) {
5809                         pr_warn(".maps relo #%d: symbol %zx not found\n",
5810                                 i, (size_t)GELF_R_SYM(rel.r_info));
5811                         return -LIBBPF_ERRNO__FORMAT;
5812                 }
5813                 name = elf_sym_str(obj, sym.st_name) ?: "<?>";
5814                 if (sym.st_shndx != obj->efile.btf_maps_shndx) {
5815                         pr_warn(".maps relo #%d: '%s' isn't a BTF-defined map\n",
5816                                 i, name);
5817                         return -LIBBPF_ERRNO__RELOC;
5818                 }
5819
5820                 pr_debug(".maps relo #%d: for %zd value %zd rel.r_offset %zu name %d ('%s')\n",
5821                          i, (ssize_t)(rel.r_info >> 32), (size_t)sym.st_value,
5822                          (size_t)rel.r_offset, sym.st_name, name);
5823
5824                 for (j = 0; j < obj->nr_maps; j++) {
5825                         map = &obj->maps[j];
5826                         if (map->sec_idx != obj->efile.btf_maps_shndx)
5827                                 continue;
5828
5829                         vi = btf_var_secinfos(sec) + map->btf_var_idx;
5830                         if (vi->offset <= rel.r_offset &&
5831                             rel.r_offset + bpf_ptr_sz <= vi->offset + vi->size)
5832                                 break;
5833                 }
5834                 if (j == obj->nr_maps) {
5835                         pr_warn(".maps relo #%d: cannot find map '%s' at rel.r_offset %zu\n",
5836                                 i, name, (size_t)rel.r_offset);
5837                         return -EINVAL;
5838                 }
5839
5840                 if (!bpf_map_type__is_map_in_map(map->def.type))
5841                         return -EINVAL;
5842                 if (map->def.type == BPF_MAP_TYPE_HASH_OF_MAPS &&
5843                     map->def.key_size != sizeof(int)) {
5844                         pr_warn(".maps relo #%d: hash-of-maps '%s' should have key size %zu\n",
5845                                 i, map->name, sizeof(int));
5846                         return -EINVAL;
5847                 }
5848
5849                 targ_map = bpf_object__find_map_by_name(obj, name);
5850                 if (!targ_map)
5851                         return -ESRCH;
5852
5853                 var = btf__type_by_id(obj->btf, vi->type);
5854                 def = skip_mods_and_typedefs(obj->btf, var->type, NULL);
5855                 if (btf_vlen(def) == 0)
5856                         return -EINVAL;
5857                 member = btf_members(def) + btf_vlen(def) - 1;
5858                 mname = btf__name_by_offset(obj->btf, member->name_off);
5859                 if (strcmp(mname, "values"))
5860                         return -EINVAL;
5861
5862                 moff = btf_member_bit_offset(def, btf_vlen(def) - 1) / 8;
5863                 if (rel.r_offset - vi->offset < moff)
5864                         return -EINVAL;
5865
5866                 moff = rel.r_offset - vi->offset - moff;
5867                 /* here we use the BPF pointer size, which is always 64-bit,
5868                  * as we are parsing an ELF that was built for the BPF target
5869                  */
5870                 if (moff % bpf_ptr_sz)
5871                         return -EINVAL;
5872                 moff /= bpf_ptr_sz;
5873                 if (moff >= map->init_slots_sz) {
5874                         new_sz = moff + 1;
5875                         tmp = libbpf_reallocarray(map->init_slots, new_sz, host_ptr_sz);
5876                         if (!tmp)
5877                                 return -ENOMEM;
5878                         map->init_slots = tmp;
5879                         memset(map->init_slots + map->init_slots_sz, 0,
5880                                (new_sz - map->init_slots_sz) * host_ptr_sz);
5881                         map->init_slots_sz = new_sz;
5882                 }
5883                 map->init_slots[moff] = targ_map;
5884
5885                 pr_debug(".maps relo #%d: map '%s' slot [%d] points to map '%s'\n",
5886                          i, map->name, moff, name);
5887         }
5888
5889         return 0;
5890 }
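
/*
 * For reference, the BTF-defined map-in-map layout the loop above parses
 * looks roughly like this on the BPF program side (inner_map/outer names
 * are made up; __uint/__type/__array come from bpf_helpers.h):
 *
 *	struct inner_map {
 *		__uint(type, BPF_MAP_TYPE_ARRAY);
 *		__uint(max_entries, 1);
 *		__type(key, int);
 *		__type(value, int);
 *	} inner_map1 SEC(".maps"), inner_map2 SEC(".maps");
 *
 *	struct {
 *		__uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
 *		__uint(max_entries, 2);
 *		__type(key, int);
 *		__array(values, struct inner_map);
 *	} outer SEC(".maps") = {
 *		.values = { &inner_map1, &inner_map2 },
 *	};
 *
 * Each &inner_mapN initializer is emitted as an ELF relocation against
 * the trailing 'values' member, which is what gets turned into an
 * init_slots[] entry here.
 */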
5891
5892 static int bpf_object__collect_reloc(struct bpf_object *obj)
5893 {
5894         int i, err;
5895
5896         if (!obj_elf_valid(obj)) {
5897                 pr_warn("Internal error: elf object is closed\n");
5898                 return -LIBBPF_ERRNO__INTERNAL;
5899         }
5900
5901         for (i = 0; i < obj->efile.nr_reloc_sects; i++) {
5902                 GElf_Shdr *shdr = &obj->efile.reloc_sects[i].shdr;
5903                 Elf_Data *data = obj->efile.reloc_sects[i].data;
5904                 int idx = shdr->sh_info;
5905                 struct bpf_program *prog;
5906
5907                 if (shdr->sh_type != SHT_REL) {
5908                         pr_warn("internal error at %d\n", __LINE__);
5909                         return -LIBBPF_ERRNO__INTERNAL;
5910                 }
5911
5912                 if (idx == obj->efile.st_ops_shndx) {
5913                         err = bpf_object__collect_st_ops_relos(obj, shdr, data);
5914                 } else if (idx == obj->efile.btf_maps_shndx) {
5915                         err = bpf_object__collect_map_relos(obj, shdr, data);
5916                 } else {
5917                         prog = bpf_object__find_prog_by_idx(obj, idx);
5918                         if (!prog) {
5919                                 pr_warn("relocation failed: no prog in section(%d)\n", idx);
5920                                 return -LIBBPF_ERRNO__RELOC;
5921                         }
5922                         err = bpf_program__collect_reloc(prog, shdr, data, obj);
5923                 }
5924                 if (err)
5925                         return err;
5926         }
5927         return 0;
5928 }
5929
5930 static bool insn_is_helper_call(struct bpf_insn *insn, enum bpf_func_id *func_id)
5931 {
5932         if (BPF_CLASS(insn->code) == BPF_JMP &&
5933             BPF_OP(insn->code) == BPF_CALL &&
5934             BPF_SRC(insn->code) == BPF_K &&
5935             insn->src_reg == 0 &&
5936             insn->dst_reg == 0) {
5937                 *func_id = insn->imm;
5938                 return true;
5939         }
5940         return false;
5941 }
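
/*
 * For reference, a helper call such as bpf_probe_read_kernel() compiles
 * to a single insn of roughly this shape (illustrative):
 *
 *	(struct bpf_insn){
 *		.code = BPF_JMP | BPF_CALL,
 *		.imm  = BPF_FUNC_probe_read_kernel,
 *	}
 *
 * with src_reg and dst_reg left at 0. It is that imm which
 * bpf_object__sanitize_prog() below rewrites when the running kernel
 * lacks the newer helpers.
 */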
5942
5943 static int bpf_object__sanitize_prog(struct bpf_object* obj, struct bpf_program *prog)
5944 {
5945         struct bpf_insn *insn = prog->insns;
5946         enum bpf_func_id func_id;
5947         int i;
5948
5949         for (i = 0; i < prog->insns_cnt; i++, insn++) {
5950                 if (!insn_is_helper_call(insn, &func_id))
5951                         continue;
5952
5953                 /* on kernels that don't yet support the
5954                  * bpf_probe_read_{kernel,user}[_str] helpers, fall back to
5955                  * bpf_probe_read{,_str}(), which older kernels do support
5956                  */
5957                 switch (func_id) {
5958                 case BPF_FUNC_probe_read_kernel:
5959                 case BPF_FUNC_probe_read_user:
5960                         if (!kernel_supports(FEAT_PROBE_READ_KERN))
5961                                 insn->imm = BPF_FUNC_probe_read;
5962                         break;
5963                 case BPF_FUNC_probe_read_kernel_str:
5964                 case BPF_FUNC_probe_read_user_str:
5965                         if (!kernel_supports(FEAT_PROBE_READ_KERN))
5966                                 insn->imm = BPF_FUNC_probe_read_str;
5967                         break;
5968                 default:
5969                         break;
5970                 }
5971         }
5972         return 0;
5973 }
5974
5975 static int
5976 load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt,
5977              char *license, __u32 kern_version, int *pfd)
5978 {
5979         struct bpf_load_program_attr load_attr;
5980         char *cp, errmsg[STRERR_BUFSIZE];
5981         size_t log_buf_size = 0;
5982         char *log_buf = NULL;
5983         int btf_fd, ret;
5984
5985         if (!insns || !insns_cnt)
5986                 return -EINVAL;
5987
5988         memset(&load_attr, 0, sizeof(struct bpf_load_program_attr));
5989         load_attr.prog_type = prog->type;
5990         /* old kernels might not support specifying expected_attach_type */
5991         if (!kernel_supports(FEAT_EXP_ATTACH_TYPE) && prog->sec_def &&
5992             prog->sec_def->is_exp_attach_type_optional)
5993                 load_attr.expected_attach_type = 0;
5994         else
5995                 load_attr.expected_attach_type = prog->expected_attach_type;
5996         if (kernel_supports(FEAT_PROG_NAME))
5997                 load_attr.name = prog->name;
5998         load_attr.insns = insns;
5999         load_attr.insns_cnt = insns_cnt;
6000         load_attr.license = license;
6001         if (prog->type == BPF_PROG_TYPE_STRUCT_OPS ||
6002             prog->type == BPF_PROG_TYPE_LSM) {
6003                 load_attr.attach_btf_id = prog->attach_btf_id;
6004         } else if (prog->type == BPF_PROG_TYPE_TRACING ||
6005                    prog->type == BPF_PROG_TYPE_EXT) {
6006                 load_attr.attach_prog_fd = prog->attach_prog_fd;
6007                 load_attr.attach_btf_id = prog->attach_btf_id;
6008         } else {
6009                 load_attr.kern_version = kern_version;
6010                 load_attr.prog_ifindex = prog->prog_ifindex;
6011         }
6012         /* specify func_info/line_info only if kernel supports them */
6013         btf_fd = bpf_object__btf_fd(prog->obj);
6014         if (btf_fd >= 0 && kernel_supports(FEAT_BTF_FUNC)) {
6015                 load_attr.prog_btf_fd = btf_fd;
6016                 load_attr.func_info = prog->func_info;
6017                 load_attr.func_info_rec_size = prog->func_info_rec_size;
6018                 load_attr.func_info_cnt = prog->func_info_cnt;
6019                 load_attr.line_info = prog->line_info;
6020                 load_attr.line_info_rec_size = prog->line_info_rec_size;
6021                 load_attr.line_info_cnt = prog->line_info_cnt;
6022         }
6023         load_attr.log_level = prog->log_level;
6024         load_attr.prog_flags = prog->prog_flags;
6025
6026 retry_load:
6027         if (log_buf_size) {
6028                 log_buf = malloc(log_buf_size);
6029                 if (!log_buf)
6030                         return -ENOMEM;
6031
6032                 *log_buf = 0;
6033         }
6034
6035         ret = bpf_load_program_xattr(&load_attr, log_buf, log_buf_size);
6036
6037         if (ret >= 0) {
6038                 if (log_buf && load_attr.log_level)
6039                         pr_debug("verifier log:\n%s", log_buf);
6040                 *pfd = ret;
6041                 ret = 0;
6042                 goto out;
6043         }
6044
6045         if (!log_buf || errno == ENOSPC) {
6046                 log_buf_size = max((size_t)BPF_LOG_BUF_SIZE,
6047                                    log_buf_size << 1);
6048
6049                 free(log_buf);
6050                 goto retry_load;
6051         }
6052         ret = -errno;
6053         cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
6054         pr_warn("load bpf program failed: %s\n", cp);
6055         pr_perm_msg(ret);
6056
6057         if (log_buf && log_buf[0] != '\0') {
6058                 ret = -LIBBPF_ERRNO__VERIFY;
6059                 pr_warn("-- BEGIN DUMP LOG ---\n");
6060                 pr_warn("\n%s\n", log_buf);
6061                 pr_warn("-- END LOG --\n");
6062         } else if (load_attr.insns_cnt >= BPF_MAXINSNS) {
6063                 pr_warn("Program too large (%zu insns), at most %d insns\n",
6064                         load_attr.insns_cnt, BPF_MAXINSNS);
6065                 ret = -LIBBPF_ERRNO__PROG2BIG;
6066         } else if (load_attr.prog_type != BPF_PROG_TYPE_KPROBE) {
6067                 /* Wrong program type? */
6068                 int fd;
6069
6070                 load_attr.prog_type = BPF_PROG_TYPE_KPROBE;
6071                 load_attr.expected_attach_type = 0;
6072                 fd = bpf_load_program_xattr(&load_attr, NULL, 0);
6073                 if (fd >= 0) {
6074                         close(fd);
6075                         ret = -LIBBPF_ERRNO__PROGTYPE;
6076                         goto out;
6077                 }
6078         }
6079
6080 out:
6081         free(log_buf);
6082         return ret;
6083 }
6084
6085 static int libbpf_find_attach_btf_id(struct bpf_program *prog);
6086
6087 int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_ver)
6088 {
6089         int err = 0, fd, i, btf_id;
6090
6091         if (prog->obj->loaded) {
6092                 pr_warn("prog '%s'('%s'): can't load after object was loaded\n",
6093                         prog->name, prog->section_name);
6094                 return -EINVAL;
6095         }
6096
6097         if ((prog->type == BPF_PROG_TYPE_TRACING ||
6098              prog->type == BPF_PROG_TYPE_LSM ||
6099              prog->type == BPF_PROG_TYPE_EXT) && !prog->attach_btf_id) {
6100                 btf_id = libbpf_find_attach_btf_id(prog);
6101                 if (btf_id <= 0)
6102                         return btf_id;
6103                 prog->attach_btf_id = btf_id;
6104         }
6105
6106         if (prog->instances.nr < 0 || !prog->instances.fds) {
6107                 if (prog->preprocessor) {
6108                         pr_warn("Internal error: can't load program '%s'\n",
6109                                 prog->section_name);
6110                         return -LIBBPF_ERRNO__INTERNAL;
6111                 }
6112
6113                 prog->instances.fds = malloc(sizeof(int));
6114                 if (!prog->instances.fds) {
6115                         pr_warn("Not enough memory for BPF fds\n");
6116                         return -ENOMEM;
6117                 }
6118                 prog->instances.nr = 1;
6119                 prog->instances.fds[0] = -1;
6120         }
6121
6122         if (!prog->preprocessor) {
6123                 if (prog->instances.nr != 1) {
6124                         pr_warn("Program '%s' is inconsistent: nr(%d) != 1\n",
6125                                 prog->section_name, prog->instances.nr);
6126                 }
6127                 err = load_program(prog, prog->insns, prog->insns_cnt,
6128                                    license, kern_ver, &fd);
6129                 if (!err)
6130                         prog->instances.fds[0] = fd;
6131                 goto out;
6132         }
6133
6134         for (i = 0; i < prog->instances.nr; i++) {
6135                 struct bpf_prog_prep_result result;
6136                 bpf_program_prep_t preprocessor = prog->preprocessor;
6137
6138                 memset(&result, 0, sizeof(result));
6139                 err = preprocessor(prog, i, prog->insns,
6140                                    prog->insns_cnt, &result);
6141                 if (err) {
6142                         pr_warn("Preprocessing the %dth instance of program '%s' failed\n",
6143                                 i, prog->section_name);
6144                         goto out;
6145                 }
6146
6147                 if (!result.new_insn_ptr || !result.new_insn_cnt) {
6148                         pr_debug("Skip loading the %dth instance of program '%s'\n",
6149                                  i, prog->section_name);
6150                         prog->instances.fds[i] = -1;
6151                         if (result.pfd)
6152                                 *result.pfd = -1;
6153                         continue;
6154                 }
6155
6156                 err = load_program(prog, result.new_insn_ptr,
6157                                    result.new_insn_cnt, license, kern_ver, &fd);
6158                 if (err) {
6159                         pr_warn("Loading the %dth instance of program '%s' failed\n",
6160                                 i, prog->section_name);
6161                         goto out;
6162                 }
6163
6164                 if (result.pfd)
6165                         *result.pfd = fd;
6166                 prog->instances.fds[i] = fd;
6167         }
6168 out:
6169         if (err)
6170                 pr_warn("failed to load program '%s'\n", prog->section_name);
6171         zfree(&prog->insns);
6172         prog->insns_cnt = 0;
6173         return err;
6174 }
6175
6176 static bool bpf_program__is_function_storage(const struct bpf_program *prog,
6177                                              const struct bpf_object *obj)
6178 {
6179         return prog->idx == obj->efile.text_shndx && obj->has_pseudo_calls;
6180 }
6181
6182 static int
6183 bpf_object__load_progs(struct bpf_object *obj, int log_level)
6184 {
6185         struct bpf_program *prog;
6186         size_t i;
6187         int err;
6188
6189         for (i = 0; i < obj->nr_programs; i++) {
6190                 prog = &obj->programs[i];
6191                 err = bpf_object__sanitize_prog(obj, prog);
6192                 if (err)
6193                         return err;
6194         }
6195
6196         for (i = 0; i < obj->nr_programs; i++) {
6197                 prog = &obj->programs[i];
6198                 if (bpf_program__is_function_storage(prog, obj))
6199                         continue;
6200                 if (!prog->load) {
6201                         pr_debug("prog '%s'('%s'): skipped loading\n",
6202                                  prog->name, prog->section_name);
6203                         continue;
6204                 }
6205                 prog->log_level |= log_level;
6206                 err = bpf_program__load(prog, obj->license, obj->kern_version);
6207                 if (err)
6208                         return err;
6209         }
6210         return 0;
6211 }
6212
6213 static const struct bpf_sec_def *find_sec_def(const char *sec_name);
6214
6215 static struct bpf_object *
6216 __bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz,
6217                    const struct bpf_object_open_opts *opts)
6218 {
6219         const char *obj_name, *kconfig;
6220         struct bpf_program *prog;
6221         struct bpf_object *obj;
6222         char tmp_name[64];
6223         int err;
6224
6225         if (elf_version(EV_CURRENT) == EV_NONE) {
6226                 pr_warn("failed to init libelf for %s\n",
6227                         path ? : "(mem buf)");
6228                 return ERR_PTR(-LIBBPF_ERRNO__LIBELF);
6229         }
6230
6231         if (!OPTS_VALID(opts, bpf_object_open_opts))
6232                 return ERR_PTR(-EINVAL);
6233
6234         obj_name = OPTS_GET(opts, object_name, NULL);
6235         if (obj_buf) {
6236                 if (!obj_name) {
6237                         snprintf(tmp_name, sizeof(tmp_name), "%lx-%lx",
6238                                  (unsigned long)obj_buf,
6239                                  (unsigned long)obj_buf_sz);
6240                         obj_name = tmp_name;
6241                 }
6242                 path = obj_name;
6243                 pr_debug("loading object '%s' from buffer\n", obj_name);
6244         }
6245
6246         obj = bpf_object__new(path, obj_buf, obj_buf_sz, obj_name);
6247         if (IS_ERR(obj))
6248                 return obj;
6249
6250         kconfig = OPTS_GET(opts, kconfig, NULL);
6251         if (kconfig) {
6252                 obj->kconfig = strdup(kconfig);
6253                 if (!obj->kconfig)
6254                         return ERR_PTR(-ENOMEM);
6255         }
6256
6257         err = bpf_object__elf_init(obj);
6258         err = err ? : bpf_object__check_endianness(obj);
6259         err = err ? : bpf_object__elf_collect(obj);
6260         err = err ? : bpf_object__collect_externs(obj);
6261         err = err ? : bpf_object__finalize_btf(obj);
6262         err = err ? : bpf_object__init_maps(obj, opts);
6263         err = err ? : bpf_object__init_prog_names(obj);
6264         err = err ? : bpf_object__collect_reloc(obj);
6265         if (err)
6266                 goto out;
6267         bpf_object__elf_finish(obj);
6268
6269         bpf_object__for_each_program(prog, obj) {
6270                 prog->sec_def = find_sec_def(prog->section_name);
6271                 if (!prog->sec_def)
6272                         /* couldn't guess, but user might manually specify */
6273                         continue;
6274
6275                 bpf_program__set_type(prog, prog->sec_def->prog_type);
6276                 bpf_program__set_expected_attach_type(prog,
6277                                 prog->sec_def->expected_attach_type);
6278
6279                 if (prog->sec_def->prog_type == BPF_PROG_TYPE_TRACING ||
6280                     prog->sec_def->prog_type == BPF_PROG_TYPE_EXT)
6281                         prog->attach_prog_fd = OPTS_GET(opts, attach_prog_fd, 0);
6282         }
6283
6284         return obj;
6285 out:
6286         bpf_object__close(obj);
6287         return ERR_PTR(err);
6288 }
6289
6290 static struct bpf_object *
6291 __bpf_object__open_xattr(struct bpf_object_open_attr *attr, int flags)
6292 {
6293         DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts,
6294                 .relaxed_maps = flags & MAPS_RELAX_COMPAT,
6295         );
6296
6297         /* param validation */
6298         if (!attr->file)
6299                 return NULL;
6300
6301         pr_debug("loading %s\n", attr->file);
6302         return __bpf_object__open(attr->file, NULL, 0, &opts);
6303 }
6304
6305 struct bpf_object *bpf_object__open_xattr(struct bpf_object_open_attr *attr)
6306 {
6307         return __bpf_object__open_xattr(attr, 0);
6308 }
6309
6310 struct bpf_object *bpf_object__open(const char *path)
6311 {
6312         struct bpf_object_open_attr attr = {
6313                 .file           = path,
6314                 .prog_type      = BPF_PROG_TYPE_UNSPEC,
6315         };
6316
6317         return bpf_object__open_xattr(&attr);
6318 }
6319
6320 struct bpf_object *
6321 bpf_object__open_file(const char *path, const struct bpf_object_open_opts *opts)
6322 {
6323         if (!path)
6324                 return ERR_PTR(-EINVAL);
6325
6326         pr_debug("loading %s\n", path);
6327
6328         return __bpf_object__open(path, NULL, 0, opts);
6329 }
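
/*
 * Typical usage of the options-based open API (a sketch; the file and
 * object names are made up):
 *
 *	DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts,
 *		.object_name = "my_obj",
 *		.kconfig = "CONFIG_DEBUG_INFO=y",
 *	);
 *	struct bpf_object *obj;
 *
 *	obj = bpf_object__open_file("prog.o", &opts);
 *	if (libbpf_get_error(obj))
 *		return -EINVAL;
 */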
6330
6331 struct bpf_object *
6332 bpf_object__open_mem(const void *obj_buf, size_t obj_buf_sz,
6333                      const struct bpf_object_open_opts *opts)
6334 {
6335         if (!obj_buf || obj_buf_sz == 0)
6336                 return ERR_PTR(-EINVAL);
6337
6338         return __bpf_object__open(NULL, obj_buf, obj_buf_sz, opts);
6339 }
6340
6341 struct bpf_object *
6342 bpf_object__open_buffer(const void *obj_buf, size_t obj_buf_sz,
6343                         const char *name)
6344 {
6345         DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts,
6346                 .object_name = name,
6347                 /* wrong default, but backwards-compatible */
6348                 .relaxed_maps = true,
6349         );
6350
6351         /* returning NULL is wrong, but backwards-compatible */
6352         if (!obj_buf || obj_buf_sz == 0)
6353                 return NULL;
6354
6355         return bpf_object__open_mem(obj_buf, obj_buf_sz, &opts);
6356 }
6357
6358 int bpf_object__unload(struct bpf_object *obj)
6359 {
6360         size_t i;
6361
6362         if (!obj)
6363                 return -EINVAL;
6364
6365         for (i = 0; i < obj->nr_maps; i++) {
6366                 zclose(obj->maps[i].fd);
6367                 if (obj->maps[i].st_ops)
6368                         zfree(&obj->maps[i].st_ops->kern_vdata);
6369         }
6370
6371         for (i = 0; i < obj->nr_programs; i++)
6372                 bpf_program__unload(&obj->programs[i]);
6373
6374         return 0;
6375 }
6376
6377 static int bpf_object__sanitize_maps(struct bpf_object *obj)
6378 {
6379         struct bpf_map *m;
6380
6381         bpf_object__for_each_map(m, obj) {
6382                 if (!bpf_map__is_internal(m))
6383                         continue;
6384                 if (!kernel_supports(FEAT_GLOBAL_DATA)) {
6385                         pr_warn("kernel doesn't support global data\n");
6386                         return -ENOTSUP;
6387                 }
6388                 if (!kernel_supports(FEAT_ARRAY_MMAP))
6389                         m->def.map_flags &= ~BPF_F_MMAPABLE;
6390         }
6391
6392         return 0;
6393 }
6394
6395 static int bpf_object__read_kallsyms_file(struct bpf_object *obj)
6396 {
6397         char sym_type, sym_name[500];
6398         unsigned long long sym_addr;
6399         struct extern_desc *ext;
6400         int ret, err = 0;
6401         FILE *f;
6402
6403         f = fopen("/proc/kallsyms", "r");
6404         if (!f) {
6405                 err = -errno;
6406                 pr_warn("failed to open /proc/kallsyms: %d\n", err);
6407                 return err;
6408         }
6409
6410         while (true) {
6411                 ret = fscanf(f, "%llx %c %499s%*[^\n]\n",
6412                              &sym_addr, &sym_type, sym_name);
6413                 if (ret == EOF && feof(f))
6414                         break;
6415                 if (ret != 3) {
6416                         pr_warn("failed to read kallsyms entry: %d\n", ret);
6417                         err = -EINVAL;
6418                         goto out;
6419                 }
6420
6421                 ext = find_extern_by_name(obj, sym_name);
6422                 if (!ext || ext->type != EXT_KSYM)
6423                         continue;
6424
6425                 if (ext->is_set && ext->ksym.addr != sym_addr) {
6426                         pr_warn("extern (ksym) '%s' resolution is ambiguous: 0x%llx or 0x%llx\n",
6427                                 sym_name, ext->ksym.addr, sym_addr);
6428                         err = -EINVAL;
6429                         goto out;
6430                 }
6431                 if (!ext->is_set) {
6432                         ext->is_set = true;
6433                         ext->ksym.addr = sym_addr;
6434                         pr_debug("extern (ksym) %s=0x%llx\n", sym_name, sym_addr);
6435                 }
6436         }
6437
6438 out:
6439         fclose(f);
6440         return err;
6441 }
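
/*
 * For reference, /proc/kallsyms lines have the shape (symbol and module
 * names below are made up):
 *
 *	ffffffff81000000 T _stext
 *	ffffffffc0123456 t some_func	[some_module]
 *
 * The fscanf() above consumes "<addr> <type> <name>" and discards any
 * trailing module annotation via %*[^\n]. Note that with kptr_restrict
 * in effect the addresses may read back as all zeroes.
 */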
6442
6443 static int bpf_object__resolve_externs(struct bpf_object *obj,
6444                                        const char *extra_kconfig)
6445 {
6446         bool need_config = false, need_kallsyms = false;
6447         struct extern_desc *ext;
6448         void *kcfg_data = NULL;
6449         int err, i;
6450
6451         if (obj->nr_extern == 0)
6452                 return 0;
6453
6454         if (obj->kconfig_map_idx >= 0)
6455                 kcfg_data = obj->maps[obj->kconfig_map_idx].mmaped;
6456
6457         for (i = 0; i < obj->nr_extern; i++) {
6458                 ext = &obj->externs[i];
6459
6460                 if (ext->type == EXT_KCFG &&
6461                     strcmp(ext->name, "LINUX_KERNEL_VERSION") == 0) {
6462                         void *ext_val = kcfg_data + ext->kcfg.data_off;
6463                         __u32 kver = get_kernel_version();
6464
6465                         if (!kver) {
6466                                 pr_warn("failed to get kernel version\n");
6467                                 return -EINVAL;
6468                         }
6469                         err = set_kcfg_value_num(ext, ext_val, kver);
6470                         if (err)
6471                                 return err;
6472                         pr_debug("extern (kcfg) %s=0x%x\n", ext->name, kver);
6473                 } else if (ext->type == EXT_KCFG &&
6474                            strncmp(ext->name, "CONFIG_", 7) == 0) {
6475                         need_config = true;
6476                 } else if (ext->type == EXT_KSYM) {
6477                         need_kallsyms = true;
6478                 } else {
6479                         pr_warn("unrecognized extern '%s'\n", ext->name);
6480                         return -EINVAL;
6481                 }
6482         }
6483         if (need_config && extra_kconfig) {
6484                 err = bpf_object__read_kconfig_mem(obj, extra_kconfig, kcfg_data);
6485                 if (err)
6486                         return -EINVAL;
6487                 need_config = false;
6488                 for (i = 0; i < obj->nr_extern; i++) {
6489                         ext = &obj->externs[i];
6490                         if (ext->type == EXT_KCFG && !ext->is_set) {
6491                                 need_config = true;
6492                                 break;
6493                         }
6494                 }
6495         }
6496         if (need_config) {
6497                 err = bpf_object__read_kconfig_file(obj, kcfg_data);
6498                 if (err)
6499                         return -EINVAL;
6500         }
6501         if (need_kallsyms) {
6502                 err = bpf_object__read_kallsyms_file(obj);
6503                 if (err)
6504                         return -EINVAL;
6505         }
6506         for (i = 0; i < obj->nr_extern; i++) {
6507                 ext = &obj->externs[i];
6508
6509                 if (!ext->is_set && !ext->is_weak) {
6510                         pr_warn("extern %s (strong) not resolved\n", ext->name);
6511                         return -ESRCH;
6512                 } else if (!ext->is_set) {
6513                         pr_debug("extern %s (weak) not resolved, defaulting to zero\n",
6514                                  ext->name);
6515                 }
6516         }
6517
6518         return 0;
6519 }
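
/*
 * The externs resolved above are declared on the BPF program side
 * roughly like this (a sketch; __kconfig/__ksym are the bpf_helpers.h
 * section attributes):
 *
 *	extern unsigned int LINUX_KERNEL_VERSION __kconfig;
 *	extern _Bool CONFIG_BPF_JIT __kconfig __weak;
 *	extern const void bpf_prog_active __ksym;
 *
 * Strong externs must resolve or loading fails with -ESRCH; weak ones
 * that stay unresolved default to zero.
 */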
6520
6521 int bpf_object__load_xattr(struct bpf_object_load_attr *attr)
6522 {
6523         struct bpf_object *obj;
6524         int err, i;
6525
6526         if (!attr)
6527                 return -EINVAL;
6528         obj = attr->obj;
6529         if (!obj)
6530                 return -EINVAL;
6531
6532         if (obj->loaded) {
6533                 pr_warn("object '%s': load can't be attempted twice\n", obj->name);
6534                 return -EINVAL;
6535         }
6536
6537         err = bpf_object__probe_loading(obj);
6538         err = err ? : bpf_object__resolve_externs(obj, obj->kconfig);
6539         err = err ? : bpf_object__sanitize_and_load_btf(obj);
6540         err = err ? : bpf_object__sanitize_maps(obj);
6541         err = err ? : bpf_object__load_vmlinux_btf(obj);
6542         err = err ? : bpf_object__init_kern_struct_ops_maps(obj);
6543         err = err ? : bpf_object__create_maps(obj);
6544         err = err ? : bpf_object__relocate(obj, attr->target_btf_path);
6545         err = err ? : bpf_object__load_progs(obj, attr->log_level);
6546
6547         btf__free(obj->btf_vmlinux);
6548         obj->btf_vmlinux = NULL;
6549
6550         obj->loaded = true; /* doesn't matter whether it succeeded or not */
6551
6552         if (err)
6553                 goto out;
6554
6555         return 0;
6556 out:
6557         /* unpin any maps that were auto-pinned during load */
6558         for (i = 0; i < obj->nr_maps; i++)
6559                 if (obj->maps[i].pinned && !obj->maps[i].reused)
6560                         bpf_map__unpin(&obj->maps[i], NULL);
6561
6562         bpf_object__unload(obj);
6563         pr_warn("failed to load object '%s'\n", obj->path);
6564         return err;
6565 }
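
/*
 * Example of loading with extra verifier logging and a custom CO-RE
 * target BTF (a sketch; the vmlinux path is hypothetical and must be an
 * ELF with a .BTF section, since btf__parse_elf() is used above):
 *
 *	struct bpf_object_load_attr load_attr = {
 *		.obj = obj,
 *		.log_level = 1,
 *		.target_btf_path = "/path/to/vmlinux",
 *	};
 *
 *	err = bpf_object__load_xattr(&load_attr);
 */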
6566
6567 int bpf_object__load(struct bpf_object *obj)
6568 {
6569         struct bpf_object_load_attr attr = {
6570                 .obj = obj,
6571         };
6572
6573         return bpf_object__load_xattr(&attr);
6574 }
6575
6576 static int make_parent_dir(const char *path)
6577 {
6578         char *cp, errmsg[STRERR_BUFSIZE];
6579         char *dname, *dir;
6580         int err = 0;
6581
6582         dname = strdup(path);
6583         if (dname == NULL)
6584                 return -ENOMEM;
6585
6586         dir = dirname(dname);
6587         if (mkdir(dir, 0700) && errno != EEXIST)
6588                 err = -errno;
6589
6590         free(dname);
6591         if (err) {
6592                 cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg));
6593                 pr_warn("failed to mkdir %s: %s\n", path, cp);
6594         }
6595         return err;
6596 }
6597
6598 static int check_path(const char *path)
6599 {
6600         char *cp, errmsg[STRERR_BUFSIZE];
6601         struct statfs st_fs;
6602         char *dname, *dir;
6603         int err = 0;
6604
6605         if (path == NULL)
6606                 return -EINVAL;
6607
6608         dname = strdup(path);
6609         if (dname == NULL)
6610                 return -ENOMEM;
6611
6612         dir = dirname(dname);
6613         if (statfs(dir, &st_fs)) {
6614                 cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
6615                 pr_warn("failed to statfs %s: %s\n", dir, cp);
6616                 err = -errno;
6617         }
6618         free(dname);
6619
6620         if (!err && st_fs.f_type != BPF_FS_MAGIC) {
6621                 pr_warn("specified path %s is not on BPF FS\n", path);
6622                 err = -EINVAL;
6623         }
6624
6625         return err;
6626 }
6627
6628 int bpf_program__pin_instance(struct bpf_program *prog, const char *path,
6629                               int instance)
6630 {
6631         char *cp, errmsg[STRERR_BUFSIZE];
6632         int err;
6633
6634         err = make_parent_dir(path);
6635         if (err)
6636                 return err;
6637
6638         err = check_path(path);
6639         if (err)
6640                 return err;
6641
6642         if (prog == NULL) {
6643                 pr_warn("invalid program pointer\n");
6644                 return -EINVAL;
6645         }
6646
6647         if (instance < 0 || instance >= prog->instances.nr) {
6648                 pr_warn("invalid prog instance %d of prog %s (max %d)\n",
6649                         instance, prog->section_name, prog->instances.nr);
6650                 return -EINVAL;
6651         }
6652
6653         if (bpf_obj_pin(prog->instances.fds[instance], path)) {
6654                 err = -errno;
6655                 cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
6656                 pr_warn("failed to pin program: %s\n", cp);
6657                 return err;
6658         }
6659         pr_debug("pinned program '%s'\n", path);
6660
6661         return 0;
6662 }
6663
6664 int bpf_program__unpin_instance(struct bpf_program *prog, const char *path,
6665                                 int instance)
6666 {
6667         int err;
6668
6669         err = check_path(path);
6670         if (err)
6671                 return err;
6672
6673         if (prog == NULL) {
6674                 pr_warn("invalid program pointer\n");
6675                 return -EINVAL;
6676         }
6677
6678         if (instance < 0 || instance >= prog->instances.nr) {
6679                 pr_warn("invalid prog instance %d of prog %s (max %d)\n",
6680                         instance, prog->section_name, prog->instances.nr);
6681                 return -EINVAL;
6682         }
6683
6684         err = unlink(path);
6685         if (err != 0)
6686                 return -errno;
6687         pr_debug("unpinned program '%s'\n", path);
6688
6689         return 0;
6690 }
6691
6692 int bpf_program__pin(struct bpf_program *prog, const char *path)
6693 {
6694         int i, err;
6695
6696         err = make_parent_dir(path);
6697         if (err)
6698                 return err;
6699
6700         err = check_path(path);
6701         if (err)
6702                 return err;
6703
6704         if (prog == NULL) {
6705                 pr_warn("invalid program pointer\n");
6706                 return -EINVAL;
6707         }
6708
6709         if (prog->instances.nr <= 0) {
6710                 pr_warn("no instances of prog %s to pin\n",
6711                         prog->section_name);
6712                 return -EINVAL;
6713         }
6714
6715         if (prog->instances.nr == 1) {
6716                 /* don't create subdirs when pinning single instance */
6717                 return bpf_program__pin_instance(prog, path, 0);
6718         }
6719
6720         for (i = 0; i < prog->instances.nr; i++) {
6721                 char buf[PATH_MAX];
6722                 int len;
6723
6724                 len = snprintf(buf, PATH_MAX, "%s/%d", path, i);
6725                 if (len < 0) {
6726                         err = -EINVAL;
6727                         goto err_unpin;
6728                 } else if (len >= PATH_MAX) {
6729                         err = -ENAMETOOLONG;
6730                         goto err_unpin;
6731                 }
6732
6733                 err = bpf_program__pin_instance(prog, buf, i);
6734                 if (err)
6735                         goto err_unpin;
6736         }
6737
6738         return 0;
6739
6740 err_unpin:
6741         for (i = i - 1; i >= 0; i--) {
6742                 char buf[PATH_MAX];
6743                 int len;
6744
6745                 len = snprintf(buf, PATH_MAX, "%s/%d", path, i);
6746                 if (len < 0)
6747                         continue;
6748                 else if (len >= PATH_MAX)
6749                         continue;
6750
6751                 bpf_program__unpin_instance(prog, buf, i);
6752         }
6753
6754         rmdir(path);
6755
6756         return err;
6757 }
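
/*
 * Resulting layout example (hypothetical paths): pinning a program with
 * three instances at "/sys/fs/bpf/prog" creates
 *
 *	/sys/fs/bpf/prog/0
 *	/sys/fs/bpf/prog/1
 *	/sys/fs/bpf/prog/2
 *
 * while the common single-instance case pins at "/sys/fs/bpf/prog"
 * itself, with no subdirectory.
 */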
6758
6759 int bpf_program__unpin(struct bpf_program *prog, const char *path)
6760 {
6761         int i, err;
6762
6763         err = check_path(path);
6764         if (err)
6765                 return err;
6766
6767         if (prog == NULL) {
6768                 pr_warn("invalid program pointer\n");
6769                 return -EINVAL;
6770         }
6771
6772         if (prog->instances.nr <= 0) {
6773                 pr_warn("no instances of prog %s to unpin\n",
6774                         prog->section_name);
6775                 return -EINVAL;
6776         }
6777
6778         if (prog->instances.nr == 1) {
6779                 /* don't create subdirs when pinning single instance */
6780                 return bpf_program__unpin_instance(prog, path, 0);
6781         }
6782
6783         for (i = 0; i < prog->instances.nr; i++) {
6784                 char buf[PATH_MAX];
6785                 int len;
6786
6787                 len = snprintf(buf, PATH_MAX, "%s/%d", path, i);
6788                 if (len < 0)
6789                         return -EINVAL;
6790                 else if (len >= PATH_MAX)
6791                         return -ENAMETOOLONG;
6792
6793                 err = bpf_program__unpin_instance(prog, buf, i);
6794                 if (err)
6795                         return err;
6796         }
6797
6798         err = rmdir(path);
6799         if (err)
6800                 return -errno;
6801
6802         return 0;
6803 }
6804
6805 int bpf_map__pin(struct bpf_map *map, const char *path)
6806 {
6807         char *cp, errmsg[STRERR_BUFSIZE];
6808         int err;
6809
6810         if (map == NULL) {
6811                 pr_warn("invalid map pointer\n");
6812                 return -EINVAL;
6813         }
6814
6815         if (map->pin_path) {
6816                 if (path && strcmp(path, map->pin_path)) {
6817                         pr_warn("map '%s' already has pin path '%s' different from '%s'\n",
6818                                 bpf_map__name(map), map->pin_path, path);
6819                         return -EINVAL;
6820                 } else if (map->pinned) {
6821                         pr_debug("map '%s' already pinned at '%s'; not re-pinning\n",
6822                                  bpf_map__name(map), map->pin_path);
6823                         return 0;
6824                 }
6825         } else {
6826                 if (!path) {
6827                         pr_warn("missing a path to pin map '%s' at\n",
6828                                 bpf_map__name(map));
6829                         return -EINVAL;
6830                 } else if (map->pinned) {
6831                         pr_warn("map '%s' already pinned\n", bpf_map__name(map));
6832                         return -EEXIST;
6833                 }
6834
6835                 map->pin_path = strdup(path);
6836                 if (!map->pin_path) {
6837                         err = -errno;
6838                         goto out_err;
6839                 }
6840         }
6841
6842         err = make_parent_dir(map->pin_path);
6843         if (err)
6844                 return err;
6845
6846         err = check_path(map->pin_path);
6847         if (err)
6848                 return err;
6849
6850         if (bpf_obj_pin(map->fd, map->pin_path)) {
6851                 err = -errno;
6852                 goto out_err;
6853         }
6854
6855         map->pinned = true;
6856         pr_debug("pinned map '%s'\n", map->pin_path);
6857
6858         return 0;
6859
6860 out_err:
6861         cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg));
6862         pr_warn("failed to pin map: %s\n", cp);
6863         return err;
6864 }
6865
6866 int bpf_map__unpin(struct bpf_map *map, const char *path)
6867 {
6868         int err;
6869
6870         if (map == NULL) {
6871                 pr_warn("invalid map pointer\n");
6872                 return -EINVAL;
6873         }
6874
6875         if (map->pin_path) {
6876                 if (path && strcmp(path, map->pin_path)) {
6877                         pr_warn("map '%s' already has pin path '%s' different from '%s'\n",
6878                                 bpf_map__name(map), map->pin_path, path);
6879                         return -EINVAL;
6880                 }
6881                 path = map->pin_path;
6882         } else if (!path) {
6883                 pr_warn("no path to unpin map '%s' from\n",
6884                         bpf_map__name(map));
6885                 return -EINVAL;
6886         }
6887
6888         err = check_path(path);
6889         if (err)
6890                 return err;
6891
6892         err = unlink(path);
6893         if (err != 0)
6894                 return -errno;
6895
6896         map->pinned = false;
6897         pr_debug("unpinned map '%s' from '%s'\n", bpf_map__name(map), path);
6898
6899         return 0;
6900 }
6901
6902 int bpf_map__set_pin_path(struct bpf_map *map, const char *path)
6903 {
6904         char *new = NULL;
6905
6906         if (path) {
6907                 new = strdup(path);
6908                 if (!new)
6909                         return -errno;
6910         }
6911
6912         free(map->pin_path);
6913         map->pin_path = new;
6914         return 0;
6915 }
6916
6917 const char *bpf_map__get_pin_path(const struct bpf_map *map)
6918 {
6919         return map->pin_path;
6920 }
6921
6922 bool bpf_map__is_pinned(const struct bpf_map *map)
6923 {
6924         return map->pinned;
6925 }
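
/* Usage sketch (hypothetical caller code, not part of libbpf): pin a map
 * under bpffs and unpin it again. Assumes the owning object is already
 * loaded (so map->fd is valid) and that bpffs is mounted; the
 * "/sys/fs/bpf/my_map" path is purely illustrative.
 */
static int example_pin_map(struct bpf_map *map)
{
        int err;

        /* remember the path; bpf_map__pin(map, NULL) will reuse it */
        err = bpf_map__set_pin_path(map, "/sys/fs/bpf/my_map");
        if (err)
                return err;

        err = bpf_map__pin(map, NULL);
        if (err)
                return err;

        if (bpf_map__is_pinned(map))
                pr_debug("map pinned at '%s'\n", bpf_map__get_pin_path(map));

        /* NULL is fine here too: the stored pin_path is used */
        return bpf_map__unpin(map, NULL);
}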
6926
6927 int bpf_object__pin_maps(struct bpf_object *obj, const char *path)
6928 {
6929         struct bpf_map *map;
6930         int err;
6931
6932         if (!obj)
6933                 return -ENOENT;
6934
6935         if (!obj->loaded) {
6936                 pr_warn("object not yet loaded; load it first\n");
6937                 return -ENOENT;
6938         }
6939
6940         bpf_object__for_each_map(map, obj) {
6941                 char *pin_path = NULL;
6942                 char buf[PATH_MAX];
6943
6944                 if (path) {
6945                         int len;
6946
6947                         len = snprintf(buf, PATH_MAX, "%s/%s", path,
6948                                        bpf_map__name(map));
6949                         if (len < 0) {
6950                                 err = -EINVAL;
6951                                 goto err_unpin_maps;
6952                         } else if (len >= PATH_MAX) {
6953                                 err = -ENAMETOOLONG;
6954                                 goto err_unpin_maps;
6955                         }
6956                         pin_path = buf;
6957                 } else if (!map->pin_path) {
6958                         continue;
6959                 }
6960
6961                 err = bpf_map__pin(map, pin_path);
6962                 if (err)
6963                         goto err_unpin_maps;
6964         }
6965
6966         return 0;
6967
6968 err_unpin_maps:
6969         while ((map = bpf_map__prev(map, obj))) {
6970                 if (!map->pin_path)
6971                         continue;
6972
6973                 bpf_map__unpin(map, NULL);
6974         }
6975
6976         return err;
6977 }
6978
6979 int bpf_object__unpin_maps(struct bpf_object *obj, const char *path)
6980 {
6981         struct bpf_map *map;
6982         int err;
6983
6984         if (!obj)
6985                 return -ENOENT;
6986
6987         bpf_object__for_each_map(map, obj) {
6988                 char *pin_path = NULL;
6989                 char buf[PATH_MAX];
6990
6991                 if (path) {
6992                         int len;
6993
6994                         len = snprintf(buf, PATH_MAX, "%s/%s", path,
6995                                        bpf_map__name(map));
6996                         if (len < 0)
6997                                 return -EINVAL;
6998                         else if (len >= PATH_MAX)
6999                                 return -ENAMETOOLONG;
7000                         pin_path = buf;
7001                 } else if (!map->pin_path) {
7002                         continue;
7003                 }
7004
7005                 err = bpf_map__unpin(map, pin_path);
7006                 if (err)
7007                         return err;
7008         }
7009
7010         return 0;
7011 }
7012
7013 int bpf_object__pin_programs(struct bpf_object *obj, const char *path)
7014 {
7015         struct bpf_program *prog;
7016         int err;
7017
7018         if (!obj)
7019                 return -ENOENT;
7020
7021         if (!obj->loaded) {
7022                 pr_warn("object not yet loaded; load it first\n");
7023                 return -ENOENT;
7024         }
7025
7026         bpf_object__for_each_program(prog, obj) {
7027                 char buf[PATH_MAX];
7028                 int len;
7029
7030                 len = snprintf(buf, PATH_MAX, "%s/%s", path,
7031                                prog->pin_name);
7032                 if (len < 0) {
7033                         err = -EINVAL;
7034                         goto err_unpin_programs;
7035                 } else if (len >= PATH_MAX) {
7036                         err = -ENAMETOOLONG;
7037                         goto err_unpin_programs;
7038                 }
7039
7040                 err = bpf_program__pin(prog, buf);
7041                 if (err)
7042                         goto err_unpin_programs;
7043         }
7044
7045         return 0;
7046
7047 err_unpin_programs:
7048         while ((prog = bpf_program__prev(prog, obj))) {
7049                 char buf[PATH_MAX];
7050                 int len;
7051
7052                 len = snprintf(buf, PATH_MAX, "%s/%s", path,
7053                                prog->pin_name);
7054                 if (len < 0)
7055                         continue;
7056                 else if (len >= PATH_MAX)
7057                         continue;
7058
7059                 bpf_program__unpin(prog, buf);
7060         }
7061
7062         return err;
7063 }
7064
7065 int bpf_object__unpin_programs(struct bpf_object *obj, const char *path)
7066 {
7067         struct bpf_program *prog;
7068         int err;
7069
7070         if (!obj)
7071                 return -ENOENT;
7072
7073         bpf_object__for_each_program(prog, obj) {
7074                 char buf[PATH_MAX];
7075                 int len;
7076
7077                 len = snprintf(buf, PATH_MAX, "%s/%s", path,
7078                                prog->pin_name);
7079                 if (len < 0)
7080                         return -EINVAL;
7081                 else if (len >= PATH_MAX)
7082                         return -ENAMETOOLONG;
7083
7084                 err = bpf_program__unpin(prog, buf);
7085                 if (err)
7086                         return err;
7087         }
7088
7089         return 0;
7090 }
7091
7092 int bpf_object__pin(struct bpf_object *obj, const char *path)
7093 {
7094         int err;
7095
7096         err = bpf_object__pin_maps(obj, path);
7097         if (err)
7098                 return err;
7099
7100         err = bpf_object__pin_programs(obj, path);
7101         if (err) {
7102                 bpf_object__unpin_maps(obj, path);
7103                 return err;
7104         }
7105
7106         return 0;
7107 }
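
/* Usage sketch (hypothetical, not part of libbpf): pin everything an object
 * owns under one bpffs directory and mirror that on teardown. The
 * "/sys/fs/bpf/myobj" path is illustrative; the object must be loaded first,
 * as enforced by the pinning functions above.
 */
static int example_pin_object(struct bpf_object *obj)
{
        int err;

        err = bpf_object__pin(obj, "/sys/fs/bpf/myobj");
        if (err)
                return err;

        /* ... use the pinned programs/maps ... */

        bpf_object__unpin_programs(obj, "/sys/fs/bpf/myobj");
        return bpf_object__unpin_maps(obj, "/sys/fs/bpf/myobj");
}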
7108
7109 static void bpf_map__destroy(struct bpf_map *map)
7110 {
7111         if (map->clear_priv)
7112                 map->clear_priv(map, map->priv);
7113         map->priv = NULL;
7114         map->clear_priv = NULL;
7115
7116         if (map->inner_map) {
7117                 bpf_map__destroy(map->inner_map);
7118                 zfree(&map->inner_map);
7119         }
7120
7121         zfree(&map->init_slots);
7122         map->init_slots_sz = 0;
7123
7124         if (map->mmaped) {
7125                 munmap(map->mmaped, bpf_map_mmap_sz(map));
7126                 map->mmaped = NULL;
7127         }
7128
7129         if (map->st_ops) {
7130                 zfree(&map->st_ops->data);
7131                 zfree(&map->st_ops->progs);
7132                 zfree(&map->st_ops->kern_func_off);
7133                 zfree(&map->st_ops);
7134         }
7135
7136         zfree(&map->name);
7137         zfree(&map->pin_path);
7138
7139         if (map->fd >= 0)
7140                 zclose(map->fd);
7141 }
7142
7143 void bpf_object__close(struct bpf_object *obj)
7144 {
7145         size_t i;
7146
7147         if (IS_ERR_OR_NULL(obj))
7148                 return;
7149
7150         if (obj->clear_priv)
7151                 obj->clear_priv(obj, obj->priv);
7152
7153         bpf_object__elf_finish(obj);
7154         bpf_object__unload(obj);
7155         btf__free(obj->btf);
7156         btf_ext__free(obj->btf_ext);
7157
7158         for (i = 0; i < obj->nr_maps; i++)
7159                 bpf_map__destroy(&obj->maps[i]);
7160
7161         zfree(&obj->kconfig);
7162         zfree(&obj->externs);
7163         obj->nr_extern = 0;
7164
7165         zfree(&obj->maps);
7166         obj->nr_maps = 0;
7167
7168         if (obj->programs && obj->nr_programs) {
7169                 for (i = 0; i < obj->nr_programs; i++)
7170                         bpf_program__exit(&obj->programs[i]);
7171         }
7172         zfree(&obj->programs);
7173
7174         list_del(&obj->list);
7175         free(obj);
7176 }
7177
7178 struct bpf_object *
7179 bpf_object__next(struct bpf_object *prev)
7180 {
7181         struct bpf_object *next;
7182
7183         if (!prev)
7184                 next = list_first_entry(&bpf_objects_list,
7185                                         struct bpf_object,
7186                                         list);
7187         else
7188                 next = list_next_entry(prev, list);
7189
7190         /* An empty list is detected here, so no check is needed on entry. */
7191         if (&next->list == &bpf_objects_list)
7192                 return NULL;
7193
7194         return next;
7195 }
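
/* Usage sketch (hypothetical): walk all bpf_objects currently opened by this
 * process. Passing NULL starts at the head of the global list and NULL is
 * returned once the list is exhausted.
 */
static void example_list_objects(void)
{
        struct bpf_object *obj = NULL;

        while ((obj = bpf_object__next(obj)) != NULL)
                pr_debug("object '%s', kversion %u\n",
                         bpf_object__name(obj), bpf_object__kversion(obj));
}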
7196
7197 const char *bpf_object__name(const struct bpf_object *obj)
7198 {
7199         return obj ? obj->name : ERR_PTR(-EINVAL);
7200 }
7201
7202 unsigned int bpf_object__kversion(const struct bpf_object *obj)
7203 {
7204         return obj ? obj->kern_version : 0;
7205 }
7206
7207 struct btf *bpf_object__btf(const struct bpf_object *obj)
7208 {
7209         return obj ? obj->btf : NULL;
7210 }
7211
7212 int bpf_object__btf_fd(const struct bpf_object *obj)
7213 {
7214         return obj->btf ? btf__fd(obj->btf) : -1;
7215 }
7216
7217 int bpf_object__set_priv(struct bpf_object *obj, void *priv,
7218                          bpf_object_clear_priv_t clear_priv)
7219 {
7220         if (obj->priv && obj->clear_priv)
7221                 obj->clear_priv(obj, obj->priv);
7222
7223         obj->priv = priv;
7224         obj->clear_priv = clear_priv;
7225         return 0;
7226 }
7227
7228 void *bpf_object__priv(const struct bpf_object *obj)
7229 {
7230         return obj ? obj->priv : ERR_PTR(-EINVAL);
7231 }
7232
7233 static struct bpf_program *
7234 __bpf_program__iter(const struct bpf_program *p, const struct bpf_object *obj,
7235                     bool forward)
7236 {
7237         size_t nr_programs = obj->nr_programs;
7238         ssize_t idx;
7239
7240         if (!nr_programs)
7241                 return NULL;
7242
7243         if (!p)
7244                 /* Iterate from the beginning */
7245                 return forward ? &obj->programs[0] :
7246                         &obj->programs[nr_programs - 1];
7247
7248         if (p->obj != obj) {
7249                 pr_warn("error: program handle doesn't match object\n");
7250                 return NULL;
7251         }
7252
7253         idx = (p - obj->programs) + (forward ? 1 : -1);
7254         if (idx >= obj->nr_programs || idx < 0)
7255                 return NULL;
7256         return &obj->programs[idx];
7257 }
7258
7259 struct bpf_program *
7260 bpf_program__next(struct bpf_program *prev, const struct bpf_object *obj)
7261 {
7262         struct bpf_program *prog = prev;
7263
7264         do {
7265                 prog = __bpf_program__iter(prog, obj, true);
7266         } while (prog && bpf_program__is_function_storage(prog, obj));
7267
7268         return prog;
7269 }
7270
7271 struct bpf_program *
7272 bpf_program__prev(struct bpf_program *next, const struct bpf_object *obj)
7273 {
7274         struct bpf_program *prog = next;
7275
7276         do {
7277                 prog = __bpf_program__iter(prog, obj, false);
7278         } while (prog && bpf_program__is_function_storage(prog, obj));
7279
7280         return prog;
7281 }
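
/* Usage sketch (hypothetical): visit every entry-point program of an object.
 * Sub-programs (function storage) are skipped by the iterators above.
 */
static void example_list_progs(const struct bpf_object *obj)
{
        struct bpf_program *prog = NULL;

        while ((prog = bpf_program__next(prog, obj)) != NULL)
                pr_debug("prog '%s' in section '%s'\n",
                         bpf_program__name(prog),
                         bpf_program__title(prog, false));
}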
7282
7283 int bpf_program__set_priv(struct bpf_program *prog, void *priv,
7284                           bpf_program_clear_priv_t clear_priv)
7285 {
7286         if (prog->priv && prog->clear_priv)
7287                 prog->clear_priv(prog, prog->priv);
7288
7289         prog->priv = priv;
7290         prog->clear_priv = clear_priv;
7291         return 0;
7292 }
7293
7294 void *bpf_program__priv(const struct bpf_program *prog)
7295 {
7296         return prog ? prog->priv : ERR_PTR(-EINVAL);
7297 }
7298
7299 void bpf_program__set_ifindex(struct bpf_program *prog, __u32 ifindex)
7300 {
7301         prog->prog_ifindex = ifindex;
7302 }
7303
7304 const char *bpf_program__name(const struct bpf_program *prog)
7305 {
7306         return prog->name;
7307 }
7308
7309 const char *bpf_program__title(const struct bpf_program *prog, bool needs_copy)
7310 {
7311         const char *title;
7312
7313         title = prog->section_name;
7314         if (needs_copy) {
7315                 title = strdup(title);
7316                 if (!title) {
7317                         pr_warn("failed to strdup program title\n");
7318                         return ERR_PTR(-ENOMEM);
7319                 }
7320         }
7321
7322         return title;
7323 }
7324
7325 bool bpf_program__autoload(const struct bpf_program *prog)
7326 {
7327         return prog->load;
7328 }
7329
7330 int bpf_program__set_autoload(struct bpf_program *prog, bool autoload)
7331 {
7332         if (prog->obj->loaded)
7333                 return -EINVAL;
7334
7335         prog->load = autoload;
7336         return 0;
7337 }
7338
7339 int bpf_program__fd(const struct bpf_program *prog)
7340 {
7341         return bpf_program__nth_fd(prog, 0);
7342 }
7343
7344 size_t bpf_program__size(const struct bpf_program *prog)
7345 {
7346         return prog->insns_cnt * sizeof(struct bpf_insn);
7347 }
7348
7349 int bpf_program__set_prep(struct bpf_program *prog, int nr_instances,
7350                           bpf_program_prep_t prep)
7351 {
7352         int *instances_fds;
7353
7354         if (nr_instances <= 0 || !prep)
7355                 return -EINVAL;
7356
7357         if (prog->instances.nr > 0 || prog->instances.fds) {
7358                 pr_warn("Can't set pre-processor after loading\n");
7359                 return -EINVAL;
7360         }
7361
7362         instances_fds = malloc(sizeof(int) * nr_instances);
7363         if (!instances_fds) {
7364                 pr_warn("failed to allocate memory for instance fds\n");
7365                 return -ENOMEM;
7366         }
7367
7368         /* fill all fds with -1 */
7369         memset(instances_fds, -1, sizeof(int) * nr_instances);
7370
7371         prog->instances.nr = nr_instances;
7372         prog->instances.fds = instances_fds;
7373         prog->preprocessor = prep;
7374         return 0;
7375 }
7376
7377 int bpf_program__nth_fd(const struct bpf_program *prog, int n)
7378 {
7379         int fd;
7380
7381         if (!prog)
7382                 return -EINVAL;
7383
7384         if (n >= prog->instances.nr || n < 0) {
7385                 pr_warn("Can't get the %dth fd from program %s: only %d instances\n",
7386                         n, prog->section_name, prog->instances.nr);
7387                 return -EINVAL;
7388         }
7389
7390         fd = prog->instances.fds[n];
7391         if (fd < 0) {
7392                 pr_warn("%dth instance of program '%s' is invalid\n",
7393                         n, prog->section_name);
7394                 return -ENOENT;
7395         }
7396
7397         return fd;
7398 }
7399
7400 enum bpf_prog_type bpf_program__get_type(struct bpf_program *prog)
7401 {
7402         return prog->type;
7403 }
7404
7405 void bpf_program__set_type(struct bpf_program *prog, enum bpf_prog_type type)
7406 {
7407         prog->type = type;
7408 }
7409
7410 static bool bpf_program__is_type(const struct bpf_program *prog,
7411                                  enum bpf_prog_type type)
7412 {
7413         return prog ? (prog->type == type) : false;
7414 }
7415
7416 #define BPF_PROG_TYPE_FNS(NAME, TYPE)                           \
7417 int bpf_program__set_##NAME(struct bpf_program *prog)           \
7418 {                                                               \
7419         if (!prog)                                              \
7420                 return -EINVAL;                                 \
7421         bpf_program__set_type(prog, TYPE);                      \
7422         return 0;                                               \
7423 }                                                               \
7424                                                                 \
7425 bool bpf_program__is_##NAME(const struct bpf_program *prog)     \
7426 {                                                               \
7427         return bpf_program__is_type(prog, TYPE);                \
7428 }                                                               \
7429
7430 BPF_PROG_TYPE_FNS(socket_filter, BPF_PROG_TYPE_SOCKET_FILTER);
7431 BPF_PROG_TYPE_FNS(lsm, BPF_PROG_TYPE_LSM);
7432 BPF_PROG_TYPE_FNS(kprobe, BPF_PROG_TYPE_KPROBE);
7433 BPF_PROG_TYPE_FNS(sched_cls, BPF_PROG_TYPE_SCHED_CLS);
7434 BPF_PROG_TYPE_FNS(sched_act, BPF_PROG_TYPE_SCHED_ACT);
7435 BPF_PROG_TYPE_FNS(tracepoint, BPF_PROG_TYPE_TRACEPOINT);
7436 BPF_PROG_TYPE_FNS(raw_tracepoint, BPF_PROG_TYPE_RAW_TRACEPOINT);
7437 BPF_PROG_TYPE_FNS(xdp, BPF_PROG_TYPE_XDP);
7438 BPF_PROG_TYPE_FNS(perf_event, BPF_PROG_TYPE_PERF_EVENT);
7439 BPF_PROG_TYPE_FNS(tracing, BPF_PROG_TYPE_TRACING);
7440 BPF_PROG_TYPE_FNS(struct_ops, BPF_PROG_TYPE_STRUCT_OPS);
7441 BPF_PROG_TYPE_FNS(extension, BPF_PROG_TYPE_EXT);
7442 BPF_PROG_TYPE_FNS(sk_lookup, BPF_PROG_TYPE_SK_LOOKUP);
7443
7444 enum bpf_attach_type
7445 bpf_program__get_expected_attach_type(struct bpf_program *prog)
7446 {
7447         return prog->expected_attach_type;
7448 }
7449
7450 void bpf_program__set_expected_attach_type(struct bpf_program *prog,
7451                                            enum bpf_attach_type type)
7452 {
7453         prog->expected_attach_type = type;
7454 }
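
/* Usage sketch (hypothetical): force a program's type and expected attach
 * type before bpf_object__load(), e.g. when the ELF section name is too
 * generic for libbpf to guess from. The chosen types are illustrative.
 */
static void example_force_prog_type(struct bpf_program *prog)
{
        bpf_program__set_type(prog, BPF_PROG_TYPE_CGROUP_SOCK_ADDR);
        bpf_program__set_expected_attach_type(prog, BPF_CGROUP_INET4_CONNECT);
}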
7455
7456 #define BPF_PROG_SEC_IMPL(string, ptype, eatype, eatype_optional,           \
7457                           attachable, attach_btf)                           \
7458         {                                                                   \
7459                 .sec = string,                                              \
7460                 .len = sizeof(string) - 1,                                  \
7461                 .prog_type = ptype,                                         \
7462                 .expected_attach_type = eatype,                             \
7463                 .is_exp_attach_type_optional = eatype_optional,             \
7464                 .is_attachable = attachable,                                \
7465                 .is_attach_btf = attach_btf,                                \
7466         }
7467
7468 /* Programs that can NOT be attached. */
7469 #define BPF_PROG_SEC(string, ptype) BPF_PROG_SEC_IMPL(string, ptype, 0, 0, 0, 0)
7470
7471 /* Programs that can be attached. */
7472 #define BPF_APROG_SEC(string, ptype, atype) \
7473         BPF_PROG_SEC_IMPL(string, ptype, atype, true, 1, 0)
7474
7475 /* Programs that must specify expected attach type at load time. */
7476 #define BPF_EAPROG_SEC(string, ptype, eatype) \
7477         BPF_PROG_SEC_IMPL(string, ptype, eatype, false, 1, 0)
7478
7479 /* Programs that use BTF to identify attach point */
7480 #define BPF_PROG_BTF(string, ptype, eatype) \
7481         BPF_PROG_SEC_IMPL(string, ptype, eatype, false, 0, 1)
7482
7483 /* Programs that can be attached but attach type can't be identified by section
7484  * name. Kept for backward compatibility.
7485  */
7486 #define BPF_APROG_COMPAT(string, ptype) BPF_PROG_SEC(string, ptype)
7487
7488 #define SEC_DEF(sec_pfx, ptype, ...) {                                      \
7489         .sec = sec_pfx,                                                     \
7490         .len = sizeof(sec_pfx) - 1,                                         \
7491         .prog_type = BPF_PROG_TYPE_##ptype,                                 \
7492         __VA_ARGS__                                                         \
7493 }
7494
7495 static struct bpf_link *attach_kprobe(const struct bpf_sec_def *sec,
7496                                       struct bpf_program *prog);
7497 static struct bpf_link *attach_tp(const struct bpf_sec_def *sec,
7498                                   struct bpf_program *prog);
7499 static struct bpf_link *attach_raw_tp(const struct bpf_sec_def *sec,
7500                                       struct bpf_program *prog);
7501 static struct bpf_link *attach_trace(const struct bpf_sec_def *sec,
7502                                      struct bpf_program *prog);
7503 static struct bpf_link *attach_lsm(const struct bpf_sec_def *sec,
7504                                    struct bpf_program *prog);
7505 static struct bpf_link *attach_iter(const struct bpf_sec_def *sec,
7506                                     struct bpf_program *prog);
7507
7508 static const struct bpf_sec_def section_defs[] = {
7509         BPF_PROG_SEC("socket",                  BPF_PROG_TYPE_SOCKET_FILTER),
7510         BPF_PROG_SEC("sk_reuseport",            BPF_PROG_TYPE_SK_REUSEPORT),
7511         SEC_DEF("kprobe/", KPROBE,
7512                 .attach_fn = attach_kprobe),
7513         BPF_PROG_SEC("uprobe/",                 BPF_PROG_TYPE_KPROBE),
7514         SEC_DEF("kretprobe/", KPROBE,
7515                 .attach_fn = attach_kprobe),
7516         BPF_PROG_SEC("uretprobe/",              BPF_PROG_TYPE_KPROBE),
7517         BPF_PROG_SEC("classifier",              BPF_PROG_TYPE_SCHED_CLS),
7518         BPF_PROG_SEC("action",                  BPF_PROG_TYPE_SCHED_ACT),
7519         SEC_DEF("tracepoint/", TRACEPOINT,
7520                 .attach_fn = attach_tp),
7521         SEC_DEF("tp/", TRACEPOINT,
7522                 .attach_fn = attach_tp),
7523         SEC_DEF("raw_tracepoint/", RAW_TRACEPOINT,
7524                 .attach_fn = attach_raw_tp),
7525         SEC_DEF("raw_tp/", RAW_TRACEPOINT,
7526                 .attach_fn = attach_raw_tp),
7527         SEC_DEF("tp_btf/", TRACING,
7528                 .expected_attach_type = BPF_TRACE_RAW_TP,
7529                 .is_attach_btf = true,
7530                 .attach_fn = attach_trace),
7531         SEC_DEF("fentry/", TRACING,
7532                 .expected_attach_type = BPF_TRACE_FENTRY,
7533                 .is_attach_btf = true,
7534                 .attach_fn = attach_trace),
7535         SEC_DEF("fmod_ret/", TRACING,
7536                 .expected_attach_type = BPF_MODIFY_RETURN,
7537                 .is_attach_btf = true,
7538                 .attach_fn = attach_trace),
7539         SEC_DEF("fexit/", TRACING,
7540                 .expected_attach_type = BPF_TRACE_FEXIT,
7541                 .is_attach_btf = true,
7542                 .attach_fn = attach_trace),
7543         SEC_DEF("freplace/", EXT,
7544                 .is_attach_btf = true,
7545                 .attach_fn = attach_trace),
7546         SEC_DEF("lsm/", LSM,
7547                 .is_attach_btf = true,
7548                 .expected_attach_type = BPF_LSM_MAC,
7549                 .attach_fn = attach_lsm),
7550         SEC_DEF("iter/", TRACING,
7551                 .expected_attach_type = BPF_TRACE_ITER,
7552                 .is_attach_btf = true,
7553                 .attach_fn = attach_iter),
7554         BPF_EAPROG_SEC("xdp_devmap/",           BPF_PROG_TYPE_XDP,
7555                                                 BPF_XDP_DEVMAP),
7556         BPF_EAPROG_SEC("xdp_cpumap/",           BPF_PROG_TYPE_XDP,
7557                                                 BPF_XDP_CPUMAP),
7558         BPF_EAPROG_SEC("xdp",                   BPF_PROG_TYPE_XDP,
7559                                                 BPF_XDP),
7560         BPF_PROG_SEC("perf_event",              BPF_PROG_TYPE_PERF_EVENT),
7561         BPF_PROG_SEC("lwt_in",                  BPF_PROG_TYPE_LWT_IN),
7562         BPF_PROG_SEC("lwt_out",                 BPF_PROG_TYPE_LWT_OUT),
7563         BPF_PROG_SEC("lwt_xmit",                BPF_PROG_TYPE_LWT_XMIT),
7564         BPF_PROG_SEC("lwt_seg6local",           BPF_PROG_TYPE_LWT_SEG6LOCAL),
7565         BPF_APROG_SEC("cgroup_skb/ingress",     BPF_PROG_TYPE_CGROUP_SKB,
7566                                                 BPF_CGROUP_INET_INGRESS),
7567         BPF_APROG_SEC("cgroup_skb/egress",      BPF_PROG_TYPE_CGROUP_SKB,
7568                                                 BPF_CGROUP_INET_EGRESS),
7569         BPF_APROG_COMPAT("cgroup/skb",          BPF_PROG_TYPE_CGROUP_SKB),
7570         BPF_EAPROG_SEC("cgroup/sock_create",    BPF_PROG_TYPE_CGROUP_SOCK,
7571                                                 BPF_CGROUP_INET_SOCK_CREATE),
7572         BPF_EAPROG_SEC("cgroup/sock_release",   BPF_PROG_TYPE_CGROUP_SOCK,
7573                                                 BPF_CGROUP_INET_SOCK_RELEASE),
7574         BPF_APROG_SEC("cgroup/sock",            BPF_PROG_TYPE_CGROUP_SOCK,
7575                                                 BPF_CGROUP_INET_SOCK_CREATE),
7576         BPF_EAPROG_SEC("cgroup/post_bind4",     BPF_PROG_TYPE_CGROUP_SOCK,
7577                                                 BPF_CGROUP_INET4_POST_BIND),
7578         BPF_EAPROG_SEC("cgroup/post_bind6",     BPF_PROG_TYPE_CGROUP_SOCK,
7579                                                 BPF_CGROUP_INET6_POST_BIND),
7580         BPF_APROG_SEC("cgroup/dev",             BPF_PROG_TYPE_CGROUP_DEVICE,
7581                                                 BPF_CGROUP_DEVICE),
7582         BPF_APROG_SEC("sockops",                BPF_PROG_TYPE_SOCK_OPS,
7583                                                 BPF_CGROUP_SOCK_OPS),
7584         BPF_APROG_SEC("sk_skb/stream_parser",   BPF_PROG_TYPE_SK_SKB,
7585                                                 BPF_SK_SKB_STREAM_PARSER),
7586         BPF_APROG_SEC("sk_skb/stream_verdict",  BPF_PROG_TYPE_SK_SKB,
7587                                                 BPF_SK_SKB_STREAM_VERDICT),
7588         BPF_APROG_COMPAT("sk_skb",              BPF_PROG_TYPE_SK_SKB),
7589         BPF_APROG_SEC("sk_msg",                 BPF_PROG_TYPE_SK_MSG,
7590                                                 BPF_SK_MSG_VERDICT),
7591         BPF_APROG_SEC("lirc_mode2",             BPF_PROG_TYPE_LIRC_MODE2,
7592                                                 BPF_LIRC_MODE2),
7593         BPF_APROG_SEC("flow_dissector",         BPF_PROG_TYPE_FLOW_DISSECTOR,
7594                                                 BPF_FLOW_DISSECTOR),
7595         BPF_EAPROG_SEC("cgroup/bind4",          BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
7596                                                 BPF_CGROUP_INET4_BIND),
7597         BPF_EAPROG_SEC("cgroup/bind6",          BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
7598                                                 BPF_CGROUP_INET6_BIND),
7599         BPF_EAPROG_SEC("cgroup/connect4",       BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
7600                                                 BPF_CGROUP_INET4_CONNECT),
7601         BPF_EAPROG_SEC("cgroup/connect6",       BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
7602                                                 BPF_CGROUP_INET6_CONNECT),
7603         BPF_EAPROG_SEC("cgroup/sendmsg4",       BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
7604                                                 BPF_CGROUP_UDP4_SENDMSG),
7605         BPF_EAPROG_SEC("cgroup/sendmsg6",       BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
7606                                                 BPF_CGROUP_UDP6_SENDMSG),
7607         BPF_EAPROG_SEC("cgroup/recvmsg4",       BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
7608                                                 BPF_CGROUP_UDP4_RECVMSG),
7609         BPF_EAPROG_SEC("cgroup/recvmsg6",       BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
7610                                                 BPF_CGROUP_UDP6_RECVMSG),
7611         BPF_EAPROG_SEC("cgroup/getpeername4",   BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
7612                                                 BPF_CGROUP_INET4_GETPEERNAME),
7613         BPF_EAPROG_SEC("cgroup/getpeername6",   BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
7614                                                 BPF_CGROUP_INET6_GETPEERNAME),
7615         BPF_EAPROG_SEC("cgroup/getsockname4",   BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
7616                                                 BPF_CGROUP_INET4_GETSOCKNAME),
7617         BPF_EAPROG_SEC("cgroup/getsockname6",   BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
7618                                                 BPF_CGROUP_INET6_GETSOCKNAME),
7619         BPF_EAPROG_SEC("cgroup/sysctl",         BPF_PROG_TYPE_CGROUP_SYSCTL,
7620                                                 BPF_CGROUP_SYSCTL),
7621         BPF_EAPROG_SEC("cgroup/getsockopt",     BPF_PROG_TYPE_CGROUP_SOCKOPT,
7622                                                 BPF_CGROUP_GETSOCKOPT),
7623         BPF_EAPROG_SEC("cgroup/setsockopt",     BPF_PROG_TYPE_CGROUP_SOCKOPT,
7624                                                 BPF_CGROUP_SETSOCKOPT),
7625         BPF_PROG_SEC("struct_ops",              BPF_PROG_TYPE_STRUCT_OPS),
7626         BPF_EAPROG_SEC("sk_lookup/",            BPF_PROG_TYPE_SK_LOOKUP,
7627                                                 BPF_SK_LOOKUP),
7628 };
7629
7630 #undef BPF_PROG_SEC_IMPL
7631 #undef BPF_PROG_SEC
7632 #undef BPF_APROG_SEC
7633 #undef BPF_EAPROG_SEC
7634 #undef BPF_APROG_COMPAT
7635 #undef SEC_DEF
7636
7637 #define MAX_TYPE_NAME_SIZE 32
7638
7639 static const struct bpf_sec_def *find_sec_def(const char *sec_name)
7640 {
7641         int i, n = ARRAY_SIZE(section_defs);
7642
7643         for (i = 0; i < n; i++) {
7644                 if (strncmp(sec_name,
7645                             section_defs[i].sec, section_defs[i].len))
7646                         continue;
7647                 return &section_defs[i];
7648         }
7649         return NULL;
7650 }
7651
7652 static char *libbpf_get_type_names(bool attach_type)
7653 {
7654         int i, len = ARRAY_SIZE(section_defs) * MAX_TYPE_NAME_SIZE;
7655         char *buf;
7656
7657         buf = malloc(len);
7658         if (!buf)
7659                 return NULL;
7660
7661         buf[0] = '\0';
7662         /* Build a string listing all available names */
7663         for (i = 0; i < ARRAY_SIZE(section_defs); i++) {
7664                 if (attach_type && !section_defs[i].is_attachable)
7665                         continue;
7666
7667                 if (strlen(buf) + strlen(section_defs[i].sec) + 2 > len) {
7668                         free(buf);
7669                         return NULL;
7670                 }
7671                 strcat(buf, " ");
7672                 strcat(buf, section_defs[i].sec);
7673         }
7674
7675         return buf;
7676 }
7677
7678 int libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type,
7679                              enum bpf_attach_type *expected_attach_type)
7680 {
7681         const struct bpf_sec_def *sec_def;
7682         char *type_names;
7683
7684         if (!name)
7685                 return -EINVAL;
7686
7687         sec_def = find_sec_def(name);
7688         if (sec_def) {
7689                 *prog_type = sec_def->prog_type;
7690                 *expected_attach_type = sec_def->expected_attach_type;
7691                 return 0;
7692         }
7693
7694         pr_debug("failed to guess program type from ELF section '%s'\n", name);
7695         type_names = libbpf_get_type_names(false);
7696         if (type_names != NULL) {
7697                 pr_debug("supported section(type) names are:%s\n", type_names);
7698                 free(type_names);
7699         }
7700
7701         return -ESRCH;
7702 }
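
/* Usage sketch (hypothetical): resolve program and attach types from a
 * section name using the tables above. "cgroup_skb/ingress" is one of the
 * recognized prefixes in section_defs[].
 */
static int example_guess_types(void)
{
        enum bpf_attach_type attach_type;
        enum bpf_prog_type prog_type;
        int err;

        err = libbpf_prog_type_by_name("cgroup_skb/ingress", &prog_type,
                                       &attach_type);
        if (err)
                return err;

        /* attachable sections can also be resolved directly */
        return libbpf_attach_type_by_name("cgroup_skb/ingress", &attach_type);
}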
7703
7704 static struct bpf_map *find_struct_ops_map_by_offset(struct bpf_object *obj,
7705                                                      size_t offset)
7706 {
7707         struct bpf_map *map;
7708         size_t i;
7709
7710         for (i = 0; i < obj->nr_maps; i++) {
7711                 map = &obj->maps[i];
7712                 if (!bpf_map__is_struct_ops(map))
7713                         continue;
7714                 if (map->sec_offset <= offset &&
7715                     offset - map->sec_offset < map->def.value_size)
7716                         return map;
7717         }
7718
7719         return NULL;
7720 }
7721
7722 /* Collect relocations from the ELF section and populate st_ops->progs[] */
7723 static int bpf_object__collect_st_ops_relos(struct bpf_object *obj,
7724                                             GElf_Shdr *shdr, Elf_Data *data)
7725 {
7726         const struct btf_member *member;
7727         struct bpf_struct_ops *st_ops;
7728         struct bpf_program *prog;
7729         unsigned int shdr_idx;
7730         const struct btf *btf;
7731         struct bpf_map *map;
7732         Elf_Data *symbols;
7733         unsigned int moff;
7734         const char *name;
7735         __u32 member_idx;
7736         GElf_Sym sym;
7737         GElf_Rel rel;
7738         int i, nrels;
7739
7740         symbols = obj->efile.symbols;
7741         btf = obj->btf;
7742         nrels = shdr->sh_size / shdr->sh_entsize;
7743         for (i = 0; i < nrels; i++) {
7744                 if (!gelf_getrel(data, i, &rel)) {
7745                         pr_warn("struct_ops reloc: failed to get %d reloc\n", i);
7746                         return -LIBBPF_ERRNO__FORMAT;
7747                 }
7748
7749                 if (!gelf_getsym(symbols, GELF_R_SYM(rel.r_info), &sym)) {
7750                         pr_warn("struct_ops reloc: symbol %zx not found\n",
7751                                 (size_t)GELF_R_SYM(rel.r_info));
7752                         return -LIBBPF_ERRNO__FORMAT;
7753                 }
7754
7755                 name = elf_sym_str(obj, sym.st_name) ?: "<?>";
7756                 map = find_struct_ops_map_by_offset(obj, rel.r_offset);
7757                 if (!map) {
7758                         pr_warn("struct_ops reloc: cannot find map at rel.r_offset %zu\n",
7759                                 (size_t)rel.r_offset);
7760                         return -EINVAL;
7761                 }
7762
7763                 moff = rel.r_offset - map->sec_offset;
7764                 shdr_idx = sym.st_shndx;
7765                 st_ops = map->st_ops;
7766                 pr_debug("struct_ops reloc %s: for %lld value %lld shdr_idx %u rel.r_offset %zu map->sec_offset %zu name %d (\'%s\')\n",
7767                          map->name,
7768                          (long long)(rel.r_info >> 32),
7769                          (long long)sym.st_value,
7770                          shdr_idx, (size_t)rel.r_offset,
7771                          map->sec_offset, sym.st_name, name);
7772
7773                 if (shdr_idx >= SHN_LORESERVE) {
7774                         pr_warn("struct_ops reloc %s: rel.r_offset %zu shdr_idx %u unsupported non-static function\n",
7775                                 map->name, (size_t)rel.r_offset, shdr_idx);
7776                         return -LIBBPF_ERRNO__RELOC;
7777                 }
7778
7779                 member = find_member_by_offset(st_ops->type, moff * 8);
7780                 if (!member) {
7781                         pr_warn("struct_ops reloc %s: cannot find member at moff %u\n",
7782                                 map->name, moff);
7783                         return -EINVAL;
7784                 }
7785                 member_idx = member - btf_members(st_ops->type);
7786                 name = btf__name_by_offset(btf, member->name_off);
7787
7788                 if (!resolve_func_ptr(btf, member->type, NULL)) {
7789                         pr_warn("struct_ops reloc %s: cannot relocate non func ptr %s\n",
7790                                 map->name, name);
7791                         return -EINVAL;
7792                 }
7793
7794                 prog = bpf_object__find_prog_by_idx(obj, shdr_idx);
7795                 if (!prog) {
7796                         pr_warn("struct_ops reloc %s: cannot find prog at shdr_idx %u to relocate func ptr %s\n",
7797                                 map->name, shdr_idx, name);
7798                         return -EINVAL;
7799                 }
7800
7801                 if (prog->type == BPF_PROG_TYPE_UNSPEC) {
7802                         const struct bpf_sec_def *sec_def;
7803
7804                         sec_def = find_sec_def(prog->section_name);
7805                         if (sec_def &&
7806                             sec_def->prog_type != BPF_PROG_TYPE_STRUCT_OPS) {
7807                                 /* set the type so the pr_warn below reports it */
7808                                 prog->type = sec_def->prog_type;
7809                                 goto invalid_prog;
7810                         }
7811
7812                         prog->type = BPF_PROG_TYPE_STRUCT_OPS;
7813                         prog->attach_btf_id = st_ops->type_id;
7814                         prog->expected_attach_type = member_idx;
7815                 } else if (prog->type != BPF_PROG_TYPE_STRUCT_OPS ||
7816                            prog->attach_btf_id != st_ops->type_id ||
7817                            prog->expected_attach_type != member_idx) {
7818                         goto invalid_prog;
7819                 }
7820                 st_ops->progs[member_idx] = prog;
7821         }
7822
7823         return 0;
7824
7825 invalid_prog:
7826         pr_warn("struct_ops reloc %s: cannot use prog %s in sec %s with type %u attach_btf_id %u expected_attach_type %u for func ptr %s\n",
7827                 map->name, prog->name, prog->section_name, prog->type,
7828                 prog->attach_btf_id, prog->expected_attach_type, name);
7829         return -EINVAL;
7830 }
7831
7832 #define BTF_TRACE_PREFIX "btf_trace_"
7833 #define BTF_LSM_PREFIX "bpf_lsm_"
7834 #define BTF_ITER_PREFIX "bpf_iter_"
7835 #define BTF_MAX_NAME_SIZE 128
7836
7837 static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix,
7838                                    const char *name, __u32 kind)
7839 {
7840         char btf_type_name[BTF_MAX_NAME_SIZE];
7841         int ret;
7842
7843         ret = snprintf(btf_type_name, sizeof(btf_type_name),
7844                        "%s%s", prefix, name);
7845         /* snprintf returns the number of characters written excluding the
7846          * terminating null. So, if >= BTF_MAX_NAME_SIZE are written, it
7847          * indicates truncation.
7848          */
7849         if (ret < 0 || ret >= sizeof(btf_type_name))
7850                 return -ENAMETOOLONG;
7851         return btf__find_by_name_kind(btf, btf_type_name, kind);
7852 }
7853
7854 static inline int __find_vmlinux_btf_id(struct btf *btf, const char *name,
7855                                         enum bpf_attach_type attach_type)
7856 {
7857         int err;
7858
7859         if (attach_type == BPF_TRACE_RAW_TP)
7860                 err = find_btf_by_prefix_kind(btf, BTF_TRACE_PREFIX, name,
7861                                               BTF_KIND_TYPEDEF);
7862         else if (attach_type == BPF_LSM_MAC)
7863                 err = find_btf_by_prefix_kind(btf, BTF_LSM_PREFIX, name,
7864                                               BTF_KIND_FUNC);
7865         else if (attach_type == BPF_TRACE_ITER)
7866                 err = find_btf_by_prefix_kind(btf, BTF_ITER_PREFIX, name,
7867                                               BTF_KIND_FUNC);
7868         else
7869                 err = btf__find_by_name_kind(btf, name, BTF_KIND_FUNC);
7870
7871         if (err <= 0)
7872                 pr_warn("%s is not found in vmlinux BTF\n", name);
7873
7874         return err;
7875 }
7876
7877 int libbpf_find_vmlinux_btf_id(const char *name,
7878                                enum bpf_attach_type attach_type)
7879 {
7880         struct btf *btf;
7881         int err;
7882
7883         btf = libbpf_find_kernel_btf();
7884         if (IS_ERR(btf)) {
7885                 pr_warn("vmlinux BTF is not found\n");
7886                 return -EINVAL;
7887         }
7888
7889         err = __find_vmlinux_btf_id(btf, name, attach_type);
7890         btf__free(btf);
7891         return err;
7892 }
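
/* Usage sketch (hypothetical): resolve the vmlinux BTF id of a kernel
 * function to attach an fentry program to; tcp_v4_connect is an illustrative
 * target. A positive return value is the BTF type id, negative is an error.
 */
static int example_resolve_fentry_target(void)
{
        return libbpf_find_vmlinux_btf_id("tcp_v4_connect", BPF_TRACE_FENTRY);
}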
7893
7894 static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd)
7895 {
7896         struct bpf_prog_info_linear *info_linear;
7897         struct bpf_prog_info *info;
7898         struct btf *btf = NULL;
7899         int err = -EINVAL;
7900
7901         info_linear = bpf_program__get_prog_info_linear(attach_prog_fd, 0);
7902         if (IS_ERR_OR_NULL(info_linear)) {
7903                 pr_warn("failed to get prog info for FD %d\n",
7904                         attach_prog_fd);
7905                 return -EINVAL;
7906         }
7907         info = &info_linear->info;
7908         if (!info->btf_id) {
7909                 pr_warn("The target program doesn't have BTF\n");
7910                 goto out;
7911         }
7912         if (btf__get_from_id(info->btf_id, &btf)) {
7913                 pr_warn("Failed to get BTF of the program\n");
7914                 goto out;
7915         }
7916         err = btf__find_by_name_kind(btf, name, BTF_KIND_FUNC);
7917         btf__free(btf);
7918         if (err <= 0) {
7919                 pr_warn("%s is not found in prog's BTF\n", name);
7920                 goto out;
7921         }
7922 out:
7923         free(info_linear);
7924         return err;
7925 }
7926
7927 static int libbpf_find_attach_btf_id(struct bpf_program *prog)
7928 {
7929         enum bpf_attach_type attach_type = prog->expected_attach_type;
7930         __u32 attach_prog_fd = prog->attach_prog_fd;
7931         const char *name = prog->section_name;
7932         int i, err;
7933
7934         if (!name)
7935                 return -EINVAL;
7936
7937         for (i = 0; i < ARRAY_SIZE(section_defs); i++) {
7938                 if (!section_defs[i].is_attach_btf)
7939                         continue;
7940                 if (strncmp(name, section_defs[i].sec, section_defs[i].len))
7941                         continue;
7942                 if (attach_prog_fd)
7943                         err = libbpf_find_prog_btf_id(name + section_defs[i].len,
7944                                                       attach_prog_fd);
7945                 else
7946                         err = __find_vmlinux_btf_id(prog->obj->btf_vmlinux,
7947                                                     name + section_defs[i].len,
7948                                                     attach_type);
7949                 return err;
7950         }
7951         pr_warn("failed to identify btf_id based on ELF section name '%s'\n", name);
7952         return -ESRCH;
7953 }
7954
7955 int libbpf_attach_type_by_name(const char *name,
7956                                enum bpf_attach_type *attach_type)
7957 {
7958         char *type_names;
7959         int i;
7960
7961         if (!name)
7962                 return -EINVAL;
7963
7964         for (i = 0; i < ARRAY_SIZE(section_defs); i++) {
7965                 if (strncmp(name, section_defs[i].sec, section_defs[i].len))
7966                         continue;
7967                 if (!section_defs[i].is_attachable)
7968                         return -EINVAL;
7969                 *attach_type = section_defs[i].expected_attach_type;
7970                 return 0;
7971         }
7972         pr_debug("failed to guess attach type based on ELF section name '%s'\n", name);
7973         type_names = libbpf_get_type_names(true);
7974         if (type_names != NULL) {
7975                 pr_debug("attachable section(type) names are:%s\n", type_names);
7976                 free(type_names);
7977         }
7978
7979         return -EINVAL;
7980 }
7981
7982 int bpf_map__fd(const struct bpf_map *map)
7983 {
7984         return map ? map->fd : -EINVAL;
7985 }
7986
7987 const struct bpf_map_def *bpf_map__def(const struct bpf_map *map)
7988 {
7989         return map ? &map->def : ERR_PTR(-EINVAL);
7990 }
7991
7992 const char *bpf_map__name(const struct bpf_map *map)
7993 {
7994         return map ? map->name : NULL;
7995 }
7996
7997 enum bpf_map_type bpf_map__type(const struct bpf_map *map)
7998 {
7999         return map->def.type;
8000 }
8001
8002 int bpf_map__set_type(struct bpf_map *map, enum bpf_map_type type)
8003 {
8004         if (map->fd >= 0)
8005                 return -EBUSY;
8006         map->def.type = type;
8007         return 0;
8008 }
8009
8010 __u32 bpf_map__map_flags(const struct bpf_map *map)
8011 {
8012         return map->def.map_flags;
8013 }
8014
8015 int bpf_map__set_map_flags(struct bpf_map *map, __u32 flags)
8016 {
8017         if (map->fd >= 0)
8018                 return -EBUSY;
8019         map->def.map_flags = flags;
8020         return 0;
8021 }
8022
8023 __u32 bpf_map__numa_node(const struct bpf_map *map)
8024 {
8025         return map->numa_node;
8026 }
8027
8028 int bpf_map__set_numa_node(struct bpf_map *map, __u32 numa_node)
8029 {
8030         if (map->fd >= 0)
8031                 return -EBUSY;
8032         map->numa_node = numa_node;
8033         return 0;
8034 }
8035
8036 __u32 bpf_map__key_size(const struct bpf_map *map)
8037 {
8038         return map->def.key_size;
8039 }
8040
8041 int bpf_map__set_key_size(struct bpf_map *map, __u32 size)
8042 {
8043         if (map->fd >= 0)
8044                 return -EBUSY;
8045         map->def.key_size = size;
8046         return 0;
8047 }
8048
8049 __u32 bpf_map__value_size(const struct bpf_map *map)
8050 {
8051         return map->def.value_size;
8052 }
8053
8054 int bpf_map__set_value_size(struct bpf_map *map, __u32 size)
8055 {
8056         if (map->fd >= 0)
8057                 return -EBUSY;
8058         map->def.value_size = size;
8059         return 0;
8060 }
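
/* Usage sketch (hypothetical): tweak a map definition after open but before
 * load. Each of these setters fails with -EBUSY once the map has an FD,
 * i.e. once the object is loaded. The sizes are illustrative.
 */
static int example_resize_map(struct bpf_map *map)
{
        int err;

        err = bpf_map__set_key_size(map, sizeof(__u64));
        if (err)
                return err;
        return bpf_map__set_value_size(map, 64);
}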
8061
8062 __u32 bpf_map__btf_key_type_id(const struct bpf_map *map)
8063 {
8064         return map ? map->btf_key_type_id : 0;
8065 }
8066
8067 __u32 bpf_map__btf_value_type_id(const struct bpf_map *map)
8068 {
8069         return map ? map->btf_value_type_id : 0;
8070 }
8071
8072 int bpf_map__set_priv(struct bpf_map *map, void *priv,
8073                      bpf_map_clear_priv_t clear_priv)
8074 {
8075         if (!map)
8076                 return -EINVAL;
8077
8078         if (map->priv) {
8079                 if (map->clear_priv)
8080                         map->clear_priv(map, map->priv);
8081         }
8082
8083         map->priv = priv;
8084         map->clear_priv = clear_priv;
8085         return 0;
8086 }
8087
8088 void *bpf_map__priv(const struct bpf_map *map)
8089 {
8090         return map ? map->priv : ERR_PTR(-EINVAL);
8091 }
8092
8093 int bpf_map__set_initial_value(struct bpf_map *map,
8094                                const void *data, size_t size)
8095 {
8096         if (!map->mmaped || map->libbpf_type == LIBBPF_MAP_KCONFIG ||
8097             size != map->def.value_size || map->fd >= 0)
8098                 return -EINVAL;
8099
8100         memcpy(map->mmaped, data, size);
8101         return 0;
8102 }
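
/* Usage sketch (hypothetical): seed an internal (.data/.rodata/.bss) map
 * with caller-provided contents before load. The struct below is an
 * assumption for the example and must match the map's value size exactly.
 */
struct example_globals {
        __u32 verbosity;
        __u32 flags;
};

static int example_seed_globals(struct bpf_map *data_map)
{
        struct example_globals g = { .verbosity = 1 };

        return bpf_map__set_initial_value(data_map, &g, sizeof(g));
}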
8103
8104 bool bpf_map__is_offload_neutral(const struct bpf_map *map)
8105 {
8106         return map->def.type == BPF_MAP_TYPE_PERF_EVENT_ARRAY;
8107 }
8108
8109 bool bpf_map__is_internal(const struct bpf_map *map)
8110 {
8111         return map->libbpf_type != LIBBPF_MAP_UNSPEC;
8112 }
8113
8114 __u32 bpf_map__ifindex(const struct bpf_map *map)
8115 {
8116         return map->map_ifindex;
8117 }
8118
8119 int bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex)
8120 {
8121         if (map->fd >= 0)
8122                 return -EBUSY;
8123         map->map_ifindex = ifindex;
8124         return 0;
8125 }
8126
8127 int bpf_map__set_inner_map_fd(struct bpf_map *map, int fd)
8128 {
8129         if (!bpf_map_type__is_map_in_map(map->def.type)) {
8130                 pr_warn("error: unsupported map type\n");
8131                 return -EINVAL;
8132         }
8133         if (map->inner_map_fd != -1) {
8134                 pr_warn("error: inner_map_fd already specified\n");
8135                 return -EINVAL;
8136         }
8137         map->inner_map_fd = fd;
8138         return 0;
8139 }
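
/* Usage sketch (hypothetical): supply an inner map prototype for a
 * map-in-map before load. bpf_create_map() merely stands in for however the
 * caller obtains the prototype FD.
 */
static int example_set_inner_proto(struct bpf_map *outer)
{
        int inner_fd, err;

        inner_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, sizeof(__u32),
                                  sizeof(__u32), 1, 0);
        if (inner_fd < 0)
                return inner_fd;

        err = bpf_map__set_inner_map_fd(outer, inner_fd);
        if (err)
                close(inner_fd);
        return err;
}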
8140
8141 static struct bpf_map *
8142 __bpf_map__iter(const struct bpf_map *m, const struct bpf_object *obj, int i)
8143 {
8144         ssize_t idx;
8145         struct bpf_map *s, *e;
8146
8147         if (!obj || !obj->maps)
8148                 return NULL;
8149
8150         s = obj->maps;
8151         e = obj->maps + obj->nr_maps;
8152
8153         if ((m < s) || (m >= e)) {
8154                 pr_warn("error in %s: map handle doesn't belong to object\n",
8155                          __func__);
8156                 return NULL;
8157         }
8158
8159         idx = (m - obj->maps) + i;
8160         if (idx >= obj->nr_maps || idx < 0)
8161                 return NULL;
8162         return &obj->maps[idx];
8163 }
8164
8165 struct bpf_map *
8166 bpf_map__next(const struct bpf_map *prev, const struct bpf_object *obj)
8167 {
8168         if (prev == NULL)
8169                 return obj->maps;
8170
8171         return __bpf_map__iter(prev, obj, 1);
8172 }
8173
8174 struct bpf_map *
8175 bpf_map__prev(const struct bpf_map *next, const struct bpf_object *obj)
8176 {
8177         if (next == NULL) {
8178                 if (!obj->nr_maps)
8179                         return NULL;
8180                 return obj->maps + obj->nr_maps - 1;
8181         }
8182
8183         return __bpf_map__iter(next, obj, -1);
8184 }
8185
8186 struct bpf_map *
8187 bpf_object__find_map_by_name(const struct bpf_object *obj, const char *name)
8188 {
8189         struct bpf_map *pos;
8190
8191         bpf_object__for_each_map(pos, obj) {
8192                 if (pos->name && !strcmp(pos->name, name))
8193                         return pos;
8194         }
8195         return NULL;
8196 }
8197
8198 int
8199 bpf_object__find_map_fd_by_name(const struct bpf_object *obj, const char *name)
8200 {
8201         return bpf_map__fd(bpf_object__find_map_by_name(obj, name));
8202 }
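
/* Usage sketch (hypothetical): look up a map by the name it was given in
 * the BPF source; "events" is an illustrative map name.
 */
static int example_lookup_map_fd(struct bpf_object *obj)
{
        struct bpf_map *map;

        map = bpf_object__find_map_by_name(obj, "events");
        if (!map)
                return -ENOENT;
        return bpf_map__fd(map);
}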
8203
8204 struct bpf_map *
8205 bpf_object__find_map_by_offset(struct bpf_object *obj, size_t offset)
8206 {
8207         return ERR_PTR(-ENOTSUP);
8208 }
8209
8210 long libbpf_get_error(const void *ptr)
8211 {
8212         return PTR_ERR_OR_ZERO(ptr);
8213 }
8214
8215 int bpf_prog_load(const char *file, enum bpf_prog_type type,
8216                   struct bpf_object **pobj, int *prog_fd)
8217 {
8218         struct bpf_prog_load_attr attr;
8219
8220         memset(&attr, 0, sizeof(struct bpf_prog_load_attr));
8221         attr.file = file;
8222         attr.prog_type = type;
8223         attr.expected_attach_type = 0;
8224
8225         return bpf_prog_load_xattr(&attr, pobj, prog_fd);
8226 }
8227
8228 int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
8229                         struct bpf_object **pobj, int *prog_fd)
8230 {
8231         struct bpf_object_open_attr open_attr = {};
8232         struct bpf_program *prog, *first_prog = NULL;
8233         struct bpf_object *obj;
8234         struct bpf_map *map;
8235         int err;
8236
8237         if (!attr)
8238                 return -EINVAL;
8239         if (!attr->file)
8240                 return -EINVAL;
8241
8242         open_attr.file = attr->file;
8243         open_attr.prog_type = attr->prog_type;
8244
8245         obj = bpf_object__open_xattr(&open_attr);
8246         if (IS_ERR_OR_NULL(obj))
8247                 return -ENOENT;
8248
8249         bpf_object__for_each_program(prog, obj) {
8250                 enum bpf_attach_type attach_type = attr->expected_attach_type;
8251                 /*
8252                  * to preserve backwards compatibility, bpf_prog_load treats
8253                  * attr->prog_type, if specified, as an override to whatever
8254                  * bpf_object__open guessed
8255                  */
8256                 if (attr->prog_type != BPF_PROG_TYPE_UNSPEC) {
8257                         bpf_program__set_type(prog, attr->prog_type);
8258                         bpf_program__set_expected_attach_type(prog,
8259                                                               attach_type);
8260                 }
8261                 if (bpf_program__get_type(prog) == BPF_PROG_TYPE_UNSPEC) {
8262                         /*
8263                          * we haven't guessed from section name and user
8264                          * didn't provide a fallback type, too bad...
8265                          */
8266                         bpf_object__close(obj);
8267                         return -EINVAL;
8268                 }
8269
8270                 prog->prog_ifindex = attr->ifindex;
8271                 prog->log_level = attr->log_level;
8272                 prog->prog_flags = attr->prog_flags;
8273                 if (!first_prog)
8274                         first_prog = prog;
8275         }
8276
8277         bpf_object__for_each_map(map, obj) {
8278                 if (!bpf_map__is_offload_neutral(map))
8279                         map->map_ifindex = attr->ifindex;
8280         }
8281
8282         if (!first_prog) {
8283                 pr_warn("object file doesn't contain any BPF program\n");
8284                 bpf_object__close(obj);
8285                 return -ENOENT;
8286         }
8287
8288         err = bpf_object__load(obj);
8289         if (err) {
8290                 bpf_object__close(obj);
8291                 return err;
8292         }
8293
8294         *pobj = obj;
8295         *prog_fd = bpf_program__fd(first_prog);
8296         return 0;
8297 }
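
/* Usage sketch (hypothetical): the classic open+load+get-FD shortcut built
 * on bpf_prog_load_xattr() above. "prog.o" is an illustrative path;
 * BPF_PROG_TYPE_UNSPEC lets libbpf guess types from section names.
 */
static int example_load_file(struct bpf_object **pobj)
{
        int prog_fd, err;

        err = bpf_prog_load("prog.o", BPF_PROG_TYPE_UNSPEC, pobj, &prog_fd);
        if (err)
                return err;
        return prog_fd; /* FD of the first program in the object */
}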
8298
8299 struct bpf_link {
8300         int (*detach)(struct bpf_link *link);
8301         int (*destroy)(struct bpf_link *link);
8302         char *pin_path;         /* NULL, if not pinned */
8303         int fd;                 /* hook FD, -1 if not applicable */
8304         bool disconnected;
8305 };
8306
8307 /* Replace link's underlying BPF program with the new one */
8308 int bpf_link__update_program(struct bpf_link *link, struct bpf_program *prog)
8309 {
8310         return bpf_link_update(bpf_link__fd(link), bpf_program__fd(prog), NULL);
8311 }
8312
8313 /* Release "ownership" of the underlying BPF resource (typically, a BPF
8314  * program attached to some BPF hook, e.g., tracepoint, kprobe, etc). A
8315  * disconnected link, when destroyed through a bpf_link__destroy() call,
8316  * won't attempt to detach/unregister that BPF resource. This is useful when
8317  * the attached BPF program has to outlive the userspace program that
8318  * attached it. Depending on the type of BPF program, though, additional
8319  * steps (like pinning the BPF program in BPF FS) might be necessary to
8320  * ensure that exit of the userspace program doesn't trigger automatic
8321  * detachment and cleanup inside the kernel.
8322  */
8323 void bpf_link__disconnect(struct bpf_link *link)
8324 {
8325         link->disconnected = true;
8326 }
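
/* Usage sketch (hypothetical): make an attachment survive process exit by
 * pinning the link, then disconnecting it so that bpf_link__destroy() only
 * frees memory without detaching. "/sys/fs/bpf/my_link" is illustrative.
 */
static int example_leave_attached(struct bpf_link *link)
{
        int err;

        err = bpf_link__pin(link, "/sys/fs/bpf/my_link");
        if (err)
                return err;

        bpf_link__disconnect(link);
        return bpf_link__destroy(link);
}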
8327
8328 int bpf_link__destroy(struct bpf_link *link)
8329 {
8330         int err = 0;
8331
8332         if (IS_ERR_OR_NULL(link))
8333                 return 0;
8334
8335         if (!link->disconnected && link->detach)
8336                 err = link->detach(link);
8337         if (link->destroy)
8338                 link->destroy(link);
8339         if (link->pin_path)
8340                 free(link->pin_path);
8341         free(link);
8342
8343         return err;
8344 }
8345
8346 int bpf_link__fd(const struct bpf_link *link)
8347 {
8348         return link->fd;
8349 }
8350
8351 const char *bpf_link__pin_path(const struct bpf_link *link)
8352 {
8353         return link->pin_path;
8354 }
8355
8356 static int bpf_link__detach_fd(struct bpf_link *link)
8357 {
8358         return close(link->fd);
8359 }
8360
8361 struct bpf_link *bpf_link__open(const char *path)
8362 {
8363         struct bpf_link *link;
8364         int fd;
8365
8366         fd = bpf_obj_get(path);
8367         if (fd < 0) {
8368                 fd = -errno;
8369                 pr_warn("failed to open link at %s: %d\n", path, fd);
8370                 return ERR_PTR(fd);
8371         }
8372
8373         link = calloc(1, sizeof(*link));
8374         if (!link) {
8375                 close(fd);
8376                 return ERR_PTR(-ENOMEM);
8377         }
8378         link->detach = &bpf_link__detach_fd;
8379         link->fd = fd;
8380
8381         link->pin_path = strdup(path);
8382         if (!link->pin_path) {
8383                 bpf_link__destroy(link);
8384                 return ERR_PTR(-ENOMEM);
8385         }
8386
8387         return link;
8388 }
8389
8390 int bpf_link__detach(struct bpf_link *link)
8391 {
8392         return bpf_link_detach(link->fd) ? -errno : 0;
8393 }
8394
8395 int bpf_link__pin(struct bpf_link *link, const char *path)
8396 {
8397         int err;
8398
8399         if (link->pin_path)
8400                 return -EBUSY;
8401         err = make_parent_dir(path);
8402         if (err)
8403                 return err;
8404         err = check_path(path);
8405         if (err)
8406                 return err;
8407
8408         link->pin_path = strdup(path);
8409         if (!link->pin_path)
8410                 return -ENOMEM;
8411
8412         if (bpf_obj_pin(link->fd, link->pin_path)) {
8413                 err = -errno;
8414                 zfree(&link->pin_path);
8415                 return err;
8416         }
8417
8418         pr_debug("link fd=%d: pinned at %s\n", link->fd, link->pin_path);
8419         return 0;
8420 }
8421
8422 int bpf_link__unpin(struct bpf_link *link)
8423 {
8424         int err;
8425
8426         if (!link->pin_path)
8427                 return -EINVAL;
8428
8429         err = unlink(link->pin_path);
8430         if (err != 0)
8431                 return -errno;
8432
8433         pr_debug("link fd=%d: unpinned from %s\n", link->fd, link->pin_path);
8434         zfree(&link->pin_path);
8435         return 0;
8436 }
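
/* A usage sketch: a new process re-takes ownership of a previously pinned
 * link, unpins and detaches it. The pin path is illustrative.
 *
 *	struct bpf_link *link = bpf_link__open("/sys/fs/bpf/my_link");
 *
 *	if (libbpf_get_error(link))
 *		return -1;
 *	bpf_link__unpin(link);
 *	bpf_link__destroy(link);
 */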
8437
8438 static int bpf_link__detach_perf_event(struct bpf_link *link)
8439 {
8440         int err;
8441
8442         err = ioctl(link->fd, PERF_EVENT_IOC_DISABLE, 0);
8443         if (err)
8444                 err = -errno;
8445
8446         close(link->fd);
8447         return err;
8448 }
8449
8450 struct bpf_link *bpf_program__attach_perf_event(struct bpf_program *prog,
8451                                                 int pfd)
8452 {
8453         char errmsg[STRERR_BUFSIZE];
8454         struct bpf_link *link;
8455         int prog_fd, err;
8456
8457         if (pfd < 0) {
8458                 pr_warn("program '%s': invalid perf event FD %d\n",
8459                         bpf_program__title(prog, false), pfd);
8460                 return ERR_PTR(-EINVAL);
8461         }
8462         prog_fd = bpf_program__fd(prog);
8463         if (prog_fd < 0) {
8464                 pr_warn("program '%s': can't attach BPF program w/o FD (did you load it?)\n",
8465                         bpf_program__title(prog, false));
8466                 return ERR_PTR(-EINVAL);
8467         }
8468
8469         link = calloc(1, sizeof(*link));
8470         if (!link)
8471                 return ERR_PTR(-ENOMEM);
8472         link->detach = &bpf_link__detach_perf_event;
8473         link->fd = pfd;
8474
8475         if (ioctl(pfd, PERF_EVENT_IOC_SET_BPF, prog_fd) < 0) {
8476                 err = -errno;
8477                 free(link);
8478                 pr_warn("program '%s': failed to attach to pfd %d: %s\n",
8479                         bpf_program__title(prog, false), pfd,
8480                            libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
8481                 if (err == -EPROTO)
8482                         pr_warn("program '%s': try add PERF_SAMPLE_CALLCHAIN to or remove exclude_callchain_[kernel|user] from pfd %d\n",
8483                                 bpf_program__title(prog, false), pfd);
8484                 return ERR_PTR(err);
8485         }
8486         if (ioctl(pfd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
8487                 err = -errno;
8488                 free(link);
8489                 pr_warn("program '%s': failed to enable pfd %d: %s\n",
8490                         bpf_program__title(prog, false), pfd,
8491                            libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
8492                 return ERR_PTR(err);
8493         }
8494         return link;
8495 }
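
/* A usage sketch: attach a BPF program to a caller-created perf event. The
 * attr values are illustrative (a 99 Hz CPU-clock sampling event on CPU 0
 * across all processes); prog is assumed to be an already-loaded perf_event
 * BPF program.
 *
 *	struct perf_event_attr attr = {
 *		.type = PERF_TYPE_SOFTWARE,
 *		.config = PERF_COUNT_SW_CPU_CLOCK,
 *		.freq = 1,
 *		.sample_freq = 99,
 *	};
 *	int pfd = syscall(__NR_perf_event_open, &attr, -1, 0, -1,
 *			  PERF_FLAG_FD_CLOEXEC);
 *	struct bpf_link *link = bpf_program__attach_perf_event(prog, pfd);
 */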
8496
8497 /*
8498  * This function is expected to parse an integer in the range of [0, 2^31-1]
8499  * from the given file using scanf format string fmt. If the actual parsed
8500  * value is negative, the result might be indistinguishable from an error code.
8501  */
8502 static int parse_uint_from_file(const char *file, const char *fmt)
8503 {
8504         char buf[STRERR_BUFSIZE];
8505         int err, ret;
8506         FILE *f;
8507
8508         f = fopen(file, "r");
8509         if (!f) {
8510                 err = -errno;
8511                 pr_debug("failed to open '%s': %s\n", file,
8512                          libbpf_strerror_r(err, buf, sizeof(buf)));
8513                 return err;
8514         }
8515         err = fscanf(f, fmt, &ret);
8516         if (err != 1) {
8517                 err = err == EOF ? -EIO : -errno;
8518                 pr_debug("failed to parse '%s': %s\n", file,
8519                         libbpf_strerror_r(err, buf, sizeof(buf)));
8520                 fclose(f);
8521                 return err;
8522         }
8523         fclose(f);
8524         return ret;
8525 }
8526
8527 static int determine_kprobe_perf_type(void)
8528 {
8529         const char *file = "/sys/bus/event_source/devices/kprobe/type";
8530
8531         return parse_uint_from_file(file, "%d\n");
8532 }
8533
8534 static int determine_uprobe_perf_type(void)
8535 {
8536         const char *file = "/sys/bus/event_source/devices/uprobe/type";
8537
8538         return parse_uint_from_file(file, "%d\n");
8539 }
8540
8541 static int determine_kprobe_retprobe_bit(void)
8542 {
8543         const char *file = "/sys/bus/event_source/devices/kprobe/format/retprobe";
8544
8545         return parse_uint_from_file(file, "config:%d\n");
8546 }
8547
8548 static int determine_uprobe_retprobe_bit(void)
8549 {
8550         const char *file = "/sys/bus/event_source/devices/uprobe/format/retprobe";
8551
8552         return parse_uint_from_file(file, "config:%d\n");
8553 }
8554
8555 static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name,
8556                                  uint64_t offset, int pid)
8557 {
8558         struct perf_event_attr attr = {};
8559         char errmsg[STRERR_BUFSIZE];
8560         int type, pfd, err;
8561
8562         type = uprobe ? determine_uprobe_perf_type()
8563                       : determine_kprobe_perf_type();
8564         if (type < 0) {
8565                 pr_warn("failed to determine %s perf type: %s\n",
8566                         uprobe ? "uprobe" : "kprobe",
8567                         libbpf_strerror_r(type, errmsg, sizeof(errmsg)));
8568                 return type;
8569         }
8570         if (retprobe) {
8571                 int bit = uprobe ? determine_uprobe_retprobe_bit()
8572                                  : determine_kprobe_retprobe_bit();
8573
8574                 if (bit < 0) {
8575                         pr_warn("failed to determine %s retprobe bit: %s\n",
8576                                 uprobe ? "uprobe" : "kprobe",
8577                                 libbpf_strerror_r(bit, errmsg, sizeof(errmsg)));
8578                         return bit;
8579                 }
8580                 attr.config |= 1 << bit;
8581         }
8582         attr.size = sizeof(attr);
8583         attr.type = type;
8584         attr.config1 = ptr_to_u64(name); /* kprobe_func or uprobe_path */
8585         attr.config2 = offset;           /* kprobe_addr or probe_offset */
8586
8587         /* pid filter is meaningful only for uprobes */
8588         pfd = syscall(__NR_perf_event_open, &attr,
8589                       pid < 0 ? -1 : pid /* pid */,
8590                       pid == -1 ? 0 : -1 /* cpu */,
8591                       -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC);
8592         if (pfd < 0) {
8593                 err = -errno;
8594                 pr_warn("%s perf_event_open() failed: %s\n",
8595                         uprobe ? "uprobe" : "kprobe",
8596                         libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
8597                 return err;
8598         }
8599         return pfd;
8600 }
8601
8602 struct bpf_link *bpf_program__attach_kprobe(struct bpf_program *prog,
8603                                             bool retprobe,
8604                                             const char *func_name)
8605 {
8606         char errmsg[STRERR_BUFSIZE];
8607         struct bpf_link *link;
8608         int pfd, err;
8609
8610         pfd = perf_event_open_probe(false /* uprobe */, retprobe, func_name,
8611                                     0 /* offset */, -1 /* pid */);
8612         if (pfd < 0) {
8613                 pr_warn("program '%s': failed to create %s '%s' perf event: %s\n",
8614                         bpf_program__title(prog, false),
8615                         retprobe ? "kretprobe" : "kprobe", func_name,
8616                         libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
8617                 return ERR_PTR(pfd);
8618         }
8619         link = bpf_program__attach_perf_event(prog, pfd);
8620         if (IS_ERR(link)) {
8621                 close(pfd);
8622                 err = PTR_ERR(link);
8623                 pr_warn("program '%s': failed to attach to %s '%s': %s\n",
8624                         bpf_program__title(prog, false),
8625                         retprobe ? "kretprobe" : "kprobe", func_name,
8626                         libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
8627                 return link;
8628         }
8629         return link;
8630 }
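
/* A usage sketch; "do_sys_open" stands in for any function visible in
 * /proc/kallsyms, and prog is assumed to be a loaded kprobe program:
 *
 *	struct bpf_link *link;
 *
 *	link = bpf_program__attach_kprobe(prog, false, "do_sys_open");
 *	if (libbpf_get_error(link))
 *		return -1;
 */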
8631
8632 static struct bpf_link *attach_kprobe(const struct bpf_sec_def *sec,
8633                                       struct bpf_program *prog)
8634 {
8635         const char *func_name;
8636         bool retprobe;
8637
8638         func_name = bpf_program__title(prog, false) + sec->len;
8639         retprobe = strcmp(sec->sec, "kretprobe/") == 0;
8640
8641         return bpf_program__attach_kprobe(prog, retprobe, func_name);
8642 }
8643
8644 struct bpf_link *bpf_program__attach_uprobe(struct bpf_program *prog,
8645                                             bool retprobe, pid_t pid,
8646                                             const char *binary_path,
8647                                             size_t func_offset)
8648 {
8649         char errmsg[STRERR_BUFSIZE];
8650         struct bpf_link *link;
8651         int pfd, err;
8652
8653         pfd = perf_event_open_probe(true /* uprobe */, retprobe,
8654                                     binary_path, func_offset, pid);
8655         if (pfd < 0) {
8656                 pr_warn("program '%s': failed to create %s '%s:0x%zx' perf event: %s\n",
8657                         bpf_program__title(prog, false),
8658                         retprobe ? "uretprobe" : "uprobe",
8659                         binary_path, func_offset,
8660                         libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
8661                 return ERR_PTR(pfd);
8662         }
8663         link = bpf_program__attach_perf_event(prog, pfd);
8664         if (IS_ERR(link)) {
8665                 close(pfd);
8666                 err = PTR_ERR(link);
8667                 pr_warn("program '%s': failed to attach to %s '%s:0x%zx': %s\n",
8668                         bpf_program__title(prog, false),
8669                         retprobe ? "uretprobe" : "uprobe",
8670                         binary_path, func_offset,
8671                         libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
8672                 return link;
8673         }
8674         return link;
8675 }
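
/* A usage sketch: attach to a user-space function across all processes
 * (pid -1). libbpf doesn't resolve symbols here, so 0x1234 stands in for an
 * offset the caller resolved, e.g. from the binary's ELF symbol table; the
 * binary path is illustrative too.
 *
 *	link = bpf_program__attach_uprobe(prog, false, -1,
 *					  "/bin/bash", 0x1234);
 */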
8676
8677 static int determine_tracepoint_id(const char *tp_category,
8678                                    const char *tp_name)
8679 {
8680         char file[PATH_MAX];
8681         int ret;
8682
8683         ret = snprintf(file, sizeof(file),
8684                        "/sys/kernel/debug/tracing/events/%s/%s/id",
8685                        tp_category, tp_name);
8686         if (ret < 0)
8687                 return -errno;
8688         if (ret >= sizeof(file)) {
8689                 pr_debug("tracepoint %s/%s path is too long\n",
8690                          tp_category, tp_name);
8691                 return -E2BIG;
8692         }
8693         return parse_uint_from_file(file, "%d\n");
8694 }
8695
8696 static int perf_event_open_tracepoint(const char *tp_category,
8697                                       const char *tp_name)
8698 {
8699         struct perf_event_attr attr = {};
8700         char errmsg[STRERR_BUFSIZE];
8701         int tp_id, pfd, err;
8702
8703         tp_id = determine_tracepoint_id(tp_category, tp_name);
8704         if (tp_id < 0) {
8705                 pr_warn("failed to determine tracepoint '%s/%s' perf event ID: %s\n",
8706                         tp_category, tp_name,
8707                         libbpf_strerror_r(tp_id, errmsg, sizeof(errmsg)));
8708                 return tp_id;
8709         }
8710
8711         attr.type = PERF_TYPE_TRACEPOINT;
8712         attr.size = sizeof(attr);
8713         attr.config = tp_id;
8714
8715         pfd = syscall(__NR_perf_event_open, &attr, -1 /* pid */, 0 /* cpu */,
8716                       -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC);
8717         if (pfd < 0) {
8718                 err = -errno;
8719                 pr_warn("tracepoint '%s/%s' perf_event_open() failed: %s\n",
8720                         tp_category, tp_name,
8721                         libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
8722                 return err;
8723         }
8724         return pfd;
8725 }
8726
8727 struct bpf_link *bpf_program__attach_tracepoint(struct bpf_program *prog,
8728                                                 const char *tp_category,
8729                                                 const char *tp_name)
8730 {
8731         char errmsg[STRERR_BUFSIZE];
8732         struct bpf_link *link;
8733         int pfd, err;
8734
8735         pfd = perf_event_open_tracepoint(tp_category, tp_name);
8736         if (pfd < 0) {
8737                 pr_warn("program '%s': failed to create tracepoint '%s/%s' perf event: %s\n",
8738                         bpf_program__title(prog, false),
8739                         tp_category, tp_name,
8740                         libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
8741                 return ERR_PTR(pfd);
8742         }
8743         link = bpf_program__attach_perf_event(prog, pfd);
8744         if (IS_ERR(link)) {
8745                 close(pfd);
8746                 err = PTR_ERR(link);
8747                 pr_warn("program '%s': failed to attach to tracepoint '%s/%s': %s\n",
8748                         bpf_program__title(prog, false),
8749                         tp_category, tp_name,
8750                         libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
8751                 return link;
8752         }
8753         return link;
8754 }
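
/* A usage sketch, attaching to the sched:sched_switch tracepoint (category
 * and name map to /sys/kernel/debug/tracing/events/<category>/<name>):
 *
 *	link = bpf_program__attach_tracepoint(prog, "sched", "sched_switch");
 *	if (libbpf_get_error(link))
 *		return -1;
 */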
8755
8756 static struct bpf_link *attach_tp(const struct bpf_sec_def *sec,
8757                                   struct bpf_program *prog)
8758 {
8759         char *sec_name, *tp_cat, *tp_name;
8760         struct bpf_link *link;
8761
8762         sec_name = strdup(bpf_program__title(prog, false));
8763         if (!sec_name)
8764                 return ERR_PTR(-ENOMEM);
8765
8766         /* extract "tp/<category>/<name>" */
8767         tp_cat = sec_name + sec->len;
8768         tp_name = strchr(tp_cat, '/');
8769         if (!tp_name) {
8770                 link = ERR_PTR(-EINVAL);
8771                 goto out;
8772         }
8773         *tp_name = '\0';
8774         tp_name++;
8775
8776         link = bpf_program__attach_tracepoint(prog, tp_cat, tp_name);
8777 out:
8778         free(sec_name);
8779         return link;
8780 }
8781
8782 struct bpf_link *bpf_program__attach_raw_tracepoint(struct bpf_program *prog,
8783                                                     const char *tp_name)
8784 {
8785         char errmsg[STRERR_BUFSIZE];
8786         struct bpf_link *link;
8787         int prog_fd, pfd;
8788
8789         prog_fd = bpf_program__fd(prog);
8790         if (prog_fd < 0) {
8791                 pr_warn("program '%s': can't attach before loaded\n",
8792                         bpf_program__title(prog, false));
8793                 return ERR_PTR(-EINVAL);
8794         }
8795
8796         link = calloc(1, sizeof(*link));
8797         if (!link)
8798                 return ERR_PTR(-ENOMEM);
8799         link->detach = &bpf_link__detach_fd;
8800
8801         pfd = bpf_raw_tracepoint_open(tp_name, prog_fd);
8802         if (pfd < 0) {
8803                 pfd = -errno;
8804                 free(link);
8805                 pr_warn("program '%s': failed to attach to raw tracepoint '%s': %s\n",
8806                         bpf_program__title(prog, false), tp_name,
8807                         libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
8808                 return ERR_PTR(pfd);
8809         }
8810         link->fd = pfd;
8811         return link;
8812 }
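
/* A usage sketch; "sys_enter" is one of the raw tracepoint names defined by
 * the kernel (raw tracepoints have no category, unlike regular tracepoints):
 *
 *	link = bpf_program__attach_raw_tracepoint(prog, "sys_enter");
 */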
8813
8814 static struct bpf_link *attach_raw_tp(const struct bpf_sec_def *sec,
8815                                       struct bpf_program *prog)
8816 {
8817         const char *tp_name = bpf_program__title(prog, false) + sec->len;
8818
8819         return bpf_program__attach_raw_tracepoint(prog, tp_name);
8820 }
8821
8822 /* Common logic for all BPF program types that attach to a btf_id */
8823 static struct bpf_link *bpf_program__attach_btf_id(struct bpf_program *prog)
8824 {
8825         char errmsg[STRERR_BUFSIZE];
8826         struct bpf_link *link;
8827         int prog_fd, pfd;
8828
8829         prog_fd = bpf_program__fd(prog);
8830         if (prog_fd < 0) {
8831                 pr_warn("program '%s': can't attach before loaded\n",
8832                         bpf_program__title(prog, false));
8833                 return ERR_PTR(-EINVAL);
8834         }
8835
8836         link = calloc(1, sizeof(*link));
8837         if (!link)
8838                 return ERR_PTR(-ENOMEM);
8839         link->detach = &bpf_link__detach_fd;
8840
8841         pfd = bpf_raw_tracepoint_open(NULL, prog_fd);
8842         if (pfd < 0) {
8843                 pfd = -errno;
8844                 free(link);
8845                 pr_warn("program '%s': failed to attach: %s\n",
8846                         bpf_program__title(prog, false),
8847                         libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
8848                 return ERR_PTR(pfd);
8849         }
8850         link->fd = pfd;
8851         return link;
8852 }
8853
8854 struct bpf_link *bpf_program__attach_trace(struct bpf_program *prog)
8855 {
8856         return bpf_program__attach_btf_id(prog);
8857 }
8858
8859 struct bpf_link *bpf_program__attach_lsm(struct bpf_program *prog)
8860 {
8861         return bpf_program__attach_btf_id(prog);
8862 }
8863
8864 static struct bpf_link *attach_trace(const struct bpf_sec_def *sec,
8865                                      struct bpf_program *prog)
8866 {
8867         return bpf_program__attach_trace(prog);
8868 }
8869
8870 static struct bpf_link *attach_lsm(const struct bpf_sec_def *sec,
8871                                    struct bpf_program *prog)
8872 {
8873         return bpf_program__attach_lsm(prog);
8874 }
8875
8876 static struct bpf_link *attach_iter(const struct bpf_sec_def *sec,
8877                                     struct bpf_program *prog)
8878 {
8879         return bpf_program__attach_iter(prog, NULL);
8880 }
8881
8882 static struct bpf_link *
8883 bpf_program__attach_fd(struct bpf_program *prog, int target_fd,
8884                        const char *target_name)
8885 {
8886         enum bpf_attach_type attach_type;
8887         char errmsg[STRERR_BUFSIZE];
8888         struct bpf_link *link;
8889         int prog_fd, link_fd;
8890
8891         prog_fd = bpf_program__fd(prog);
8892         if (prog_fd < 0) {
8893                 pr_warn("program '%s': can't attach before loaded\n",
8894                         bpf_program__title(prog, false));
8895                 return ERR_PTR(-EINVAL);
8896         }
8897
8898         link = calloc(1, sizeof(*link));
8899         if (!link)
8900                 return ERR_PTR(-ENOMEM);
8901         link->detach = &bpf_link__detach_fd;
8902
8903         attach_type = bpf_program__get_expected_attach_type(prog);
8904         link_fd = bpf_link_create(prog_fd, target_fd, attach_type, NULL);
8905         if (link_fd < 0) {
8906                 link_fd = -errno;
8907                 free(link);
8908                 pr_warn("program '%s': failed to attach to %s: %s\n",
8909                         bpf_program__title(prog, false), target_name,
8910                         libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg)));
8911                 return ERR_PTR(link_fd);
8912         }
8913         link->fd = link_fd;
8914         return link;
8915 }
8916
8917 struct bpf_link *
8918 bpf_program__attach_cgroup(struct bpf_program *prog, int cgroup_fd)
8919 {
8920         return bpf_program__attach_fd(prog, cgroup_fd, "cgroup");
8921 }
8922
8923 struct bpf_link *
8924 bpf_program__attach_netns(struct bpf_program *prog, int netns_fd)
8925 {
8926         return bpf_program__attach_fd(prog, netns_fd, "netns");
8927 }
8928
8929 struct bpf_link *bpf_program__attach_xdp(struct bpf_program *prog, int ifindex)
8930 {
8931         /* target_fd/target_ifindex use the same field in LINK_CREATE */
8932         return bpf_program__attach_fd(prog, ifindex, "xdp");
8933 }
8934
8935 struct bpf_link *
8936 bpf_program__attach_iter(struct bpf_program *prog,
8937                          const struct bpf_iter_attach_opts *opts)
8938 {
8939         DECLARE_LIBBPF_OPTS(bpf_link_create_opts, link_create_opts);
8940         char errmsg[STRERR_BUFSIZE];
8941         struct bpf_link *link;
8942         int prog_fd, link_fd;
8943         __u32 target_fd = 0;
8944
8945         if (!OPTS_VALID(opts, bpf_iter_attach_opts))
8946                 return ERR_PTR(-EINVAL);
8947
8948         link_create_opts.iter_info = OPTS_GET(opts, link_info, (void *)0);
8949         link_create_opts.iter_info_len = OPTS_GET(opts, link_info_len, 0);
8950
8951         prog_fd = bpf_program__fd(prog);
8952         if (prog_fd < 0) {
8953                 pr_warn("program '%s': can't attach before loaded\n",
8954                         bpf_program__title(prog, false));
8955                 return ERR_PTR(-EINVAL);
8956         }
8957
8958         link = calloc(1, sizeof(*link));
8959         if (!link)
8960                 return ERR_PTR(-ENOMEM);
8961         link->detach = &bpf_link__detach_fd;
8962
8963         link_fd = bpf_link_create(prog_fd, target_fd, BPF_TRACE_ITER,
8964                                   &link_create_opts);
8965         if (link_fd < 0) {
8966                 link_fd = -errno;
8967                 free(link);
8968                 pr_warn("program '%s': failed to attach to iterator: %s\n",
8969                         bpf_program__title(prog, false),
8970                         libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg)));
8971                 return ERR_PTR(link_fd);
8972         }
8973         link->fd = link_fd;
8974         return link;
8975 }
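
/* A usage sketch: attach an iterator program with default options (passing
 * NULL opts works too); for parametrized iterators, opts.link_info and
 * opts.link_info_len can carry a kernel-defined struct bpf_iter_link_info.
 *
 *	DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
 *	struct bpf_link *link = bpf_program__attach_iter(prog, &opts);
 */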
8976
8977 struct bpf_link *bpf_program__attach(struct bpf_program *prog)
8978 {
8979         const struct bpf_sec_def *sec_def;
8980
8981         sec_def = find_sec_def(bpf_program__title(prog, false));
8982         if (!sec_def || !sec_def->attach_fn)
8983                 return ERR_PTR(-ESRCH);
8984
8985         return sec_def->attach_fn(sec_def, prog);
8986 }
8987
8988 static int bpf_link__detach_struct_ops(struct bpf_link *link)
8989 {
8990         __u32 zero = 0;
8991
8992         if (bpf_map_delete_elem(link->fd, &zero))
8993                 return -errno;
8994
8995         return 0;
8996 }
8997
8998 struct bpf_link *bpf_map__attach_struct_ops(struct bpf_map *map)
8999 {
9000         struct bpf_struct_ops *st_ops;
9001         struct bpf_link *link;
9002         __u32 i, zero = 0;
9003         int err;
9004
9005         if (!bpf_map__is_struct_ops(map) || map->fd == -1)
9006                 return ERR_PTR(-EINVAL);
9007
9008         link = calloc(1, sizeof(*link));
9009         if (!link)
9010                 return ERR_PTR(-ENOMEM);
9011
9012         st_ops = map->st_ops;
9013         for (i = 0; i < btf_vlen(st_ops->type); i++) {
9014                 struct bpf_program *prog = st_ops->progs[i];
9015                 void *kern_data;
9016                 int prog_fd;
9017
9018                 if (!prog)
9019                         continue;
9020
9021                 prog_fd = bpf_program__fd(prog);
9022                 kern_data = st_ops->kern_vdata + st_ops->kern_func_off[i];
9023                 *(unsigned long *)kern_data = prog_fd;
9024         }
9025
9026         err = bpf_map_update_elem(map->fd, &zero, st_ops->kern_vdata, 0);
9027         if (err) {
9028                 err = -errno;
9029                 free(link);
9030                 return ERR_PTR(err);
9031         }
9032
9033         link->detach = bpf_link__detach_struct_ops;
9034         link->fd = map->fd;
9035
9036         return link;
9037 }
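
/* A usage sketch, assuming a skeleton with a struct_ops map named "dctcp"
 * (e.g., implementing tcp_congestion_ops); the object must be loaded, so
 * that the map FD is valid, before attaching:
 *
 *	struct bpf_link *link = bpf_map__attach_struct_ops(skel->maps.dctcp);
 *	if (libbpf_get_error(link))
 *		return -1;
 */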
9038
9039 enum bpf_perf_event_ret
9040 bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size,
9041                            void **copy_mem, size_t *copy_size,
9042                            bpf_perf_event_print_t fn, void *private_data)
9043 {
9044         struct perf_event_mmap_page *header = mmap_mem;
9045         __u64 data_head = ring_buffer_read_head(header);
9046         __u64 data_tail = header->data_tail;
9047         void *base = ((__u8 *)header) + page_size;
9048         int ret = LIBBPF_PERF_EVENT_CONT;
9049         struct perf_event_header *ehdr;
9050         size_t ehdr_size;
9051
9052         while (data_head != data_tail) {
9053                 ehdr = base + (data_tail & (mmap_size - 1));
9054                 ehdr_size = ehdr->size;
9055
9056                 if (((void *)ehdr) + ehdr_size > base + mmap_size) {
9057                         void *copy_start = ehdr;
9058                         size_t len_first = base + mmap_size - copy_start;
9059                         size_t len_secnd = ehdr_size - len_first;
9060
9061                         if (*copy_size < ehdr_size) {
9062                                 free(*copy_mem);
9063                                 *copy_mem = malloc(ehdr_size);
9064                                 if (!*copy_mem) {
9065                                         *copy_size = 0;
9066                                         ret = LIBBPF_PERF_EVENT_ERROR;
9067                                         break;
9068                                 }
9069                                 *copy_size = ehdr_size;
9070                         }
9071
9072                         memcpy(*copy_mem, copy_start, len_first);
9073                         memcpy(*copy_mem + len_first, base, len_secnd);
9074                         ehdr = *copy_mem;
9075                 }
9076
9077                 ret = fn(ehdr, private_data);
9078                 data_tail += ehdr_size;
9079                 if (ret != LIBBPF_PERF_EVENT_CONT)
9080                         break;
9081         }
9082
9083         ring_buffer_write_tail(header, data_tail);
9084         return ret;
9085 }
9086
9087 struct perf_buffer;
9088
9089 struct perf_buffer_params {
9090         struct perf_event_attr *attr;
9091         /* if event_cb is specified, it takes precedence */
9092         perf_buffer_event_fn event_cb;
9093         /* sample_cb and lost_cb are higher-level common-case callbacks */
9094         perf_buffer_sample_fn sample_cb;
9095         perf_buffer_lost_fn lost_cb;
9096         void *ctx;
9097         int cpu_cnt;
9098         int *cpus;
9099         int *map_keys;
9100 };
9101
9102 struct perf_cpu_buf {
9103         struct perf_buffer *pb;
9104         void *base; /* mmap()'ed memory */
9105         void *buf; /* for reconstructing segmented data */
9106         size_t buf_size;
9107         int fd;
9108         int cpu;
9109         int map_key;
9110 };
9111
9112 struct perf_buffer {
9113         perf_buffer_event_fn event_cb;
9114         perf_buffer_sample_fn sample_cb;
9115         perf_buffer_lost_fn lost_cb;
9116         void *ctx; /* passed into callbacks */
9117
9118         size_t page_size;
9119         size_t mmap_size;
9120         struct perf_cpu_buf **cpu_bufs;
9121         struct epoll_event *events;
9122         int cpu_cnt; /* number of allocated CPU buffers */
9123         int epoll_fd; /* FD of the epoll instance polling all buffers */
9124         int map_fd; /* BPF_MAP_TYPE_PERF_EVENT_ARRAY BPF map FD */
9125 };
9126
9127 static void perf_buffer__free_cpu_buf(struct perf_buffer *pb,
9128                                       struct perf_cpu_buf *cpu_buf)
9129 {
9130         if (!cpu_buf)
9131                 return;
9132         if (cpu_buf->base &&
9133             munmap(cpu_buf->base, pb->mmap_size + pb->page_size))
9134                 pr_warn("failed to munmap cpu_buf #%d\n", cpu_buf->cpu);
9135         if (cpu_buf->fd >= 0) {
9136                 ioctl(cpu_buf->fd, PERF_EVENT_IOC_DISABLE, 0);
9137                 close(cpu_buf->fd);
9138         }
9139         free(cpu_buf->buf);
9140         free(cpu_buf);
9141 }
9142
9143 void perf_buffer__free(struct perf_buffer *pb)
9144 {
9145         int i;
9146
9147         if (IS_ERR_OR_NULL(pb))
9148                 return;
9149         if (pb->cpu_bufs) {
9150                 for (i = 0; i < pb->cpu_cnt; i++) {
9151                         struct perf_cpu_buf *cpu_buf = pb->cpu_bufs[i];
9152
9153                         if (!cpu_buf)
9154                                 continue;
9155
9156                         bpf_map_delete_elem(pb->map_fd, &cpu_buf->map_key);
9157                         perf_buffer__free_cpu_buf(pb, cpu_buf);
9158                 }
9159                 free(pb->cpu_bufs);
9160         }
9161         if (pb->epoll_fd >= 0)
9162                 close(pb->epoll_fd);
9163         free(pb->events);
9164         free(pb);
9165 }
9166
9167 static struct perf_cpu_buf *
9168 perf_buffer__open_cpu_buf(struct perf_buffer *pb, struct perf_event_attr *attr,
9169                           int cpu, int map_key)
9170 {
9171         struct perf_cpu_buf *cpu_buf;
9172         char msg[STRERR_BUFSIZE];
9173         int err;
9174
9175         cpu_buf = calloc(1, sizeof(*cpu_buf));
9176         if (!cpu_buf)
9177                 return ERR_PTR(-ENOMEM);
9178
9179         cpu_buf->pb = pb;
9180         cpu_buf->cpu = cpu;
9181         cpu_buf->map_key = map_key;
9182
9183         cpu_buf->fd = syscall(__NR_perf_event_open, attr, -1 /* pid */, cpu,
9184                               -1, PERF_FLAG_FD_CLOEXEC);
9185         if (cpu_buf->fd < 0) {
9186                 err = -errno;
9187                 pr_warn("failed to open perf buffer event on cpu #%d: %s\n",
9188                         cpu, libbpf_strerror_r(err, msg, sizeof(msg)));
9189                 goto error;
9190         }
9191
9192         cpu_buf->base = mmap(NULL, pb->mmap_size + pb->page_size,
9193                              PROT_READ | PROT_WRITE, MAP_SHARED,
9194                              cpu_buf->fd, 0);
9195         if (cpu_buf->base == MAP_FAILED) {
9196                 cpu_buf->base = NULL;
9197                 err = -errno;
9198                 pr_warn("failed to mmap perf buffer on cpu #%d: %s\n",
9199                         cpu, libbpf_strerror_r(err, msg, sizeof(msg)));
9200                 goto error;
9201         }
9202
9203         if (ioctl(cpu_buf->fd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
9204                 err = -errno;
9205                 pr_warn("failed to enable perf buffer event on cpu #%d: %s\n",
9206                         cpu, libbpf_strerror_r(err, msg, sizeof(msg)));
9207                 goto error;
9208         }
9209
9210         return cpu_buf;
9211
9212 error:
9213         perf_buffer__free_cpu_buf(pb, cpu_buf);
9214         return (struct perf_cpu_buf *)ERR_PTR(err);
9215 }
9216
9217 static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt,
9218                                               struct perf_buffer_params *p);
9219
9220 struct perf_buffer *perf_buffer__new(int map_fd, size_t page_cnt,
9221                                      const struct perf_buffer_opts *opts)
9222 {
9223         struct perf_buffer_params p = {};
9224         struct perf_event_attr attr = { 0, };
9225
9226         attr.config = PERF_COUNT_SW_BPF_OUTPUT;
9227         attr.type = PERF_TYPE_SOFTWARE;
9228         attr.sample_type = PERF_SAMPLE_RAW;
9229         attr.sample_period = 1;
9230         attr.wakeup_events = 1;
9231
9232         p.attr = &attr;
9233         p.sample_cb = opts ? opts->sample_cb : NULL;
9234         p.lost_cb = opts ? opts->lost_cb : NULL;
9235         p.ctx = opts ? opts->ctx : NULL;
9236
9237         return __perf_buffer__new(map_fd, page_cnt, &p);
9238 }
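
/* A usage sketch: 8-page per-CPU ring buffers with a sample callback;
 * handle_event and the "events" map name are illustrative, and the map is
 * assumed to be a BPF_MAP_TYPE_PERF_EVENT_ARRAY.
 *
 *	struct perf_buffer_opts pb_opts = { .sample_cb = handle_event };
 *	struct perf_buffer *pb;
 *
 *	pb = perf_buffer__new(bpf_map__fd(skel->maps.events), 8, &pb_opts);
 *	if (libbpf_get_error(pb))
 *		return -1;
 *	while (perf_buffer__poll(pb, 100) >= 0)
 *		;
 *	perf_buffer__free(pb);
 */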
9239
9240 struct perf_buffer *
9241 perf_buffer__new_raw(int map_fd, size_t page_cnt,
9242                      const struct perf_buffer_raw_opts *opts)
9243 {
9244         struct perf_buffer_params p = {};
9245
9246         p.attr = opts->attr;
9247         p.event_cb = opts->event_cb;
9248         p.ctx = opts->ctx;
9249         p.cpu_cnt = opts->cpu_cnt;
9250         p.cpus = opts->cpus;
9251         p.map_keys = opts->map_keys;
9252
9253         return __perf_buffer__new(map_fd, page_cnt, &p);
9254 }
9255
9256 static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt,
9257                                               struct perf_buffer_params *p)
9258 {
9259         const char *online_cpus_file = "/sys/devices/system/cpu/online";
9260         struct bpf_map_info map;
9261         char msg[STRERR_BUFSIZE];
9262         struct perf_buffer *pb;
9263         bool *online = NULL;
9264         __u32 map_info_len;
9265         int err, i, j, n;
9266
9267         if (page_cnt & (page_cnt - 1)) {
9268                 pr_warn("page count should be power of two, but is %zu\n",
9269                         page_cnt);
9270                 return ERR_PTR(-EINVAL);
9271         }
9272
9273         /* best-effort sanity checks */
9274         memset(&map, 0, sizeof(map));
9275         map_info_len = sizeof(map);
9276         err = bpf_obj_get_info_by_fd(map_fd, &map, &map_info_len);
9277         if (err) {
9278                 err = -errno;
9279                 /* if BPF_OBJ_GET_INFO_BY_FD is supported, it will return
9280                  * -EBADFD, -EFAULT, or -E2BIG on a real error
9281                  */
9282                 if (err != -EINVAL) {
9283                         pr_warn("failed to get map info for map FD %d: %s\n",
9284                                 map_fd, libbpf_strerror_r(err, msg, sizeof(msg)));
9285                         return ERR_PTR(err);
9286                 }
9287                 pr_debug("failed to get map info for FD %d; API not supported? Ignoring...\n",
9288                          map_fd);
9289         } else {
9290                 if (map.type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) {
9291                         pr_warn("map '%s' should be BPF_MAP_TYPE_PERF_EVENT_ARRAY\n",
9292                                 map.name);
9293                         return ERR_PTR(-EINVAL);
9294                 }
9295         }
9296
9297         pb = calloc(1, sizeof(*pb));
9298         if (!pb)
9299                 return ERR_PTR(-ENOMEM);
9300
9301         pb->event_cb = p->event_cb;
9302         pb->sample_cb = p->sample_cb;
9303         pb->lost_cb = p->lost_cb;
9304         pb->ctx = p->ctx;
9305
9306         pb->page_size = getpagesize();
9307         pb->mmap_size = pb->page_size * page_cnt;
9308         pb->map_fd = map_fd;
9309
9310         pb->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
9311         if (pb->epoll_fd < 0) {
9312                 err = -errno;
9313                 pr_warn("failed to create epoll instance: %s\n",
9314                         libbpf_strerror_r(err, msg, sizeof(msg)));
9315                 goto error;
9316         }
9317
9318         if (p->cpu_cnt > 0) {
9319                 pb->cpu_cnt = p->cpu_cnt;
9320         } else {
9321                 pb->cpu_cnt = libbpf_num_possible_cpus();
9322                 if (pb->cpu_cnt < 0) {
9323                         err = pb->cpu_cnt;
9324                         goto error;
9325                 }
9326                 if (map.max_entries && map.max_entries < pb->cpu_cnt)
9327                         pb->cpu_cnt = map.max_entries;
9328         }
9329
9330         pb->events = calloc(pb->cpu_cnt, sizeof(*pb->events));
9331         if (!pb->events) {
9332                 err = -ENOMEM;
9333                 pr_warn("failed to allocate events: out of memory\n");
9334                 goto error;
9335         }
9336         pb->cpu_bufs = calloc(pb->cpu_cnt, sizeof(*pb->cpu_bufs));
9337         if (!pb->cpu_bufs) {
9338                 err = -ENOMEM;
9339                 pr_warn("failed to allocate buffers: out of memory\n");
9340                 goto error;
9341         }
9342
9343         err = parse_cpu_mask_file(online_cpus_file, &online, &n);
9344         if (err) {
9345                 pr_warn("failed to get online CPU mask: %d\n", err);
9346                 goto error;
9347         }
9348
9349         for (i = 0, j = 0; i < pb->cpu_cnt; i++) {
9350                 struct perf_cpu_buf *cpu_buf;
9351                 int cpu, map_key;
9352
9353                 cpu = p->cpu_cnt > 0 ? p->cpus[i] : i;
9354                 map_key = p->cpu_cnt > 0 ? p->map_keys[i] : i;
9355
9356                 /* in case the user didn't explicitly request particular CPUs
9357                  * to be attached to, skip offline/not-present CPUs
9358                  */
9359                 if (p->cpu_cnt <= 0 && (cpu >= n || !online[cpu]))
9360                         continue;
9361
9362                 cpu_buf = perf_buffer__open_cpu_buf(pb, p->attr, cpu, map_key);
9363                 if (IS_ERR(cpu_buf)) {
9364                         err = PTR_ERR(cpu_buf);
9365                         goto error;
9366                 }
9367
9368                 pb->cpu_bufs[j] = cpu_buf;
9369
9370                 err = bpf_map_update_elem(pb->map_fd, &map_key,
9371                                           &cpu_buf->fd, 0);
9372                 if (err) {
9373                         err = -errno;
9374                         pr_warn("failed to set cpu #%d, key %d -> perf FD %d: %s\n",
9375                                 cpu, map_key, cpu_buf->fd,
9376                                 libbpf_strerror_r(err, msg, sizeof(msg)));
9377                         goto error;
9378                 }
9379
9380                 pb->events[j].events = EPOLLIN;
9381                 pb->events[j].data.ptr = cpu_buf;
9382                 if (epoll_ctl(pb->epoll_fd, EPOLL_CTL_ADD, cpu_buf->fd,
9383                               &pb->events[j]) < 0) {
9384                         err = -errno;
9385                         pr_warn("failed to epoll_ctl cpu #%d perf FD %d: %s\n",
9386                                 cpu, cpu_buf->fd,
9387                                 libbpf_strerror_r(err, msg, sizeof(msg)));
9388                         goto error;
9389                 }
9390                 j++;
9391         }
9392         pb->cpu_cnt = j;
9393         free(online);
9394
9395         return pb;
9396
9397 error:
9398         free(online);
9399         if (pb)
9400                 perf_buffer__free(pb);
9401         return ERR_PTR(err);
9402 }
9403
9404 struct perf_sample_raw {
9405         struct perf_event_header header;
9406         uint32_t size;
9407         char data[];
9408 };
9409
9410 struct perf_sample_lost {
9411         struct perf_event_header header;
9412         uint64_t id;
9413         uint64_t lost;
9414         uint64_t sample_id;
9415 };
9416
9417 static enum bpf_perf_event_ret
9418 perf_buffer__process_record(struct perf_event_header *e, void *ctx)
9419 {
9420         struct perf_cpu_buf *cpu_buf = ctx;
9421         struct perf_buffer *pb = cpu_buf->pb;
9422         void *data = e;
9423
9424         /* user wants full control over parsing perf event */
9425         if (pb->event_cb)
9426                 return pb->event_cb(pb->ctx, cpu_buf->cpu, e);
9427
9428         switch (e->type) {
9429         case PERF_RECORD_SAMPLE: {
9430                 struct perf_sample_raw *s = data;
9431
9432                 if (pb->sample_cb)
9433                         pb->sample_cb(pb->ctx, cpu_buf->cpu, s->data, s->size);
9434                 break;
9435         }
9436         case PERF_RECORD_LOST: {
9437                 struct perf_sample_lost *s = data;
9438
9439                 if (pb->lost_cb)
9440                         pb->lost_cb(pb->ctx, cpu_buf->cpu, s->lost);
9441                 break;
9442         }
9443         default:
9444                 pr_warn("unknown perf sample type %d\n", e->type);
9445                 return LIBBPF_PERF_EVENT_ERROR;
9446         }
9447         return LIBBPF_PERF_EVENT_CONT;
9448 }
9449
9450 static int perf_buffer__process_records(struct perf_buffer *pb,
9451                                         struct perf_cpu_buf *cpu_buf)
9452 {
9453         enum bpf_perf_event_ret ret;
9454
9455         ret = bpf_perf_event_read_simple(cpu_buf->base, pb->mmap_size,
9456                                          pb->page_size, &cpu_buf->buf,
9457                                          &cpu_buf->buf_size,
9458                                          perf_buffer__process_record, cpu_buf);
9459         if (ret != LIBBPF_PERF_EVENT_CONT)
9460                 return ret;
9461         return 0;
9462 }
9463
9464 int perf_buffer__epoll_fd(const struct perf_buffer *pb)
9465 {
9466         return pb->epoll_fd;
9467 }
9468
9469 int perf_buffer__poll(struct perf_buffer *pb, int timeout_ms)
9470 {
9471         int i, cnt, err;
9472
9473         cnt = epoll_wait(pb->epoll_fd, pb->events, pb->cpu_cnt, timeout_ms);
9474         for (i = 0; i < cnt; i++) {
9475                 struct perf_cpu_buf *cpu_buf = pb->events[i].data.ptr;
9476
9477                 err = perf_buffer__process_records(pb, cpu_buf);
9478                 if (err) {
9479                         pr_warn("error while processing records: %d\n", err);
9480                         return err;
9481                 }
9482         }
9483         return cnt < 0 ? -errno : cnt;
9484 }
9485
9486 /* Return number of PERF_EVENT_ARRAY map slots set up by this perf_buffer
9487  * manager.
9488  */
9489 size_t perf_buffer__buffer_cnt(const struct perf_buffer *pb)
9490 {
9491         return pb->cpu_cnt;
9492 }
9493
9494 /*
9495  * Return perf_event FD of a ring buffer in *buf_idx* slot of
9496  * PERF_EVENT_ARRAY BPF map. This FD can be polled for new data using
9497  * select()/poll()/epoll() Linux syscalls.
9498  */
9499 int perf_buffer__buffer_fd(const struct perf_buffer *pb, size_t buf_idx)
9500 {
9501         struct perf_cpu_buf *cpu_buf;
9502
9503         if (buf_idx >= pb->cpu_cnt)
9504                 return -EINVAL;
9505
9506         cpu_buf = pb->cpu_bufs[buf_idx];
9507         if (!cpu_buf)
9508                 return -ENOENT;
9509
9510         return cpu_buf->fd;
9511 }
9512
9513 /*
9514  * Consume data from perf ring buffer corresponding to slot *buf_idx* in
9515  * PERF_EVENT_ARRAY BPF map without waiting/polling. If there is no data to
9516  * consume, do nothing and return success.
9517  * Returns:
9518  *   - 0 on success;
9519  *   - <0 on failure.
9520  */
9521 int perf_buffer__consume_buffer(struct perf_buffer *pb, size_t buf_idx)
9522 {
9523         struct perf_cpu_buf *cpu_buf;
9524
9525         if (buf_idx >= pb->cpu_cnt)
9526                 return -EINVAL;
9527
9528         cpu_buf = pb->cpu_bufs[buf_idx];
9529         if (!cpu_buf)
9530                 return -ENOENT;
9531
9532         return perf_buffer__process_records(pb, cpu_buf);
9533 }
9534
9535 int perf_buffer__consume(struct perf_buffer *pb)
9536 {
9537         int i, err;
9538
9539         for (i = 0; i < pb->cpu_cnt; i++) {
9540                 struct perf_cpu_buf *cpu_buf = pb->cpu_bufs[i];
9541
9542                 if (!cpu_buf)
9543                         continue;
9544
9545                 err = perf_buffer__process_records(pb, cpu_buf);
9546                 if (err) {
9547                         pr_warn("perf_buffer: failed to process records in buffer #%d: %d\n", i, err);
9548                         return err;
9549                 }
9550         }
9551         return 0;
9552 }
9553
9554 struct bpf_prog_info_array_desc {
9555         int     array_offset;   /* e.g. offset of jited_prog_insns */
9556         int     count_offset;   /* e.g. offset of jited_prog_len */
9557         int     size_offset;    /* > 0: offset of rec size,
9558                                  * < 0: fix size of -size_offset
9559                                  */
9560 };
9561
9562 static struct bpf_prog_info_array_desc bpf_prog_info_array_desc[] = {
9563         [BPF_PROG_INFO_JITED_INSNS] = {
9564                 offsetof(struct bpf_prog_info, jited_prog_insns),
9565                 offsetof(struct bpf_prog_info, jited_prog_len),
9566                 -1,
9567         },
9568         [BPF_PROG_INFO_XLATED_INSNS] = {
9569                 offsetof(struct bpf_prog_info, xlated_prog_insns),
9570                 offsetof(struct bpf_prog_info, xlated_prog_len),
9571                 -1,
9572         },
9573         [BPF_PROG_INFO_MAP_IDS] = {
9574                 offsetof(struct bpf_prog_info, map_ids),
9575                 offsetof(struct bpf_prog_info, nr_map_ids),
9576                 -(int)sizeof(__u32),
9577         },
9578         [BPF_PROG_INFO_JITED_KSYMS] = {
9579                 offsetof(struct bpf_prog_info, jited_ksyms),
9580                 offsetof(struct bpf_prog_info, nr_jited_ksyms),
9581                 -(int)sizeof(__u64),
9582         },
9583         [BPF_PROG_INFO_JITED_FUNC_LENS] = {
9584                 offsetof(struct bpf_prog_info, jited_func_lens),
9585                 offsetof(struct bpf_prog_info, nr_jited_func_lens),
9586                 -(int)sizeof(__u32),
9587         },
9588         [BPF_PROG_INFO_FUNC_INFO] = {
9589                 offsetof(struct bpf_prog_info, func_info),
9590                 offsetof(struct bpf_prog_info, nr_func_info),
9591                 offsetof(struct bpf_prog_info, func_info_rec_size),
9592         },
9593         [BPF_PROG_INFO_LINE_INFO] = {
9594                 offsetof(struct bpf_prog_info, line_info),
9595                 offsetof(struct bpf_prog_info, nr_line_info),
9596                 offsetof(struct bpf_prog_info, line_info_rec_size),
9597         },
9598         [BPF_PROG_INFO_JITED_LINE_INFO] = {
9599                 offsetof(struct bpf_prog_info, jited_line_info),
9600                 offsetof(struct bpf_prog_info, nr_jited_line_info),
9601                 offsetof(struct bpf_prog_info, jited_line_info_rec_size),
9602         },
9603         [BPF_PROG_INFO_PROG_TAGS] = {
9604                 offsetof(struct bpf_prog_info, prog_tags),
9605                 offsetof(struct bpf_prog_info, nr_prog_tags),
9606                 -(int)sizeof(__u8) * BPF_TAG_SIZE,
9607         },
9608
9609 };
9610
9611 static __u32 bpf_prog_info_read_offset_u32(struct bpf_prog_info *info,
9612                                            int offset)
9613 {
9614         __u32 *array = (__u32 *)info;
9615
9616         if (offset >= 0)
9617                 return array[offset / sizeof(__u32)];
9618         return -(int)offset;
9619 }
9620
9621 static __u64 bpf_prog_info_read_offset_u64(struct bpf_prog_info *info,
9622                                            int offset)
9623 {
9624         __u64 *array = (__u64 *)info;
9625
9626         if (offset >= 0)
9627                 return array[offset / sizeof(__u64)];
9628         return -(int)offset;
9629 }
9630
9631 static void bpf_prog_info_set_offset_u32(struct bpf_prog_info *info, int offset,
9632                                          __u32 val)
9633 {
9634         __u32 *array = (__u32 *)info;
9635
9636         if (offset >= 0)
9637                 array[offset / sizeof(__u32)] = val;
9638 }
9639
9640 static void bpf_prog_info_set_offset_u64(struct bpf_prog_info *info, int offset,
9641                                          __u64 val)
9642 {
9643         __u64 *array = (__u64 *)info;
9644
9645         if (offset >= 0)
9646                 array[offset / sizeof(__u64)] = val;
9647 }
9648
9649 struct bpf_prog_info_linear *
9650 bpf_program__get_prog_info_linear(int fd, __u64 arrays)
9651 {
9652         struct bpf_prog_info_linear *info_linear;
9653         struct bpf_prog_info info = {};
9654         __u32 info_len = sizeof(info);
9655         __u32 data_len = 0;
9656         int i, err;
9657         void *ptr;
9658
9659         if (arrays >> BPF_PROG_INFO_LAST_ARRAY)
9660                 return ERR_PTR(-EINVAL);
9661
9662         /* step 1: get array dimensions */
9663         err = bpf_obj_get_info_by_fd(fd, &info, &info_len);
9664         if (err) {
9665                 pr_debug("can't get prog info: %s\n", strerror(errno));
9666                 return ERR_PTR(-EFAULT);
9667         }
9668
9669         /* step 2: calculate total size of all arrays */
9670         for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) {
9671                 bool include_array = (arrays & (1UL << i)) > 0;
9672                 struct bpf_prog_info_array_desc *desc;
9673                 __u32 count, size;
9674
9675                 desc = bpf_prog_info_array_desc + i;
9676
9677                 /* kernel is too old to support this field */
9678                 if (info_len < desc->array_offset + sizeof(__u32) ||
9679                     info_len < desc->count_offset + sizeof(__u32) ||
9680                     (desc->size_offset > 0 && info_len < desc->size_offset))
9681                         include_array = false;
9682
9683                 if (!include_array) {
9684                         arrays &= ~(1UL << i);  /* clear the bit */
9685                         continue;
9686                 }
9687
9688                 count = bpf_prog_info_read_offset_u32(&info, desc->count_offset);
9689                 size  = bpf_prog_info_read_offset_u32(&info, desc->size_offset);
9690
9691                 data_len += count * size;
9692         }
9693
9694         /* step 3: allocate continuous memory */
9695         data_len = roundup(data_len, sizeof(__u64));
9696         info_linear = malloc(sizeof(struct bpf_prog_info_linear) + data_len);
9697         if (!info_linear)
9698                 return ERR_PTR(-ENOMEM);
9699
9700         /* step 4: fill data to info_linear->info */
9701         info_linear->arrays = arrays;
9702         memset(&info_linear->info, 0, sizeof(info));
9703         ptr = info_linear->data;
9704
9705         for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) {
9706                 struct bpf_prog_info_array_desc *desc;
9707                 __u32 count, size;
9708
9709                 if ((arrays & (1UL << i)) == 0)
9710                         continue;
9711
9712                 desc  = bpf_prog_info_array_desc + i;
9713                 count = bpf_prog_info_read_offset_u32(&info, desc->count_offset);
9714                 size  = bpf_prog_info_read_offset_u32(&info, desc->size_offset);
9715                 bpf_prog_info_set_offset_u32(&info_linear->info,
9716                                              desc->count_offset, count);
9717                 bpf_prog_info_set_offset_u32(&info_linear->info,
9718                                              desc->size_offset, size);
9719                 bpf_prog_info_set_offset_u64(&info_linear->info,
9720                                              desc->array_offset,
9721                                              ptr_to_u64(ptr));
9722                 ptr += count * size;
9723         }
9724
9725         /* step 5: call syscall again to get required arrays */
9726         err = bpf_obj_get_info_by_fd(fd, &info_linear->info, &info_len);
9727         if (err) {
9728                 pr_debug("can't get prog info: %s\n", strerror(errno));
9729                 free(info_linear);
9730                 return ERR_PTR(-EFAULT);
9731         }
9732
9733         /* step 6: verify the data */
9734         for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) {
9735                 struct bpf_prog_info_array_desc *desc;
9736                 __u32 v1, v2;
9737
9738                 if ((arrays & (1UL << i)) == 0)
9739                         continue;
9740
9741                 desc = bpf_prog_info_array_desc + i;
9742                 v1 = bpf_prog_info_read_offset_u32(&info, desc->count_offset);
9743                 v2 = bpf_prog_info_read_offset_u32(&info_linear->info,
9744                                                    desc->count_offset);
9745                 if (v1 != v2)
9746                         pr_warn("%s: mismatch in element count\n", __func__);
9747
9748                 v1 = bpf_prog_info_read_offset_u32(&info, desc->size_offset);
9749                 v2 = bpf_prog_info_read_offset_u32(&info_linear->info,
9750                                                    desc->size_offset);
9751                 if (v1 != v2)
9752                         pr_warn("%s: mismatch in rec size\n", __func__);
9753         }
9754
9755         /* step 7: update info_len and data_len */
9756         info_linear->info_len = sizeof(struct bpf_prog_info);
9757         info_linear->data_len = data_len;
9758
9759         return info_linear;
9760 }
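
/* A usage sketch: fetch translated instructions in a single contiguous
 * allocation; fd stands in for a valid BPF program FD.
 *
 *	struct bpf_prog_info_linear *info;
 *
 *	info = bpf_program__get_prog_info_linear(fd,
 *			1UL << BPF_PROG_INFO_XLATED_INSNS);
 *	if (libbpf_get_error(info))
 *		return -1;
 *
 * On success, info->info.xlated_prog_insns points into info->data and
 * info->info.xlated_prog_len gives its size; free(info) when done.
 */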
9761
9762 void bpf_program__bpil_addr_to_offs(struct bpf_prog_info_linear *info_linear)
9763 {
9764         int i;
9765
9766         for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) {
9767                 struct bpf_prog_info_array_desc *desc;
9768                 __u64 addr, offs;
9769
9770                 if ((info_linear->arrays & (1UL << i)) == 0)
9771                         continue;
9772
9773                 desc = bpf_prog_info_array_desc + i;
9774                 addr = bpf_prog_info_read_offset_u64(&info_linear->info,
9775                                                      desc->array_offset);
9776                 offs = addr - ptr_to_u64(info_linear->data);
9777                 bpf_prog_info_set_offset_u64(&info_linear->info,
9778                                              desc->array_offset, offs);
9779         }
9780 }
9781
9782 void bpf_program__bpil_offs_to_addr(struct bpf_prog_info_linear *info_linear)
9783 {
9784         int i;
9785
9786         for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) {
9787                 struct bpf_prog_info_array_desc *desc;
9788                 __u64 addr, offs;
9789
9790                 if ((info_linear->arrays & (1UL << i)) == 0)
9791                         continue;
9792
9793                 desc = bpf_prog_info_array_desc + i;
9794                 offs = bpf_prog_info_read_offset_u64(&info_linear->info,
9795                                                      desc->array_offset);
9796                 addr = offs + ptr_to_u64(info_linear->data);
9797                 bpf_prog_info_set_offset_u64(&info_linear->info,
9798                                              desc->array_offset, addr);
9799         }
9800 }
9801
9802 int bpf_program__set_attach_target(struct bpf_program *prog,
9803                                    int attach_prog_fd,
9804                                    const char *attach_func_name)
9805 {
9806         int btf_id;
9807
9808         if (!prog || attach_prog_fd < 0 || !attach_func_name)
9809                 return -EINVAL;
9810
9811         if (attach_prog_fd)
9812                 btf_id = libbpf_find_prog_btf_id(attach_func_name,
9813                                                  attach_prog_fd);
9814         else
9815                 btf_id = __find_vmlinux_btf_id(prog->obj->btf_vmlinux,
9816                                                attach_func_name,
9817                                                prog->expected_attach_type);
9818
9819         if (btf_id < 0)
9820                 return btf_id;
9821
9822         prog->attach_btf_id = btf_id;
9823         prog->attach_prog_fd = attach_prog_fd;
9824         return 0;
9825 }
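
/* A usage sketch: before loading, point an EXT (freplace) program at a
 * function inside another BPF program; target_prog_fd and "xdp_dispatch"
 * are illustrative. With attach_prog_fd == 0, the function is instead
 * looked up in vmlinux BTF (fentry/fexit/LSM/etc).
 *
 *	err = bpf_program__set_attach_target(prog, target_prog_fd,
 *					     "xdp_dispatch");
 *	if (err)
 *		return err;
 */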
9826
9827 int parse_cpu_mask_str(const char *s, bool **mask, int *mask_sz)
9828 {
9829         int err = 0, n, len, start, end = -1;
9830         bool *tmp;
9831
9832         *mask = NULL;
9833         *mask_sz = 0;
9834
9835         /* Each substring separated by ',' has the format \d+-\d+ or \d+ */
9836         while (*s) {
9837                 if (*s == ',' || *s == '\n') {
9838                         s++;
9839                         continue;
9840                 }
9841                 n = sscanf(s, "%d%n-%d%n", &start, &len, &end, &len);
9842                 if (n <= 0 || n > 2) {
9843                         pr_warn("Failed to get CPU range %s: %d\n", s, n);
9844                         err = -EINVAL;
9845                         goto cleanup;
9846                 } else if (n == 1) {
9847                         end = start;
9848                 }
9849                 if (start < 0 || start > end) {
9850                         pr_warn("Invalid CPU range [%d,%d] in %s\n",
9851                                 start, end, s);
9852                         err = -EINVAL;
9853                         goto cleanup;
9854                 }
9855                 tmp = realloc(*mask, end + 1);
9856                 if (!tmp) {
9857                         err = -ENOMEM;
9858                         goto cleanup;
9859                 }
9860                 *mask = tmp;
9861                 memset(tmp + *mask_sz, 0, start - *mask_sz);
9862                 memset(tmp + start, 1, end - start + 1);
9863                 *mask_sz = end + 1;
9864                 s += len;
9865         }
9866         if (!*mask_sz) {
9867                 pr_warn("Empty CPU range\n");
9868                 return -EINVAL;
9869         }
9870         return 0;
9871 cleanup:
9872         free(*mask);
9873         *mask = NULL;
9874         return err;
9875 }
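
/* Example (sketch): the string "0-2,5" yields mask_sz == 6 with
 * mask[0..2] and mask[5] set, mask[3..4] clear:
 *
 *	bool *mask;
 *	int n, err;
 *
 *	err = parse_cpu_mask_str("0-2,5", &mask, &n);
 *	if (!err) {
 *		... n == 6; mask[5] is true ...
 *		free(mask);
 *	}
 */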

int parse_cpu_mask_file(const char *fcpu, bool **mask, int *mask_sz)
{
	int fd, err = 0, len;
	char buf[128];

	fd = open(fcpu, O_RDONLY);
	if (fd < 0) {
		err = -errno;
		pr_warn("Failed to open cpu mask file %s: %d\n", fcpu, err);
		return err;
	}
	len = read(fd, buf, sizeof(buf));
	close(fd);
	if (len <= 0) {
		err = len ? -errno : -EINVAL;
		pr_warn("Failed to read cpu mask from %s: %d\n", fcpu, err);
		return err;
	}
	if (len >= sizeof(buf)) {
		pr_warn("CPU mask is too big in file %s\n", fcpu);
		return -E2BIG;
	}
	buf[len] = '\0';

	return parse_cpu_mask_str(buf, mask, mask_sz);
}
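
/* Usage sketch, assuming the usual sysfs layout:
 *
 *	bool *mask;
 *	int n;
 *
 *	if (!parse_cpu_mask_file("/sys/devices/system/cpu/online", &mask, &n)) {
 *		... mask[i] is true for each online CPU i ...
 *		free(mask);
 *	}
 */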

int libbpf_num_possible_cpus(void)
{
	static const char *fcpu = "/sys/devices/system/cpu/possible";
	static int cpus;
	int err, n, i, tmp_cpus;
	bool *mask;

	tmp_cpus = READ_ONCE(cpus);
	if (tmp_cpus > 0)
		return tmp_cpus;

	err = parse_cpu_mask_file(fcpu, &mask, &n);
	if (err)
		return err;

	tmp_cpus = 0;
	for (i = 0; i < n; i++) {
		if (mask[i])
			tmp_cpus++;
	}
	free(mask);

	WRITE_ONCE(cpus, tmp_cpus);
	return tmp_cpus;
}
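
/* A common use is sizing the value buffer for per-CPU map lookups, where
 * the kernel fills in one value per possible CPU (sketch; struct datarec
 * stands in for the map's actual value type):
 *
 *	int ncpus = libbpf_num_possible_cpus();
 *
 *	if (ncpus < 0)
 *		return ncpus;
 *	values = calloc(ncpus, sizeof(struct datarec));
 *	err = bpf_map_lookup_elem(map_fd, &key, values);
 */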

int bpf_object__open_skeleton(struct bpf_object_skeleton *s,
			      const struct bpf_object_open_opts *opts)
{
	DECLARE_LIBBPF_OPTS(bpf_object_open_opts, skel_opts,
		.object_name = s->name,
	);
	struct bpf_object *obj;
	int i;

	/* Attempt to preserve opts->object_name, unless overridden by user
	 * explicitly. Overriding the object name for skeletons is
	 * discouraged, as it breaks global data maps: they carry the object
	 * name as their own map name prefix. When the skeleton is generated,
	 * bpftool assumes that this name will stay the same.
	 */
	if (opts) {
		memcpy(&skel_opts, opts, sizeof(*opts));
		if (!opts->object_name)
			skel_opts.object_name = s->name;
	}

	obj = bpf_object__open_mem(s->data, s->data_sz, &skel_opts);
	if (IS_ERR(obj)) {
		pr_warn("failed to initialize skeleton BPF object '%s': %ld\n",
			s->name, PTR_ERR(obj));
		return PTR_ERR(obj);
	}

	*s->obj = obj;

	for (i = 0; i < s->map_cnt; i++) {
		struct bpf_map **map = s->maps[i].map;
		const char *name = s->maps[i].name;
		void **mmaped = s->maps[i].mmaped;

		*map = bpf_object__find_map_by_name(obj, name);
		if (!*map) {
			pr_warn("failed to find skeleton map '%s'\n", name);
			return -ESRCH;
		}

		/* externs shouldn't be pre-setup from user code */
		if (mmaped && (*map)->libbpf_type != LIBBPF_MAP_KCONFIG)
			*mmaped = (*map)->mmaped;
	}

	for (i = 0; i < s->prog_cnt; i++) {
		struct bpf_program **prog = s->progs[i].prog;
		const char *name = s->progs[i].name;

		*prog = bpf_object__find_program_by_name(obj, name);
		if (!*prog) {
			pr_warn("failed to find skeleton program '%s'\n", name);
			return -ESRCH;
		}
	}

	return 0;
}
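
/* The typical call sequence, as emitted by bpftool-generated skeleton
 * headers (names below are illustrative for an object called "myobj";
 * each generated wrapper delegates to the bpf_object__*_skeleton()
 * function noted next to it):
 *
 *	struct myobj_bpf *skel;
 *
 *	skel = myobj_bpf__open();		bpf_object__open_skeleton()
 *	err = myobj_bpf__load(skel);		bpf_object__load_skeleton()
 *	if (!err)
 *		err = myobj_bpf__attach(skel);	bpf_object__attach_skeleton()
 *	...
 *	myobj_bpf__destroy(skel);		bpf_object__destroy_skeleton()
 */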

int bpf_object__load_skeleton(struct bpf_object_skeleton *s)
{
	int i, err;

	err = bpf_object__load(*s->obj);
	if (err) {
		pr_warn("failed to load BPF skeleton '%s': %d\n", s->name, err);
		return err;
	}

	for (i = 0; i < s->map_cnt; i++) {
		struct bpf_map *map = *s->maps[i].map;
		size_t mmap_sz = bpf_map_mmap_sz(map);
		int prot, map_fd = bpf_map__fd(map);
		void **mmaped = s->maps[i].mmaped;

		if (!mmaped)
			continue;

		if (!(map->def.map_flags & BPF_F_MMAPABLE)) {
			*mmaped = NULL;
			continue;
		}

		if (map->def.map_flags & BPF_F_RDONLY_PROG)
			prot = PROT_READ;
		else
			prot = PROT_READ | PROT_WRITE;

		/* Remap the anonymous mmap()-ed "map initialization image"
		 * as BPF map-backed mmap()-ed memory, preserving the same
		 * memory address. This causes the kernel to change the
		 * process's page tables to point at a different piece of
		 * kernel memory, but from the userspace point of view the
		 * memory address (and its contents, identical at this point)
		 * stays the same. This mapping is released by
		 * bpf_object__close() as part of the normal cleanup
		 * procedure, so the skeleton's cleanup doesn't need to worry
		 * about it.
		 */
		*mmaped = mmap(map->mmaped, mmap_sz, prot,
				MAP_SHARED | MAP_FIXED, map_fd, 0);
		if (*mmaped == MAP_FAILED) {
			err = -errno;
			*mmaped = NULL;
			pr_warn("failed to re-mmap() map '%s': %d\n",
				 bpf_map__name(map), err);
			return err;
		}
	}

	return 0;
}
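
/* After a successful load, the mmaped pointers set up above give direct
 * read/write access to global data from user space (sketch; "my_counter"
 * is an illustrative global variable in the object's .bss section):
 *
 *	if (!myobj_bpf__load(skel))
 *		skel->bss->my_counter = 0;
 */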

int bpf_object__attach_skeleton(struct bpf_object_skeleton *s)
{
	int i;

	for (i = 0; i < s->prog_cnt; i++) {
		struct bpf_program *prog = *s->progs[i].prog;
		struct bpf_link **link = s->progs[i].link;
		const struct bpf_sec_def *sec_def;
		const char *sec_name = bpf_program__title(prog, false);

		if (!prog->load)
			continue;

		sec_def = find_sec_def(sec_name);
		if (!sec_def || !sec_def->attach_fn)
			continue;

		*link = sec_def->attach_fn(sec_def, prog);
		if (IS_ERR(*link)) {
			pr_warn("failed to auto-attach program '%s': %ld\n",
				bpf_program__name(prog), PTR_ERR(*link));
			return PTR_ERR(*link);
		}
	}

	return 0;
}
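
/* Programs whose section has no auto-attach handler are silently skipped
 * above; such programs can still be attached by hand (sketch):
 *
 *	struct bpf_link *link = bpf_program__attach(prog);
 *
 *	if (IS_ERR(link))
 *		... fall back to a type-specific bpf_program__attach_*() ...
 */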

void bpf_object__detach_skeleton(struct bpf_object_skeleton *s)
{
	int i;

	for (i = 0; i < s->prog_cnt; i++) {
		struct bpf_link **link = s->progs[i].link;

		bpf_link__destroy(*link);
		*link = NULL;
	}
}

void bpf_object__destroy_skeleton(struct bpf_object_skeleton *s)
{
	if (s->progs)
		bpf_object__detach_skeleton(s);
	if (s->obj)
		bpf_object__close(*s->obj);
	free(s->maps);
	free(s->progs);
	free(s);
}