tools: Remove feature-libelf-mmap feature detection
tools/lib/bpf/libbpf.c (linux-2.6-microblaze.git)
// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)

/*
 * Common eBPF ELF object loading operations.
 *
 * Copyright (C) 2013-2015 Alexei Starovoitov <ast@kernel.org>
 * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com>
 * Copyright (C) 2015 Huawei Inc.
 * Copyright (C) 2017 Nicira, Inc.
 * Copyright (C) 2019 Isovalent, Inc.
 */

#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif
#include <stdlib.h>
#include <stdio.h>
#include <stdarg.h>
#include <libgen.h>
#include <inttypes.h>
#include <limits.h>
#include <string.h>
#include <unistd.h>
#include <endian.h>
#include <fcntl.h>
#include <errno.h>
#include <ctype.h>
#include <asm/unistd.h>
#include <linux/err.h>
#include <linux/kernel.h>
#include <linux/bpf.h>
#include <linux/btf.h>
#include <linux/filter.h>
#include <linux/list.h>
#include <linux/limits.h>
#include <linux/perf_event.h>
#include <linux/ring_buffer.h>
#include <linux/version.h>
#include <sys/epoll.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/vfs.h>
#include <sys/utsname.h>
#include <sys/resource.h>
#include <libelf.h>
#include <gelf.h>
#include <zlib.h>

#include "libbpf.h"
#include "bpf.h"
#include "btf.h"
#include "str_error.h"
#include "libbpf_internal.h"
#include "hashmap.h"

#ifndef EM_BPF
#define EM_BPF 247
#endif

#ifndef BPF_FS_MAGIC
#define BPF_FS_MAGIC            0xcafe4a11
#endif

/* vsprintf() in __base_pr() uses a nonliteral format string. It may break
 * compilation if the user enables the corresponding warning. Disable it
 * explicitly.
 */
#pragma GCC diagnostic ignored "-Wformat-nonliteral"

#define __printf(a, b)  __attribute__((format(printf, a, b)))

static struct bpf_map *bpf_object__add_map(struct bpf_object *obj);
static struct bpf_program *bpf_object__find_prog_by_idx(struct bpf_object *obj,
                                                        int idx);
static const struct btf_type *
skip_mods_and_typedefs(const struct btf *btf, __u32 id, __u32 *res_id);

static int __base_pr(enum libbpf_print_level level, const char *format,
                     va_list args)
{
        if (level == LIBBPF_DEBUG)
                return 0;

        return vfprintf(stderr, format, args);
}

static libbpf_print_fn_t __libbpf_pr = __base_pr;

libbpf_print_fn_t libbpf_set_print(libbpf_print_fn_t fn)
{
        libbpf_print_fn_t old_print_fn = __libbpf_pr;

        __libbpf_pr = fn;
        return old_print_fn;
}
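
/* For illustration (not part of this file): a caller can route libbpf's log
 * output through its own logger by installing a callback, e.g.:
 *
 *	static int my_print(enum libbpf_print_level level, const char *fmt,
 *			    va_list args)
 *	{
 *		if (level == LIBBPF_DEBUG)
 *			return 0;
 *		return vfprintf(stderr, fmt, args);
 *	}
 *	...
 *	libbpf_set_print(my_print);
 *
 * Passing NULL silences all output, since libbpf_print() below checks
 * __libbpf_pr before dispatching.
 */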

__printf(2, 3)
void libbpf_print(enum libbpf_print_level level, const char *format, ...)
{
        va_list args;

        if (!__libbpf_pr)
                return;

        va_start(args, format);
        __libbpf_pr(level, format, args);
        va_end(args);
}

static void pr_perm_msg(int err)
{
        struct rlimit limit;
        char buf[100];

        if (err != -EPERM || geteuid() != 0)
                return;

        err = getrlimit(RLIMIT_MEMLOCK, &limit);
        if (err)
                return;

        if (limit.rlim_cur == RLIM_INFINITY)
                return;

        if (limit.rlim_cur < 1024)
                snprintf(buf, sizeof(buf), "%zu bytes", (size_t)limit.rlim_cur);
        else if (limit.rlim_cur < 1024*1024)
                snprintf(buf, sizeof(buf), "%.1f KiB", (double)limit.rlim_cur / 1024);
        else
                snprintf(buf, sizeof(buf), "%.1f MiB", (double)limit.rlim_cur / (1024*1024));

        pr_warn("permission error while running as root; try raising 'ulimit -l'? current value: %s\n",
                buf);
}
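
/* Context: kernels of this era charge BPF maps and programs against
 * RLIMIT_MEMLOCK, so BPF_PROG_LOAD/BPF_MAP_CREATE can fail with -EPERM even
 * for root when that limit is low. E.g. with rlim_cur == 65536, the message
 * above would read "current value: 64.0 KiB".
 */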

#define STRERR_BUFSIZE  128

/* Copied from tools/perf/util/util.h */
#ifndef zfree
# define zfree(ptr) ({ free(*ptr); *ptr = NULL; })
#endif

#ifndef zclose
# define zclose(fd) ({                  \
        int ___err = 0;                 \
        if ((fd) >= 0)                  \
                ___err = close((fd));   \
        fd = -1;                        \
        ___err; })
#endif

static inline __u64 ptr_to_u64(const void *ptr)
{
        return (__u64) (unsigned long) ptr;
}

enum kern_feature_id {
        /* v4.14: kernel support for program & map names. */
        FEAT_PROG_NAME,
        /* v5.2: kernel support for global data sections. */
        FEAT_GLOBAL_DATA,
        /* BTF support */
        FEAT_BTF,
        /* BTF_KIND_FUNC and BTF_KIND_FUNC_PROTO support */
        FEAT_BTF_FUNC,
        /* BTF_KIND_VAR and BTF_KIND_DATASEC support */
        FEAT_BTF_DATASEC,
        /* BTF_FUNC_GLOBAL is supported */
        FEAT_BTF_GLOBAL_FUNC,
        /* BPF_F_MMAPABLE is supported for arrays */
        FEAT_ARRAY_MMAP,
        /* kernel support for expected_attach_type in BPF_PROG_LOAD */
        FEAT_EXP_ATTACH_TYPE,
        /* bpf_probe_read_{kernel,user}[_str] helpers */
        FEAT_PROBE_READ_KERN,
        __FEAT_CNT,
};
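
/* Each feature above is detected at runtime by kernel_supports() (declared
 * below, defined later in this file, beyond this excerpt): libbpf probes the
 * running kernel once per feature, typically by attempting a minimal bpf()
 * syscall and checking whether it succeeds, and caches the result. This is
 * how a single libbpf binary adapts to many kernel versions without
 * build-time feature detection.
 */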

static bool kernel_supports(enum kern_feature_id feat_id);

enum reloc_type {
        RELO_LD64,
        RELO_CALL,
        RELO_DATA,
        RELO_EXTERN,
};

struct reloc_desc {
        enum reloc_type type;
        int insn_idx;
        int map_idx;
        int sym_off;
};

struct bpf_sec_def;

typedef struct bpf_link *(*attach_fn_t)(const struct bpf_sec_def *sec,
                                        struct bpf_program *prog);

struct bpf_sec_def {
        const char *sec;
        size_t len;
        enum bpf_prog_type prog_type;
        enum bpf_attach_type expected_attach_type;
        bool is_exp_attach_type_optional;
        bool is_attachable;
        bool is_attach_btf;
        attach_fn_t attach_fn;
};

/*
 * bpf_prog would be a better name, but it is already used in
 * linux/filter.h.
 */
struct bpf_program {
        /* Index in elf obj file, for relocation use. */
        int idx;
        char *name;
        int prog_ifindex;
        char *section_name;
        const struct bpf_sec_def *sec_def;
        /* section_name with / replaced by _; makes recursive pinning
         * in bpf_object__pin_programs easier
         */
        char *pin_name;
        struct bpf_insn *insns;
        size_t insns_cnt, main_prog_cnt;
        enum bpf_prog_type type;
        bool load;

        struct reloc_desc *reloc_desc;
        int nr_reloc;
        int log_level;

        struct {
                int nr;
                int *fds;
        } instances;
        bpf_program_prep_t preprocessor;

        struct bpf_object *obj;
        void *priv;
        bpf_program_clear_priv_t clear_priv;

        enum bpf_attach_type expected_attach_type;
        __u32 attach_btf_id;
        __u32 attach_prog_fd;
        void *func_info;
        __u32 func_info_rec_size;
        __u32 func_info_cnt;

        void *line_info;
        __u32 line_info_rec_size;
        __u32 line_info_cnt;
        __u32 prog_flags;
};

struct bpf_struct_ops {
        const char *tname;
        const struct btf_type *type;
        struct bpf_program **progs;
        __u32 *kern_func_off;
        /* e.g. struct tcp_congestion_ops in bpf_prog's btf format */
        void *data;
        /* e.g. struct bpf_struct_ops_tcp_congestion_ops in
         *      btf_vmlinux's format.
         * struct bpf_struct_ops_tcp_congestion_ops {
         *      [... some other kernel fields ...]
         *      struct tcp_congestion_ops data;
         * }
         * kern_vdata's size == sizeof(struct bpf_struct_ops_tcp_congestion_ops)
         * bpf_map__init_kern_struct_ops() will populate the "kern_vdata"
         * from "data".
         */
        void *kern_vdata;
        __u32 type_id;
};

#define DATA_SEC ".data"
#define BSS_SEC ".bss"
#define RODATA_SEC ".rodata"
#define KCONFIG_SEC ".kconfig"
#define KSYMS_SEC ".ksyms"
#define STRUCT_OPS_SEC ".struct_ops"

enum libbpf_map_type {
        LIBBPF_MAP_UNSPEC,
        LIBBPF_MAP_DATA,
        LIBBPF_MAP_BSS,
        LIBBPF_MAP_RODATA,
        LIBBPF_MAP_KCONFIG,
};

static const char * const libbpf_type_to_btf_name[] = {
        [LIBBPF_MAP_DATA]       = DATA_SEC,
        [LIBBPF_MAP_BSS]        = BSS_SEC,
        [LIBBPF_MAP_RODATA]     = RODATA_SEC,
        [LIBBPF_MAP_KCONFIG]    = KCONFIG_SEC,
};

struct bpf_map {
        char *name;
        int fd;
        int sec_idx;
        size_t sec_offset;
        int map_ifindex;
        int inner_map_fd;
        struct bpf_map_def def;
        __u32 numa_node;
        __u32 btf_var_idx;
        __u32 btf_key_type_id;
        __u32 btf_value_type_id;
        __u32 btf_vmlinux_value_type_id;
        void *priv;
        bpf_map_clear_priv_t clear_priv;
        enum libbpf_map_type libbpf_type;
        void *mmaped;
        struct bpf_struct_ops *st_ops;
        struct bpf_map *inner_map;
        void **init_slots;
        int init_slots_sz;
        char *pin_path;
        bool pinned;
        bool reused;
};

enum extern_type {
        EXT_UNKNOWN,
        EXT_KCFG,
        EXT_KSYM,
};

enum kcfg_type {
        KCFG_UNKNOWN,
        KCFG_CHAR,
        KCFG_BOOL,
        KCFG_INT,
        KCFG_TRISTATE,
        KCFG_CHAR_ARR,
};

struct extern_desc {
        enum extern_type type;
        int sym_idx;
        int btf_id;
        int sec_btf_id;
        const char *name;
        bool is_set;
        bool is_weak;
        union {
                struct {
                        enum kcfg_type type;
                        int sz;
                        int align;
                        int data_off;
                        bool is_signed;
                } kcfg;
                struct {
                        unsigned long long addr;
                } ksym;
        };
};

static LIST_HEAD(bpf_objects_list);

struct bpf_object {
        char name[BPF_OBJ_NAME_LEN];
        char license[64];
        __u32 kern_version;

        struct bpf_program *programs;
        size_t nr_programs;
        struct bpf_map *maps;
        size_t nr_maps;
        size_t maps_cap;

        char *kconfig;
        struct extern_desc *externs;
        int nr_extern;
        int kconfig_map_idx;

        bool loaded;
        bool has_pseudo_calls;

        /*
         * Information used when doing ELF-related work. Only valid if fd
         * is valid.
         */
        struct {
                int fd;
                const void *obj_buf;
                size_t obj_buf_sz;
                Elf *elf;
                GElf_Ehdr ehdr;
                Elf_Data *symbols;
                Elf_Data *data;
                Elf_Data *rodata;
                Elf_Data *bss;
                Elf_Data *st_ops_data;
                size_t strtabidx;
                struct {
                        GElf_Shdr shdr;
                        Elf_Data *data;
                } *reloc_sects;
                int nr_reloc_sects;
                int maps_shndx;
                int btf_maps_shndx;
                __u32 btf_maps_sec_btf_id;
                int text_shndx;
                int symbols_shndx;
                int data_shndx;
                int rodata_shndx;
                int bss_shndx;
                int st_ops_shndx;
        } efile;
        /*
         * All loaded bpf_object instances are linked into a list, which is
         * hidden from the caller. bpf_objects__<func> handlers deal with
         * all objects.
         */
        struct list_head list;

        struct btf *btf;
        /* Parse and load BTF vmlinux if any of the programs in the object need
         * it at load time.
         */
        struct btf *btf_vmlinux;
        struct btf_ext *btf_ext;

        void *priv;
        bpf_object_clear_priv_t clear_priv;

        char path[];
};
#define obj_elf_valid(o)        ((o)->efile.elf)

void bpf_program__unload(struct bpf_program *prog)
{
        int i;

        if (!prog)
                return;

        /*
         * If the object is opened but the program was never loaded,
         * it is possible that prog->instances.nr == -1.
         */
        if (prog->instances.nr > 0) {
                for (i = 0; i < prog->instances.nr; i++)
                        zclose(prog->instances.fds[i]);
        } else if (prog->instances.nr != -1) {
                pr_warn("Internal error: instances.nr is %d\n",
                        prog->instances.nr);
        }

        prog->instances.nr = -1;
        zfree(&prog->instances.fds);

        zfree(&prog->func_info);
        zfree(&prog->line_info);
}

static void bpf_program__exit(struct bpf_program *prog)
{
        if (!prog)
                return;

        if (prog->clear_priv)
                prog->clear_priv(prog, prog->priv);

        prog->priv = NULL;
        prog->clear_priv = NULL;

        bpf_program__unload(prog);
        zfree(&prog->name);
        zfree(&prog->section_name);
        zfree(&prog->pin_name);
        zfree(&prog->insns);
        zfree(&prog->reloc_desc);

        prog->nr_reloc = 0;
        prog->insns_cnt = 0;
        prog->idx = -1;
}

static char *__bpf_program__pin_name(struct bpf_program *prog)
{
        char *name, *p;

        name = p = strdup(prog->section_name);
        while ((p = strchr(p, '/')))
                *p = '_';

        return name;
}
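
/* e.g. a program in section "cgroup/skb" gets pin_name "cgroup_skb", so
 * bpf_object__pin_programs() can pin each program as a single flat file
 * under the pin path rather than creating a nested directory per '/'.
 */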

static int
bpf_program__init(void *data, size_t size, char *section_name, int idx,
                  struct bpf_program *prog)
{
        const size_t bpf_insn_sz = sizeof(struct bpf_insn);

        if (size == 0 || size % bpf_insn_sz) {
                pr_warn("corrupted section '%s', size: %zu\n",
                        section_name, size);
                return -EINVAL;
        }

        memset(prog, 0, sizeof(*prog));

        prog->section_name = strdup(section_name);
        if (!prog->section_name) {
                pr_warn("failed to alloc name for prog under section(%d) %s\n",
                        idx, section_name);
                goto errout;
        }

        prog->pin_name = __bpf_program__pin_name(prog);
        if (!prog->pin_name) {
                pr_warn("failed to alloc pin name for prog under section(%d) %s\n",
                        idx, section_name);
                goto errout;
        }

        prog->insns = malloc(size);
        if (!prog->insns) {
                pr_warn("failed to alloc insns for prog under section %s\n",
                        section_name);
                goto errout;
        }
        prog->insns_cnt = size / bpf_insn_sz;
        memcpy(prog->insns, data, size);
        prog->idx = idx;
        prog->instances.fds = NULL;
        prog->instances.nr = -1;
        prog->type = BPF_PROG_TYPE_UNSPEC;
        prog->load = true;

        return 0;
errout:
        bpf_program__exit(prog);
        return -ENOMEM;
}

static int
bpf_object__add_program(struct bpf_object *obj, void *data, size_t size,
                        char *section_name, int idx)
{
        struct bpf_program prog, *progs;
        int nr_progs, err;

        err = bpf_program__init(data, size, section_name, idx, &prog);
        if (err)
                return err;

        progs = obj->programs;
        nr_progs = obj->nr_programs;

        progs = libbpf_reallocarray(progs, nr_progs + 1, sizeof(progs[0]));
        if (!progs) {
                /*
                 * In this case the original obj->programs is still valid,
                 * so it needs no special treatment in bpf_object__close().
                 */
                pr_warn("failed to alloc a new program under section '%s'\n",
                        section_name);
                bpf_program__exit(&prog);
                return -ENOMEM;
        }

        pr_debug("found program %s\n", prog.section_name);
        obj->programs = progs;
        obj->nr_programs = nr_progs + 1;
        prog.obj = obj;
        progs[nr_progs] = prog;
        return 0;
}

static int
bpf_object__init_prog_names(struct bpf_object *obj)
{
        Elf_Data *symbols = obj->efile.symbols;
        struct bpf_program *prog;
        size_t pi, si;

        for (pi = 0; pi < obj->nr_programs; pi++) {
                const char *name = NULL;

                prog = &obj->programs[pi];

                for (si = 0; si < symbols->d_size / sizeof(GElf_Sym) && !name;
                     si++) {
                        GElf_Sym sym;

                        if (!gelf_getsym(symbols, si, &sym))
                                continue;
                        if (sym.st_shndx != prog->idx)
                                continue;
                        if (GELF_ST_BIND(sym.st_info) != STB_GLOBAL)
                                continue;

                        name = elf_strptr(obj->efile.elf,
                                          obj->efile.strtabidx,
                                          sym.st_name);
                        if (!name) {
                                pr_warn("failed to get sym name string for prog %s\n",
                                        prog->section_name);
                                return -LIBBPF_ERRNO__LIBELF;
                        }
                }

                if (!name && prog->idx == obj->efile.text_shndx)
                        name = ".text";

                if (!name) {
                        pr_warn("failed to find sym for prog %s\n",
                                prog->section_name);
                        return -EINVAL;
                }

                prog->name = strdup(name);
                if (!prog->name) {
                        pr_warn("failed to allocate memory for prog sym %s\n",
                                name);
                        return -ENOMEM;
                }
        }

        return 0;
}

static __u32 get_kernel_version(void)
{
        __u32 major, minor, patch;
        struct utsname info;

        uname(&info);
        if (sscanf(info.release, "%u.%u.%u", &major, &minor, &patch) != 3)
                return 0;
        return KERNEL_VERSION(major, minor, patch);
}
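
/* e.g. uname release "5.9.1" parses to major=5, minor=9, patch=1, and
 * KERNEL_VERSION(5, 9, 1) packs that as (5 << 16) + (9 << 8) + 1 = 0x050901.
 * Anything unparseable (e.g. a vendor release string without three numbers)
 * yields 0.
 */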

static const struct btf_member *
find_member_by_offset(const struct btf_type *t, __u32 bit_offset)
{
        struct btf_member *m;
        int i;

        for (i = 0, m = btf_members(t); i < btf_vlen(t); i++, m++) {
                if (btf_member_bit_offset(t, i) == bit_offset)
                        return m;
        }

        return NULL;
}

static const struct btf_member *
find_member_by_name(const struct btf *btf, const struct btf_type *t,
                    const char *name)
{
        struct btf_member *m;
        int i;

        for (i = 0, m = btf_members(t); i < btf_vlen(t); i++, m++) {
                if (!strcmp(btf__name_by_offset(btf, m->name_off), name))
                        return m;
        }

        return NULL;
}

#define STRUCT_OPS_VALUE_PREFIX "bpf_struct_ops_"
static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix,
                                   const char *name, __u32 kind);

static int
find_struct_ops_kern_types(const struct btf *btf, const char *tname,
                           const struct btf_type **type, __u32 *type_id,
                           const struct btf_type **vtype, __u32 *vtype_id,
                           const struct btf_member **data_member)
{
        const struct btf_type *kern_type, *kern_vtype;
        const struct btf_member *kern_data_member;
        __s32 kern_vtype_id, kern_type_id;
        __u32 i;

        kern_type_id = btf__find_by_name_kind(btf, tname, BTF_KIND_STRUCT);
        if (kern_type_id < 0) {
                pr_warn("struct_ops init_kern: struct %s is not found in kernel BTF\n",
                        tname);
                return kern_type_id;
        }
        kern_type = btf__type_by_id(btf, kern_type_id);

        /* Find the corresponding "map_value" type that will be used
         * in map_update(BPF_MAP_TYPE_STRUCT_OPS).  For example,
         * find "struct bpf_struct_ops_tcp_congestion_ops" from the
         * btf_vmlinux.
         */
        kern_vtype_id = find_btf_by_prefix_kind(btf, STRUCT_OPS_VALUE_PREFIX,
                                                tname, BTF_KIND_STRUCT);
        if (kern_vtype_id < 0) {
                pr_warn("struct_ops init_kern: struct %s%s is not found in kernel BTF\n",
                        STRUCT_OPS_VALUE_PREFIX, tname);
                return kern_vtype_id;
        }
        kern_vtype = btf__type_by_id(btf, kern_vtype_id);

        /* Find "struct tcp_congestion_ops" from
         * struct bpf_struct_ops_tcp_congestion_ops {
         *      [ ... ]
         *      struct tcp_congestion_ops data;
         * }
         */
        kern_data_member = btf_members(kern_vtype);
        for (i = 0; i < btf_vlen(kern_vtype); i++, kern_data_member++) {
                if (kern_data_member->type == kern_type_id)
                        break;
        }
        if (i == btf_vlen(kern_vtype)) {
                pr_warn("struct_ops init_kern: struct %s data is not found in struct %s%s\n",
                        tname, STRUCT_OPS_VALUE_PREFIX, tname);
                return -EINVAL;
        }

        *type = kern_type;
        *type_id = kern_type_id;
        *vtype = kern_vtype;
        *vtype_id = kern_vtype_id;
        *data_member = kern_data_member;

        return 0;
}

static bool bpf_map__is_struct_ops(const struct bpf_map *map)
{
        return map->def.type == BPF_MAP_TYPE_STRUCT_OPS;
}

/* Init the map's fields that depend on kern_btf */
static int bpf_map__init_kern_struct_ops(struct bpf_map *map,
                                         const struct btf *btf,
                                         const struct btf *kern_btf)
{
        const struct btf_member *member, *kern_member, *kern_data_member;
        const struct btf_type *type, *kern_type, *kern_vtype;
        __u32 i, kern_type_id, kern_vtype_id, kern_data_off;
        struct bpf_struct_ops *st_ops;
        void *data, *kern_data;
        const char *tname;
        int err;

        st_ops = map->st_ops;
        type = st_ops->type;
        tname = st_ops->tname;
        err = find_struct_ops_kern_types(kern_btf, tname,
                                         &kern_type, &kern_type_id,
                                         &kern_vtype, &kern_vtype_id,
                                         &kern_data_member);
        if (err)
                return err;

        pr_debug("struct_ops init_kern %s: type_id:%u kern_type_id:%u kern_vtype_id:%u\n",
                 map->name, st_ops->type_id, kern_type_id, kern_vtype_id);

        map->def.value_size = kern_vtype->size;
        map->btf_vmlinux_value_type_id = kern_vtype_id;

        st_ops->kern_vdata = calloc(1, kern_vtype->size);
        if (!st_ops->kern_vdata)
                return -ENOMEM;

        data = st_ops->data;
        kern_data_off = kern_data_member->offset / 8;
        kern_data = st_ops->kern_vdata + kern_data_off;

        member = btf_members(type);
        for (i = 0; i < btf_vlen(type); i++, member++) {
                const struct btf_type *mtype, *kern_mtype;
                __u32 mtype_id, kern_mtype_id;
                void *mdata, *kern_mdata;
                __s64 msize, kern_msize;
                __u32 moff, kern_moff;
                __u32 kern_member_idx;
                const char *mname;

                mname = btf__name_by_offset(btf, member->name_off);
                kern_member = find_member_by_name(kern_btf, kern_type, mname);
                if (!kern_member) {
                        pr_warn("struct_ops init_kern %s: Cannot find member %s in kernel BTF\n",
                                map->name, mname);
                        return -ENOTSUP;
                }

                kern_member_idx = kern_member - btf_members(kern_type);
                if (btf_member_bitfield_size(type, i) ||
                    btf_member_bitfield_size(kern_type, kern_member_idx)) {
                        pr_warn("struct_ops init_kern %s: bitfield %s is not supported\n",
                                map->name, mname);
                        return -ENOTSUP;
                }

                moff = member->offset / 8;
                kern_moff = kern_member->offset / 8;

                mdata = data + moff;
                kern_mdata = kern_data + kern_moff;

                mtype = skip_mods_and_typedefs(btf, member->type, &mtype_id);
                kern_mtype = skip_mods_and_typedefs(kern_btf, kern_member->type,
                                                    &kern_mtype_id);
                if (BTF_INFO_KIND(mtype->info) !=
                    BTF_INFO_KIND(kern_mtype->info)) {
                        pr_warn("struct_ops init_kern %s: Unmatched member type %s %u != %u(kernel)\n",
                                map->name, mname, BTF_INFO_KIND(mtype->info),
                                BTF_INFO_KIND(kern_mtype->info));
                        return -ENOTSUP;
                }

                if (btf_is_ptr(mtype)) {
                        struct bpf_program *prog;

                        mtype = skip_mods_and_typedefs(btf, mtype->type, &mtype_id);
                        kern_mtype = skip_mods_and_typedefs(kern_btf,
                                                            kern_mtype->type,
                                                            &kern_mtype_id);
                        if (!btf_is_func_proto(mtype) ||
                            !btf_is_func_proto(kern_mtype)) {
                                pr_warn("struct_ops init_kern %s: non func ptr %s is not supported\n",
                                        map->name, mname);
                                return -ENOTSUP;
                        }

                        prog = st_ops->progs[i];
                        if (!prog) {
                                pr_debug("struct_ops init_kern %s: func ptr %s is not set\n",
                                         map->name, mname);
                                continue;
                        }

                        prog->attach_btf_id = kern_type_id;
                        prog->expected_attach_type = kern_member_idx;

                        st_ops->kern_func_off[i] = kern_data_off + kern_moff;

                        pr_debug("struct_ops init_kern %s: func ptr %s is set to prog %s from data(+%u) to kern_data(+%u)\n",
                                 map->name, mname, prog->name, moff,
                                 kern_moff);

                        continue;
                }

                msize = btf__resolve_size(btf, mtype_id);
                kern_msize = btf__resolve_size(kern_btf, kern_mtype_id);
                if (msize < 0 || kern_msize < 0 || msize != kern_msize) {
                        pr_warn("struct_ops init_kern %s: Error in size of member %s: %zd != %zd(kernel)\n",
                                map->name, mname, (ssize_t)msize,
                                (ssize_t)kern_msize);
                        return -ENOTSUP;
                }

                pr_debug("struct_ops init_kern %s: copy %s %u bytes from data(+%u) to kern_data(+%u)\n",
                         map->name, mname, (unsigned int)msize,
                         moff, kern_moff);
                memcpy(kern_mdata, mdata, msize);
        }

        return 0;
}

static int bpf_object__init_kern_struct_ops_maps(struct bpf_object *obj)
{
        struct bpf_map *map;
        size_t i;
        int err;

        for (i = 0; i < obj->nr_maps; i++) {
                map = &obj->maps[i];

                if (!bpf_map__is_struct_ops(map))
                        continue;

                err = bpf_map__init_kern_struct_ops(map, obj->btf,
                                                    obj->btf_vmlinux);
                if (err)
                        return err;
        }

        return 0;
}

static int bpf_object__init_struct_ops_maps(struct bpf_object *obj)
{
        const struct btf_type *type, *datasec;
        const struct btf_var_secinfo *vsi;
        struct bpf_struct_ops *st_ops;
        const char *tname, *var_name;
        __s32 type_id, datasec_id;
        const struct btf *btf;
        struct bpf_map *map;
        __u32 i;

        if (obj->efile.st_ops_shndx == -1)
                return 0;

        btf = obj->btf;
        datasec_id = btf__find_by_name_kind(btf, STRUCT_OPS_SEC,
                                            BTF_KIND_DATASEC);
        if (datasec_id < 0) {
                pr_warn("struct_ops init: DATASEC %s not found\n",
                        STRUCT_OPS_SEC);
                return -EINVAL;
        }

        datasec = btf__type_by_id(btf, datasec_id);
        vsi = btf_var_secinfos(datasec);
        for (i = 0; i < btf_vlen(datasec); i++, vsi++) {
                type = btf__type_by_id(obj->btf, vsi->type);
                var_name = btf__name_by_offset(obj->btf, type->name_off);

                type_id = btf__resolve_type(obj->btf, vsi->type);
                if (type_id < 0) {
                        pr_warn("struct_ops init: Cannot resolve var type_id %u in DATASEC %s\n",
                                vsi->type, STRUCT_OPS_SEC);
                        return -EINVAL;
                }

                type = btf__type_by_id(obj->btf, type_id);
                tname = btf__name_by_offset(obj->btf, type->name_off);
                if (!tname[0]) {
                        pr_warn("struct_ops init: anonymous type is not supported\n");
                        return -ENOTSUP;
                }
                if (!btf_is_struct(type)) {
                        pr_warn("struct_ops init: %s is not a struct\n", tname);
                        return -EINVAL;
                }

                map = bpf_object__add_map(obj);
                if (IS_ERR(map))
                        return PTR_ERR(map);

                map->sec_idx = obj->efile.st_ops_shndx;
                map->sec_offset = vsi->offset;
                map->name = strdup(var_name);
                if (!map->name)
                        return -ENOMEM;

                map->def.type = BPF_MAP_TYPE_STRUCT_OPS;
                map->def.key_size = sizeof(int);
                map->def.value_size = type->size;
                map->def.max_entries = 1;

                map->st_ops = calloc(1, sizeof(*map->st_ops));
                if (!map->st_ops)
                        return -ENOMEM;
                st_ops = map->st_ops;
                st_ops->data = malloc(type->size);
                st_ops->progs = calloc(btf_vlen(type), sizeof(*st_ops->progs));
                st_ops->kern_func_off = malloc(btf_vlen(type) *
                                               sizeof(*st_ops->kern_func_off));
                if (!st_ops->data || !st_ops->progs || !st_ops->kern_func_off)
                        return -ENOMEM;

                if (vsi->offset + type->size > obj->efile.st_ops_data->d_size) {
                        pr_warn("struct_ops init: var %s is beyond the end of DATASEC %s\n",
                                var_name, STRUCT_OPS_SEC);
                        return -EINVAL;
                }

                memcpy(st_ops->data,
                       obj->efile.st_ops_data->d_buf + vsi->offset,
                       type->size);
                st_ops->tname = tname;
                st_ops->type = type;
                st_ops->type_id = type_id;

                pr_debug("struct_ops init: struct %s(type_id=%u) %s found at offset %u\n",
                         tname, type_id, var_name, vsi->offset);
        }

        return 0;
}

static struct bpf_object *bpf_object__new(const char *path,
                                          const void *obj_buf,
                                          size_t obj_buf_sz,
                                          const char *obj_name)
{
        struct bpf_object *obj;
        char *end;

        obj = calloc(1, sizeof(struct bpf_object) + strlen(path) + 1);
        if (!obj) {
                pr_warn("alloc memory failed for %s\n", path);
                return ERR_PTR(-ENOMEM);
        }

        strcpy(obj->path, path);
        if (obj_name) {
                strncpy(obj->name, obj_name, sizeof(obj->name) - 1);
                obj->name[sizeof(obj->name) - 1] = 0;
        } else {
                /* Using basename() GNU version which doesn't modify arg. */
                strncpy(obj->name, basename((void *)path),
                        sizeof(obj->name) - 1);
                end = strchr(obj->name, '.');
                if (end)
                        *end = 0;
        }

        obj->efile.fd = -1;
        /*
         * The caller of this function should also call
         * bpf_object__elf_finish() after data collection to return
         * obj_buf to the user. Otherwise the buffer would have to be
         * duplicated, to avoid the user freeing it before ELF
         * processing is finished.
         */
        obj->efile.obj_buf = obj_buf;
        obj->efile.obj_buf_sz = obj_buf_sz;
        obj->efile.maps_shndx = -1;
        obj->efile.btf_maps_shndx = -1;
        obj->efile.data_shndx = -1;
        obj->efile.rodata_shndx = -1;
        obj->efile.bss_shndx = -1;
        obj->efile.st_ops_shndx = -1;
        obj->kconfig_map_idx = -1;

        obj->kern_version = get_kernel_version();
        obj->loaded = false;

        INIT_LIST_HEAD(&obj->list);
        list_add(&obj->list, &bpf_objects_list);
        return obj;
}

static void bpf_object__elf_finish(struct bpf_object *obj)
{
        if (!obj_elf_valid(obj))
                return;

        if (obj->efile.elf) {
                elf_end(obj->efile.elf);
                obj->efile.elf = NULL;
        }
        obj->efile.symbols = NULL;
        obj->efile.data = NULL;
        obj->efile.rodata = NULL;
        obj->efile.bss = NULL;
        obj->efile.st_ops_data = NULL;

        zfree(&obj->efile.reloc_sects);
        obj->efile.nr_reloc_sects = 0;
        zclose(obj->efile.fd);
        obj->efile.obj_buf = NULL;
        obj->efile.obj_buf_sz = 0;
}

/* if libelf is old and doesn't support mmap(), fall back to read() */
#ifndef ELF_C_READ_MMAP
#define ELF_C_READ_MMAP ELF_C_READ
#endif
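
/* This compile-time fallback is what lets the build-time feature-libelf-mmap
 * detection (removed by the commit shown in the header above) go away: if
 * the installed libelf predates ELF_C_READ_MMAP, the elf_begin() call below
 * silently degrades to plain read() mode.
 */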

static int bpf_object__elf_init(struct bpf_object *obj)
{
        int err = 0;
        GElf_Ehdr *ep;

        if (obj_elf_valid(obj)) {
                pr_warn("elf init: internal error\n");
                return -LIBBPF_ERRNO__LIBELF;
        }

        if (obj->efile.obj_buf_sz > 0) {
                /*
                 * obj_buf should have been validated by
                 * bpf_object__open_buffer().
                 */
                obj->efile.elf = elf_memory((char *)obj->efile.obj_buf,
                                            obj->efile.obj_buf_sz);
        } else {
                obj->efile.fd = open(obj->path, O_RDONLY);
                if (obj->efile.fd < 0) {
                        char errmsg[STRERR_BUFSIZE], *cp;

                        err = -errno;
                        cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
                        pr_warn("failed to open %s: %s\n", obj->path, cp);
                        return err;
                }

                obj->efile.elf = elf_begin(obj->efile.fd, ELF_C_READ_MMAP, NULL);
        }

        if (!obj->efile.elf) {
                pr_warn("failed to open %s as ELF file\n", obj->path);
                err = -LIBBPF_ERRNO__LIBELF;
                goto errout;
        }

        if (!gelf_getehdr(obj->efile.elf, &obj->efile.ehdr)) {
                pr_warn("failed to get EHDR from %s\n", obj->path);
                err = -LIBBPF_ERRNO__FORMAT;
                goto errout;
        }
        ep = &obj->efile.ehdr;

        /* Old LLVM set e_machine to EM_NONE */
        if (ep->e_type != ET_REL ||
            (ep->e_machine && ep->e_machine != EM_BPF)) {
                pr_warn("%s is not an eBPF object file\n", obj->path);
                err = -LIBBPF_ERRNO__FORMAT;
                goto errout;
        }

        return 0;
errout:
        bpf_object__elf_finish(obj);
        return err;
}

static int bpf_object__check_endianness(struct bpf_object *obj)
{
#if __BYTE_ORDER == __LITTLE_ENDIAN
        if (obj->efile.ehdr.e_ident[EI_DATA] == ELFDATA2LSB)
                return 0;
#elif __BYTE_ORDER == __BIG_ENDIAN
        if (obj->efile.ehdr.e_ident[EI_DATA] == ELFDATA2MSB)
                return 0;
#else
# error "Unrecognized __BYTE_ORDER__"
#endif
        pr_warn("endianness mismatch.\n");
        return -LIBBPF_ERRNO__ENDIAN;
}
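
/* BPF object files must match host endianness: instruction immediates and
 * map data are stored in native byte order, so e.g. an object built with
 * "clang -target bpfeb" is rejected here on a little-endian host.
 */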

static int
bpf_object__init_license(struct bpf_object *obj, void *data, size_t size)
{
        memcpy(obj->license, data, min(size, sizeof(obj->license) - 1));
        pr_debug("license of %s is %s\n", obj->path, obj->license);
        return 0;
}

static int
bpf_object__init_kversion(struct bpf_object *obj, void *data, size_t size)
{
        __u32 kver;

        if (size != sizeof(kver)) {
                pr_warn("invalid kver section in %s\n", obj->path);
                return -LIBBPF_ERRNO__FORMAT;
        }
        memcpy(&kver, data, sizeof(kver));
        obj->kern_version = kver;
        pr_debug("kernel version of %s is %x\n", obj->path, obj->kern_version);
        return 0;
}

static bool bpf_map_type__is_map_in_map(enum bpf_map_type type)
{
        if (type == BPF_MAP_TYPE_ARRAY_OF_MAPS ||
            type == BPF_MAP_TYPE_HASH_OF_MAPS)
                return true;
        return false;
}

static int bpf_object_search_section_size(const struct bpf_object *obj,
                                          const char *name, size_t *d_size)
{
        const GElf_Ehdr *ep = &obj->efile.ehdr;
        Elf *elf = obj->efile.elf;
        Elf_Scn *scn = NULL;
        int idx = 0;

        while ((scn = elf_nextscn(elf, scn)) != NULL) {
                const char *sec_name;
                Elf_Data *data;
                GElf_Shdr sh;

                idx++;
                if (gelf_getshdr(scn, &sh) != &sh) {
                        pr_warn("failed to get section(%d) header from %s\n",
                                idx, obj->path);
                        return -EIO;
                }

                sec_name = elf_strptr(elf, ep->e_shstrndx, sh.sh_name);
                if (!sec_name) {
                        pr_warn("failed to get section(%d) name from %s\n",
                                idx, obj->path);
                        return -EIO;
                }

                if (strcmp(name, sec_name))
                        continue;

                data = elf_getdata(scn, 0);
                if (!data) {
                        pr_warn("failed to get section(%d) data from %s(%s)\n",
                                idx, name, obj->path);
                        return -EIO;
                }

                *d_size = data->d_size;
                return 0;
        }

        return -ENOENT;
}

int bpf_object__section_size(const struct bpf_object *obj, const char *name,
                             __u32 *size)
{
        int ret = -ENOENT;
        size_t d_size;

        *size = 0;
        if (!name) {
                return -EINVAL;
        } else if (!strcmp(name, DATA_SEC)) {
                if (obj->efile.data)
                        *size = obj->efile.data->d_size;
        } else if (!strcmp(name, BSS_SEC)) {
                if (obj->efile.bss)
                        *size = obj->efile.bss->d_size;
        } else if (!strcmp(name, RODATA_SEC)) {
                if (obj->efile.rodata)
                        *size = obj->efile.rodata->d_size;
        } else if (!strcmp(name, STRUCT_OPS_SEC)) {
                if (obj->efile.st_ops_data)
                        *size = obj->efile.st_ops_data->d_size;
        } else {
                ret = bpf_object_search_section_size(obj, name, &d_size);
                if (!ret)
                        *size = d_size;
        }

        return *size ? 0 : ret;
}

int bpf_object__variable_offset(const struct bpf_object *obj, const char *name,
                                __u32 *off)
{
        Elf_Data *symbols = obj->efile.symbols;
        const char *sname;
        size_t si;

        if (!name || !off)
                return -EINVAL;

        for (si = 0; si < symbols->d_size / sizeof(GElf_Sym); si++) {
                GElf_Sym sym;

                if (!gelf_getsym(symbols, si, &sym))
                        continue;
                if (GELF_ST_BIND(sym.st_info) != STB_GLOBAL ||
                    GELF_ST_TYPE(sym.st_info) != STT_OBJECT)
                        continue;

                sname = elf_strptr(obj->efile.elf, obj->efile.strtabidx,
                                   sym.st_name);
                if (!sname) {
                        pr_warn("failed to get sym name string for var %s\n",
                                name);
                        return -EIO;
                }
                if (strcmp(name, sname) == 0) {
                        *off = sym.st_value;
                        return 0;
                }
        }

        return -ENOENT;
}

static struct bpf_map *bpf_object__add_map(struct bpf_object *obj)
{
        struct bpf_map *new_maps;
        size_t new_cap;
        int i;

        if (obj->nr_maps < obj->maps_cap)
                return &obj->maps[obj->nr_maps++];

        new_cap = max((size_t)4, obj->maps_cap * 3 / 2);
        new_maps = libbpf_reallocarray(obj->maps, new_cap, sizeof(*obj->maps));
        if (!new_maps) {
                pr_warn("alloc maps for object failed\n");
                return ERR_PTR(-ENOMEM);
        }

        obj->maps_cap = new_cap;
        obj->maps = new_maps;

        /* zero out new maps */
        memset(obj->maps + obj->nr_maps, 0,
               (obj->maps_cap - obj->nr_maps) * sizeof(*obj->maps));
        /*
         * Fill all fds with -1 so we won't close an incorrect fd (fd=0 is
         * stdin) on failure (zclose won't close a negative fd).
         */
        for (i = obj->nr_maps; i < obj->maps_cap; i++) {
                obj->maps[i].fd = -1;
                obj->maps[i].inner_map_fd = -1;
        }

        return &obj->maps[obj->nr_maps++];
}

static size_t bpf_map_mmap_sz(const struct bpf_map *map)
{
        long page_sz = sysconf(_SC_PAGE_SIZE);
        size_t map_sz;

        map_sz = (size_t)roundup(map->def.value_size, 8) * map->def.max_entries;
        map_sz = roundup(map_sz, page_sz);
        return map_sz;
}
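
/* e.g. value_size=100, max_entries=1, page size 4096: roundup(100, 8) = 104,
 * 104 * 1 = 104, roundup(104, 4096) = 4096, so the mmap()ed region is one
 * page. The 8-byte rounding of value_size mirrors the kernel's element
 * layout for BPF_F_MMAPABLE arrays.
 */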

static char *internal_map_name(struct bpf_object *obj,
                               enum libbpf_map_type type)
{
        char map_name[BPF_OBJ_NAME_LEN], *p;
        const char *sfx = libbpf_type_to_btf_name[type];
        int sfx_len = max((size_t)7, strlen(sfx));
        int pfx_len = min((size_t)BPF_OBJ_NAME_LEN - sfx_len - 1,
                          strlen(obj->name));

        snprintf(map_name, sizeof(map_name), "%.*s%.*s", pfx_len, obj->name,
                 sfx_len, libbpf_type_to_btf_name[type]);

        /* sanitise map name to characters allowed by kernel */
        for (p = map_name; *p && p < map_name + sizeof(map_name); p++)
                if (!isalnum(*p) && *p != '_' && *p != '.')
                        *p = '_';

        return strdup(map_name);
}
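
/* e.g. with BPF_OBJ_NAME_LEN == 16, an object named "test_apps" and the
 * .rodata section (7-char suffix) give pfx_len = min(16 - 7 - 1, 9) = 8,
 * so the map is named "test_app.rodata": the object-name prefix is what
 * gets truncated, never the section suffix.
 */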

static int
bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type,
                              int sec_idx, void *data, size_t data_sz)
{
        struct bpf_map_def *def;
        struct bpf_map *map;
        int err;

        map = bpf_object__add_map(obj);
        if (IS_ERR(map))
                return PTR_ERR(map);

        map->libbpf_type = type;
        map->sec_idx = sec_idx;
        map->sec_offset = 0;
        map->name = internal_map_name(obj, type);
        if (!map->name) {
                pr_warn("failed to alloc map name\n");
                return -ENOMEM;
        }

        def = &map->def;
        def->type = BPF_MAP_TYPE_ARRAY;
        def->key_size = sizeof(int);
        def->value_size = data_sz;
        def->max_entries = 1;
        def->map_flags = type == LIBBPF_MAP_RODATA || type == LIBBPF_MAP_KCONFIG
                         ? BPF_F_RDONLY_PROG : 0;
        def->map_flags |= BPF_F_MMAPABLE;

        pr_debug("map '%s' (global data): at sec_idx %d, offset %zu, flags %x.\n",
                 map->name, map->sec_idx, map->sec_offset, def->map_flags);

        map->mmaped = mmap(NULL, bpf_map_mmap_sz(map), PROT_READ | PROT_WRITE,
                           MAP_SHARED | MAP_ANONYMOUS, -1, 0);
        if (map->mmaped == MAP_FAILED) {
                err = -errno;
                map->mmaped = NULL;
                pr_warn("failed to alloc map '%s' content buffer: %d\n",
                        map->name, err);
                zfree(&map->name);
                return err;
        }

        if (data)
                memcpy(map->mmaped, data, data_sz);

        pr_debug("map %td is \"%s\"\n", map - obj->maps, map->name);
        return 0;
}

static int bpf_object__init_global_data_maps(struct bpf_object *obj)
{
        int err;

        /*
         * Populate obj->maps with libbpf internal maps.
         */
        if (obj->efile.data_shndx >= 0) {
                err = bpf_object__init_internal_map(obj, LIBBPF_MAP_DATA,
                                                    obj->efile.data_shndx,
                                                    obj->efile.data->d_buf,
                                                    obj->efile.data->d_size);
                if (err)
                        return err;
        }
        if (obj->efile.rodata_shndx >= 0) {
                err = bpf_object__init_internal_map(obj, LIBBPF_MAP_RODATA,
                                                    obj->efile.rodata_shndx,
                                                    obj->efile.rodata->d_buf,
                                                    obj->efile.rodata->d_size);
                if (err)
                        return err;
        }
        if (obj->efile.bss_shndx >= 0) {
                err = bpf_object__init_internal_map(obj, LIBBPF_MAP_BSS,
                                                    obj->efile.bss_shndx,
                                                    NULL,
                                                    obj->efile.bss->d_size);
                if (err)
                        return err;
        }
        return 0;
}


static struct extern_desc *find_extern_by_name(const struct bpf_object *obj,
                                               const void *name)
{
        int i;

        for (i = 0; i < obj->nr_extern; i++) {
                if (strcmp(obj->externs[i].name, name) == 0)
                        return &obj->externs[i];
        }
        return NULL;
}

static int set_kcfg_value_tri(struct extern_desc *ext, void *ext_val,
                              char value)
{
        switch (ext->kcfg.type) {
        case KCFG_BOOL:
                if (value == 'm') {
                        pr_warn("extern (kcfg) %s=%c should be tristate or char\n",
                                ext->name, value);
                        return -EINVAL;
                }
                *(bool *)ext_val = value == 'y' ? true : false;
                break;
        case KCFG_TRISTATE:
                if (value == 'y')
                        *(enum libbpf_tristate *)ext_val = TRI_YES;
                else if (value == 'm')
                        *(enum libbpf_tristate *)ext_val = TRI_MODULE;
                else /* value == 'n' */
                        *(enum libbpf_tristate *)ext_val = TRI_NO;
                break;
        case KCFG_CHAR:
                *(char *)ext_val = value;
                break;
        case KCFG_UNKNOWN:
        case KCFG_INT:
        case KCFG_CHAR_ARR:
        default:
                pr_warn("extern (kcfg) %s=%c should be bool, tristate, or char\n",
                        ext->name, value);
                return -EINVAL;
        }
        ext->is_set = true;
        return 0;
}

static int set_kcfg_value_str(struct extern_desc *ext, char *ext_val,
                              const char *value)
{
        size_t len;

        if (ext->kcfg.type != KCFG_CHAR_ARR) {
                pr_warn("extern (kcfg) %s=%s should be char array\n", ext->name, value);
                return -EINVAL;
        }

        len = strlen(value);
        if (len < 2 || value[len - 1] != '"') {
                pr_warn("extern (kcfg) '%s': invalid string config '%s'\n",
                        ext->name, value);
                return -EINVAL;
        }

        /* strip quotes */
        len -= 2;
        if (len >= ext->kcfg.sz) {
                pr_warn("extern (kcfg) '%s': long string config %s of (%zu bytes) truncated to %d bytes\n",
                        ext->name, value, len, ext->kcfg.sz - 1);
                len = ext->kcfg.sz - 1;
        }
        memcpy(ext_val, value + 1, len);
        ext_val[len] = '\0';
        ext->is_set = true;
        return 0;
}

static int parse_u64(const char *value, __u64 *res)
{
        char *value_end;
        int err;

        errno = 0;
        *res = strtoull(value, &value_end, 0);
        if (errno) {
                err = -errno;
                pr_warn("failed to parse '%s' as integer: %d\n", value, err);
                return err;
        }
        if (*value_end) {
                pr_warn("failed to parse '%s' as integer completely\n", value);
                return -EINVAL;
        }
        return 0;
}

static bool is_kcfg_value_in_range(const struct extern_desc *ext, __u64 v)
{
        int bit_sz = ext->kcfg.sz * 8;

        if (ext->kcfg.sz == 8)
                return true;

        /* Validate that a value stored in u64 fits in an integer of
         * `ext->kcfg.sz` bytes without any loss of information. If the
         * target integer is signed, we rely on the following limits of an
         * integer type of Y bits and the subsequent transformation:
         *
         *     -2^(Y-1) <= X           <= 2^(Y-1) - 1
         *            0 <= X + 2^(Y-1) <= 2^Y - 1
         *            0 <= X + 2^(Y-1) <  2^Y
         *
         *  For an unsigned target integer, check that all the (64 - Y)
         *  upper bits are zero.
         */
        if (ext->kcfg.is_signed)
                return v + (1ULL << (bit_sz - 1)) < (1ULL << bit_sz);
        else
                return (v >> bit_sz) == 0;
}
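
/* Worked example, signed 1-byte extern (bit_sz = 8): v = (__u64)-1 is
 * 0xffffffffffffffff; adding 2^7 wraps to 0x7f, which is < 2^8, so -1 is
 * accepted. v = 128 gives 128 + 128 = 256, not < 2^8, so it is rejected,
 * matching the signed char range [-128, 127].
 */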
1549
1550 static int set_kcfg_value_num(struct extern_desc *ext, void *ext_val,
1551                               __u64 value)
1552 {
1553         if (ext->kcfg.type != KCFG_INT && ext->kcfg.type != KCFG_CHAR) {
1554                 pr_warn("extern (kcfg) %s=%llu should be integer\n",
1555                         ext->name, (unsigned long long)value);
1556                 return -EINVAL;
1557         }
1558         if (!is_kcfg_value_in_range(ext, value)) {
1559                 pr_warn("extern (kcfg) %s=%llu value doesn't fit in %d bytes\n",
1560                         ext->name, (unsigned long long)value, ext->kcfg.sz);
1561                 return -ERANGE;
1562         }
1563         switch (ext->kcfg.sz) {
1564         case 1: *(__u8 *)ext_val = value; break;
1565         case 2: *(__u16 *)ext_val = value; break;
1566         case 4: *(__u32 *)ext_val = value; break;
1567         case 8: *(__u64 *)ext_val = value; break;
1568         default:
1569                 return -EINVAL;
1570         }
1571         ext->is_set = true;
1572         return 0;
1573 }
1574
1575 static int bpf_object__process_kconfig_line(struct bpf_object *obj,
1576                                             char *buf, void *data)
1577 {
1578         struct extern_desc *ext;
1579         char *sep, *value;
1580         int len, err = 0;
1581         void *ext_val;
1582         __u64 num;
1583
1584         if (strncmp(buf, "CONFIG_", 7))
1585                 return 0;
1586
1587         sep = strchr(buf, '=');
1588         if (!sep) {
1589                 pr_warn("failed to parse '%s': no separator\n", buf);
1590                 return -EINVAL;
1591         }
1592
1593         /* Trim ending '\n' */
1594         len = strlen(buf);
1595         if (buf[len - 1] == '\n')
1596                 buf[len - 1] = '\0';
1597         /* Split on '=' and ensure that a value is present. */
1598         *sep = '\0';
1599         if (!sep[1]) {
1600                 *sep = '=';
1601                 pr_warn("failed to parse '%s': no value\n", buf);
1602                 return -EINVAL;
1603         }
1604
1605         ext = find_extern_by_name(obj, buf);
1606         if (!ext || ext->is_set)
1607                 return 0;
1608
1609         ext_val = data + ext->kcfg.data_off;
1610         value = sep + 1;
1611
1612         switch (*value) {
1613         case 'y': case 'n': case 'm':
1614                 err = set_kcfg_value_tri(ext, ext_val, *value);
1615                 break;
1616         case '"':
1617                 err = set_kcfg_value_str(ext, ext_val, value);
1618                 break;
1619         default:
1620                 /* assume integer */
1621                 err = parse_u64(value, &num);
1622                 if (err) {
1623                         pr_warn("extern (kcfg) %s=%s should be integer\n",
1624                                 ext->name, value);
1625                         return err;
1626                 }
1627                 err = set_kcfg_value_num(ext, ext_val, num);
1628                 break;
1629         }
1630         if (err)
1631                 return err;
1632         pr_debug("extern (kcfg) %s=%s\n", ext->name, value);
1633         return 0;
1634 }
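
/* Editor's note: illustrative examples (CONFIG_* names are just examples) of
 * lines the parser above accepts, and the handler each one is routed to:
 *
 *      CONFIG_BPF_SYSCALL=y              -> set_kcfg_value_tri()
 *      CONFIG_MODULES=m                  -> set_kcfg_value_tri()
 *      CONFIG_DEFAULT_HOSTNAME="(none)"  -> set_kcfg_value_str()
 *      CONFIG_HZ=1000                    -> parse_u64() + set_kcfg_value_num()
 */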
1635
1636 static int bpf_object__read_kconfig_file(struct bpf_object *obj, void *data)
1637 {
1638         char buf[PATH_MAX];
1639         struct utsname uts;
1640         int len, err = 0;
1641         gzFile file;
1642
1643         uname(&uts);
1644         len = snprintf(buf, PATH_MAX, "/boot/config-%s", uts.release);
1645         if (len < 0)
1646                 return -EINVAL;
1647         else if (len >= PATH_MAX)
1648                 return -ENAMETOOLONG;
1649
1650         /* gzopen also accepts uncompressed files. */
1651         file = gzopen(buf, "r");
1652         if (!file)
1653                 file = gzopen("/proc/config.gz", "r");
1654
1655         if (!file) {
1656                 pr_warn("failed to open system Kconfig\n");
1657                 return -ENOENT;
1658         }
1659
1660         while (gzgets(file, buf, sizeof(buf))) {
1661                 err = bpf_object__process_kconfig_line(obj, buf, data);
1662                 if (err) {
1663                         pr_warn("error parsing system Kconfig line '%s': %d\n",
1664                                 buf, err);
1665                         goto out;
1666                 }
1667         }
1668
1669 out:
1670         gzclose(file);
1671         return err;
1672 }
1673
1674 static int bpf_object__read_kconfig_mem(struct bpf_object *obj,
1675                                         const char *config, void *data)
1676 {
1677         char buf[PATH_MAX];
1678         int err = 0;
1679         FILE *file;
1680
1681         file = fmemopen((void *)config, strlen(config), "r");
1682         if (!file) {
1683                 err = -errno;
1684                 pr_warn("failed to open in-memory Kconfig: %d\n", err);
1685                 return err;
1686         }
1687
1688         while (fgets(buf, sizeof(buf), file)) {
1689                 err = bpf_object__process_kconfig_line(obj, buf, data);
1690                 if (err) {
1691                         pr_warn("error parsing in-memory Kconfig line '%s': %d\n",
1692                                 buf, err);
1693                         break;
1694                 }
1695         }
1696
1697         fclose(file);
1698         return err;
1699 }
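
/* Editor's note: an illustrative sketch (not part of libbpf) of how a caller
 * reaches the in-memory parser above: the kconfig string in
 * bpf_object_open_opts augments the system Kconfig at open time. The object
 * path and CONFIG_* values are made up for the example.
 */
static __attribute__((unused)) struct bpf_object *open_with_extra_kconfig(void)
{
        DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts,
                .kconfig = "CONFIG_BPF_SYSCALL=y\nCONFIG_HZ=1000\n");

        return bpf_object__open_file("prog.bpf.o", &opts);
}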
1700
1701 static int bpf_object__init_kconfig_map(struct bpf_object *obj)
1702 {
1703         struct extern_desc *last_ext = NULL, *ext;
1704         size_t map_sz;
1705         int i, err;
1706
1707         for (i = 0; i < obj->nr_extern; i++) {
1708                 ext = &obj->externs[i];
1709                 if (ext->type == EXT_KCFG)
1710                         last_ext = ext;
1711         }
1712
1713         if (!last_ext)
1714                 return 0;
1715
1716         map_sz = last_ext->kcfg.data_off + last_ext->kcfg.sz;
1717         err = bpf_object__init_internal_map(obj, LIBBPF_MAP_KCONFIG,
1718                                             obj->efile.symbols_shndx,
1719                                             NULL, map_sz);
1720         if (err)
1721                 return err;
1722
1723         obj->kconfig_map_idx = obj->nr_maps - 1;
1724
1725         return 0;
1726 }
1727
1728 static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict)
1729 {
1730         Elf_Data *symbols = obj->efile.symbols;
1731         int i, map_def_sz = 0, nr_maps = 0, nr_syms;
1732         Elf_Data *data = NULL;
1733         Elf_Scn *scn;
1734
1735         if (obj->efile.maps_shndx < 0)
1736                 return 0;
1737
1738         if (!symbols)
1739                 return -EINVAL;
1740
1741         scn = elf_getscn(obj->efile.elf, obj->efile.maps_shndx);
1742         if (scn)
1743                 data = elf_getdata(scn, NULL);
1744         if (!scn || !data) {
1745                 pr_warn("failed to get Elf_Data from map section %d\n",
1746                         obj->efile.maps_shndx);
1747                 return -EINVAL;
1748         }
1749
1750         /*
1751          * Count number of maps. Each map has a name.
1752          * Array of maps is not supported: only the first element is
1753          * considered.
1754          *
1755          * TODO: Detect array of map and report error.
1756          */
1757         nr_syms = symbols->d_size / sizeof(GElf_Sym);
1758         for (i = 0; i < nr_syms; i++) {
1759                 GElf_Sym sym;
1760
1761                 if (!gelf_getsym(symbols, i, &sym))
1762                         continue;
1763                 if (sym.st_shndx != obj->efile.maps_shndx)
1764                         continue;
1765                 nr_maps++;
1766         }
1767         /* Assume equally sized map definitions */
1768         pr_debug("maps in %s: %d maps in %zd bytes\n",
1769                  obj->path, nr_maps, data->d_size);
1770
1771         if (!data->d_size || nr_maps == 0 || (data->d_size % nr_maps) != 0) {
1772                 pr_warn("unable to determine map definition size for section %s: %d maps in %zd bytes\n",
1773                         obj->path, nr_maps, data->d_size);
1774                 return -EINVAL;
1775         }
1776         map_def_sz = data->d_size / nr_maps;
1777
1778         /* Fill obj->maps using data in "maps" section.  */
1779         for (i = 0; i < nr_syms; i++) {
1780                 GElf_Sym sym;
1781                 const char *map_name;
1782                 struct bpf_map_def *def;
1783                 struct bpf_map *map;
1784
1785                 if (!gelf_getsym(symbols, i, &sym))
1786                         continue;
1787                 if (sym.st_shndx != obj->efile.maps_shndx)
1788                         continue;
1789
1790                 map = bpf_object__add_map(obj);
1791                 if (IS_ERR(map))
1792                         return PTR_ERR(map);
1793
1794                 map_name = elf_strptr(obj->efile.elf, obj->efile.strtabidx,
1795                                       sym.st_name);
1796                 if (!map_name) {
1797                         pr_warn("failed to get map #%d name sym string for obj %s\n",
1798                                 i, obj->path);
1799                         return -LIBBPF_ERRNO__FORMAT;
1800                 }
1801
1802                 map->libbpf_type = LIBBPF_MAP_UNSPEC;
1803                 map->sec_idx = sym.st_shndx;
1804                 map->sec_offset = sym.st_value;
1805                 pr_debug("map '%s' (legacy): at sec_idx %d, offset %zu.\n",
1806                          map_name, map->sec_idx, map->sec_offset);
1807                 if (sym.st_value + map_def_sz > data->d_size) {
1808                         pr_warn("corrupted maps section in %s: last map \"%s\" too small\n",
1809                                 obj->path, map_name);
1810                         return -EINVAL;
1811                 }
1812
1813                 map->name = strdup(map_name);
1814                 if (!map->name) {
1815                         pr_warn("failed to alloc map name\n");
1816                         return -ENOMEM;
1817                 }
1818                 pr_debug("map %d is \"%s\"\n", i, map->name);
1819                 def = (struct bpf_map_def *)(data->d_buf + sym.st_value);
1820                 /*
1821                  * If the definition of the map in the object file fits in
1822                  * bpf_map_def, copy it.  Any extra fields in our version
1823                  * of bpf_map_def will default to zero as a result of the
1824                  * calloc above.
1825                  */
1826                 if (map_def_sz <= sizeof(struct bpf_map_def)) {
1827                         memcpy(&map->def, def, map_def_sz);
1828                 } else {
1829                         /*
1830                          * Here the map structure being read is bigger than what
1831                          * we expect, truncate if the excess bits are all zero.
1832                          * If they are not zero, reject this map as
1833                          * incompatible.
1834                          */
1835                         char *b;
1836
1837                         for (b = ((char *)def) + sizeof(struct bpf_map_def);
1838                              b < ((char *)def) + map_def_sz; b++) {
1839                                 if (*b != 0) {
1840                                         pr_warn("maps section in %s: \"%s\" has unrecognized, non-zero options\n",
1841                                                 obj->path, map_name);
1842                                         if (strict)
1843                                                 return -EINVAL;
1844                                 }
1845                         }
1846                         memcpy(&map->def, def, sizeof(struct bpf_map_def));
1847                 }
1848         }
1849         return 0;
1850 }
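
/* Editor's note: for illustration, a legacy map definition as it appears in
 * BPF program sources; it is compiled into the "maps" section parsed above
 * (field values are examples):
 *
 *      struct bpf_map_def SEC("maps") my_hash = {
 *              .type = BPF_MAP_TYPE_HASH,
 *              .key_size = sizeof(__u32),
 *              .value_size = sizeof(__u64),
 *              .max_entries = 1024,
 *      };
 */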
1851
1852 static const struct btf_type *
1853 skip_mods_and_typedefs(const struct btf *btf, __u32 id, __u32 *res_id)
1854 {
1855         const struct btf_type *t = btf__type_by_id(btf, id);
1856
1857         if (res_id)
1858                 *res_id = id;
1859
1860         while (btf_is_mod(t) || btf_is_typedef(t)) {
1861                 if (res_id)
1862                         *res_id = t->type;
1863                 t = btf__type_by_id(btf, t->type);
1864         }
1865
1866         return t;
1867 }
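
/* Editor's note: illustrative example -- given `typedef const volatile __u32
 * cfg_t;`, skip_mods_and_typedefs() walks TYPEDEF -> CONST -> VOLATILE and
 * returns the underlying INT type, with *res_id (if non-NULL) set to the
 * final type's id.
 */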
1868
1869 static const struct btf_type *
1870 resolve_func_ptr(const struct btf *btf, __u32 id, __u32 *res_id)
1871 {
1872         const struct btf_type *t;
1873
1874         t = skip_mods_and_typedefs(btf, id, NULL);
1875         if (!btf_is_ptr(t))
1876                 return NULL;
1877
1878         t = skip_mods_and_typedefs(btf, t->type, res_id);
1879
1880         return btf_is_func_proto(t) ? t : NULL;
1881 }
1882
1883 static const char *btf_kind_str(const struct btf_type *t)
1884 {
1885         switch (btf_kind(t)) {
1886         case BTF_KIND_UNKN: return "void";
1887         case BTF_KIND_INT: return "int";
1888         case BTF_KIND_PTR: return "ptr";
1889         case BTF_KIND_ARRAY: return "array";
1890         case BTF_KIND_STRUCT: return "struct";
1891         case BTF_KIND_UNION: return "union";
1892         case BTF_KIND_ENUM: return "enum";
1893         case BTF_KIND_FWD: return "fwd";
1894         case BTF_KIND_TYPEDEF: return "typedef";
1895         case BTF_KIND_VOLATILE: return "volatile";
1896         case BTF_KIND_CONST: return "const";
1897         case BTF_KIND_RESTRICT: return "restrict";
1898         case BTF_KIND_FUNC: return "func";
1899         case BTF_KIND_FUNC_PROTO: return "func_proto";
1900         case BTF_KIND_VAR: return "var";
1901         case BTF_KIND_DATASEC: return "datasec";
1902         default: return "unknown";
1903         }
1904 }
1905
1906 /*
1907  * Fetch integer attribute of BTF map definition. Such attributes are
1908  * represented using a pointer to an array, in which dimensionality of array
1909  * encodes specified integer value. E.g., int (*type)[BPF_MAP_TYPE_ARRAY];
1910  * encodes `type => BPF_MAP_TYPE_ARRAY` key/value pair completely using BTF
1911  * type definition, while using only sizeof(void *) space in ELF data section.
1912  */
1913 static bool get_map_field_int(const char *map_name, const struct btf *btf,
1914                               const struct btf_member *m, __u32 *res)
1915 {
1916         const struct btf_type *t = skip_mods_and_typedefs(btf, m->type, NULL);
1917         const char *name = btf__name_by_offset(btf, m->name_off);
1918         const struct btf_array *arr_info;
1919         const struct btf_type *arr_t;
1920
1921         if (!btf_is_ptr(t)) {
1922                 pr_warn("map '%s': attr '%s': expected PTR, got %s.\n",
1923                         map_name, name, btf_kind_str(t));
1924                 return false;
1925         }
1926
1927         arr_t = btf__type_by_id(btf, t->type);
1928         if (!arr_t) {
1929                 pr_warn("map '%s': attr '%s': type [%u] not found.\n",
1930                         map_name, name, t->type);
1931                 return false;
1932         }
1933         if (!btf_is_array(arr_t)) {
1934                 pr_warn("map '%s': attr '%s': expected ARRAY, got %s.\n",
1935                         map_name, name, btf_kind_str(arr_t));
1936                 return false;
1937         }
1938         arr_info = btf_array(arr_t);
1939         *res = arr_info->nelems;
1940         return true;
1941 }
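
/* Editor's note: in BPF-side sources the pointer-to-array encoding described
 * above is normally produced by the helper macros from bpf_helpers.h:
 *
 *      #define __uint(name, val) int (*name)[val]
 *      #define __type(name, val) typeof(val) *name
 *
 * so a BTF-defined map (values are examples) looks like:
 *
 *      struct {
 *              __uint(type, BPF_MAP_TYPE_ARRAY);
 *              __uint(max_entries, 256);
 *              __type(key, __u32);
 *              __type(value, __u64);
 *      } my_array SEC(".maps");
 */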
1942
1943 static int build_map_pin_path(struct bpf_map *map, const char *path)
1944 {
1945         char buf[PATH_MAX];
1946         int err, len;
1947
1948         if (!path)
1949                 path = "/sys/fs/bpf";
1950
1951         len = snprintf(buf, PATH_MAX, "%s/%s", path, bpf_map__name(map));
1952         if (len < 0)
1953                 return -EINVAL;
1954         else if (len >= PATH_MAX)
1955                 return -ENAMETOOLONG;
1956
1957         err = bpf_map__set_pin_path(map, buf);
1958         if (err)
1959                 return err;
1960
1961         return 0;
1962 }
1963
1965 static int parse_btf_map_def(struct bpf_object *obj,
1966                              struct bpf_map *map,
1967                              const struct btf_type *def,
1968                              bool strict, bool is_inner,
1969                              const char *pin_root_path)
1970 {
1971         const struct btf_type *t;
1972         const struct btf_member *m;
1973         int vlen, i;
1974
1975         vlen = btf_vlen(def);
1976         m = btf_members(def);
1977         for (i = 0; i < vlen; i++, m++) {
1978                 const char *name = btf__name_by_offset(obj->btf, m->name_off);
1979
1980                 if (!name) {
1981                         pr_warn("map '%s': invalid field #%d.\n", map->name, i);
1982                         return -EINVAL;
1983                 }
1984                 if (strcmp(name, "type") == 0) {
1985                         if (!get_map_field_int(map->name, obj->btf, m,
1986                                                &map->def.type))
1987                                 return -EINVAL;
1988                         pr_debug("map '%s': found type = %u.\n",
1989                                  map->name, map->def.type);
1990                 } else if (strcmp(name, "max_entries") == 0) {
1991                         if (!get_map_field_int(map->name, obj->btf, m,
1992                                                &map->def.max_entries))
1993                                 return -EINVAL;
1994                         pr_debug("map '%s': found max_entries = %u.\n",
1995                                  map->name, map->def.max_entries);
1996                 } else if (strcmp(name, "map_flags") == 0) {
1997                         if (!get_map_field_int(map->name, obj->btf, m,
1998                                                &map->def.map_flags))
1999                                 return -EINVAL;
2000                         pr_debug("map '%s': found map_flags = %u.\n",
2001                                  map->name, map->def.map_flags);
2002                 } else if (strcmp(name, "numa_node") == 0) {
2003                         if (!get_map_field_int(map->name, obj->btf, m, &map->numa_node))
2004                                 return -EINVAL;
2005                         pr_debug("map '%s': found numa_node = %u.\n", map->name, map->numa_node);
2006                 } else if (strcmp(name, "key_size") == 0) {
2007                         __u32 sz;
2008
2009                         if (!get_map_field_int(map->name, obj->btf, m, &sz))
2010                                 return -EINVAL;
2011                         pr_debug("map '%s': found key_size = %u.\n",
2012                                  map->name, sz);
2013                         if (map->def.key_size && map->def.key_size != sz) {
2014                                 pr_warn("map '%s': conflicting key size %u != %u.\n",
2015                                         map->name, map->def.key_size, sz);
2016                                 return -EINVAL;
2017                         }
2018                         map->def.key_size = sz;
2019                 } else if (strcmp(name, "key") == 0) {
2020                         __s64 sz;
2021
2022                         t = btf__type_by_id(obj->btf, m->type);
2023                         if (!t) {
2024                                 pr_warn("map '%s': key type [%d] not found.\n",
2025                                         map->name, m->type);
2026                                 return -EINVAL;
2027                         }
2028                         if (!btf_is_ptr(t)) {
2029                                 pr_warn("map '%s': key spec is not PTR: %s.\n",
2030                                         map->name, btf_kind_str(t));
2031                                 return -EINVAL;
2032                         }
2033                         sz = btf__resolve_size(obj->btf, t->type);
2034                         if (sz < 0) {
2035                                 pr_warn("map '%s': can't determine key size for type [%u]: %zd.\n",
2036                                         map->name, t->type, (ssize_t)sz);
2037                                 return sz;
2038                         }
2039                         pr_debug("map '%s': found key [%u], sz = %zd.\n",
2040                                  map->name, t->type, (ssize_t)sz);
2041                         if (map->def.key_size && map->def.key_size != sz) {
2042                                 pr_warn("map '%s': conflicting key size %u != %zd.\n",
2043                                         map->name, map->def.key_size, (ssize_t)sz);
2044                                 return -EINVAL;
2045                         }
2046                         map->def.key_size = sz;
2047                         map->btf_key_type_id = t->type;
2048                 } else if (strcmp(name, "value_size") == 0) {
2049                         __u32 sz;
2050
2051                         if (!get_map_field_int(map->name, obj->btf, m, &sz))
2052                                 return -EINVAL;
2053                         pr_debug("map '%s': found value_size = %u.\n",
2054                                  map->name, sz);
2055                         if (map->def.value_size && map->def.value_size != sz) {
2056                                 pr_warn("map '%s': conflicting value size %u != %u.\n",
2057                                         map->name, map->def.value_size, sz);
2058                                 return -EINVAL;
2059                         }
2060                         map->def.value_size = sz;
2061                 } else if (strcmp(name, "value") == 0) {
2062                         __s64 sz;
2063
2064                         t = btf__type_by_id(obj->btf, m->type);
2065                         if (!t) {
2066                                 pr_warn("map '%s': value type [%d] not found.\n",
2067                                         map->name, m->type);
2068                                 return -EINVAL;
2069                         }
2070                         if (!btf_is_ptr(t)) {
2071                                 pr_warn("map '%s': value spec is not PTR: %s.\n",
2072                                         map->name, btf_kind_str(t));
2073                                 return -EINVAL;
2074                         }
2075                         sz = btf__resolve_size(obj->btf, t->type);
2076                         if (sz < 0) {
2077                                 pr_warn("map '%s': can't determine value size for type [%u]: %zd.\n",
2078                                         map->name, t->type, (ssize_t)sz);
2079                                 return sz;
2080                         }
2081                         pr_debug("map '%s': found value [%u], sz = %zd.\n",
2082                                  map->name, t->type, (ssize_t)sz);
2083                         if (map->def.value_size && map->def.value_size != sz) {
2084                                 pr_warn("map '%s': conflicting value size %u != %zd.\n",
2085                                         map->name, map->def.value_size, (ssize_t)sz);
2086                                 return -EINVAL;
2087                         }
2088                         map->def.value_size = sz;
2089                         map->btf_value_type_id = t->type;
2090                 } else if (strcmp(name, "values") == 0) {
2092                         int err;
2093
2094                         if (is_inner) {
2095                                 pr_warn("map '%s': multi-level inner maps not supported.\n",
2096                                         map->name);
2097                                 return -ENOTSUP;
2098                         }
2099                         if (i != vlen - 1) {
2100                                 pr_warn("map '%s': '%s' member should be last.\n",
2101                                         map->name, name);
2102                                 return -EINVAL;
2103                         }
2104                         if (!bpf_map_type__is_map_in_map(map->def.type)) {
2105                                 pr_warn("map '%s': should be map-in-map.\n",
2106                                         map->name);
2107                                 return -ENOTSUP;
2108                         }
2109                         if (map->def.value_size && map->def.value_size != 4) {
2110                                 pr_warn("map '%s': conflicting value size %u != 4.\n",
2111                                         map->name, map->def.value_size);
2112                                 return -EINVAL;
2113                         }
2114                         map->def.value_size = 4;
2115                         t = btf__type_by_id(obj->btf, m->type);
2116                         if (!t) {
2117                                 pr_warn("map '%s': map-in-map inner type [%d] not found.\n",
2118                                         map->name, m->type);
2119                                 return -EINVAL;
2120                         }
2121                         if (!btf_is_array(t) || btf_array(t)->nelems) {
2122                                 pr_warn("map '%s': map-in-map inner spec is not a zero-sized array.\n",
2123                                         map->name);
2124                                 return -EINVAL;
2125                         }
2126                         t = skip_mods_and_typedefs(obj->btf, btf_array(t)->type,
2127                                                    NULL);
2128                         if (!btf_is_ptr(t)) {
2129                                 pr_warn("map '%s': map-in-map inner def is of unexpected kind %s.\n",
2130                                         map->name, btf_kind_str(t));
2131                                 return -EINVAL;
2132                         }
2133                         t = skip_mods_and_typedefs(obj->btf, t->type, NULL);
2134                         if (!btf_is_struct(t)) {
2135                                 pr_warn("map '%s': map-in-map inner def is of unexpected kind %s.\n",
2136                                         map->name, btf_kind_str(t));
2137                                 return -EINVAL;
2138                         }
2139
2140                         map->inner_map = calloc(1, sizeof(*map->inner_map));
2141                         if (!map->inner_map)
2142                                 return -ENOMEM;
2143                         map->inner_map->sec_idx = obj->efile.btf_maps_shndx;
2144                         map->inner_map->name = malloc(strlen(map->name) +
2145                                                       sizeof(".inner") + 1);
2146                         if (!map->inner_map->name)
2147                                 return -ENOMEM;
2148                         sprintf(map->inner_map->name, "%s.inner", map->name);
2149
2150                         err = parse_btf_map_def(obj, map->inner_map, t, strict,
2151                                                 true /* is_inner */, NULL);
2152                         if (err)
2153                                 return err;
2154                 } else if (strcmp(name, "pinning") == 0) {
2155                         __u32 val;
2156                         int err;
2157
2158                         if (is_inner) {
2159                                 pr_debug("map '%s': inner def can't be pinned.\n",
2160                                          map->name);
2161                                 return -EINVAL;
2162                         }
2163                         if (!get_map_field_int(map->name, obj->btf, m, &val))
2164                                 return -EINVAL;
2165                         pr_debug("map '%s': found pinning = %u.\n",
2166                                  map->name, val);
2167
2168                         if (val != LIBBPF_PIN_NONE &&
2169                             val != LIBBPF_PIN_BY_NAME) {
2170                                 pr_warn("map '%s': invalid pinning value %u.\n",
2171                                         map->name, val);
2172                                 return -EINVAL;
2173                         }
2174                         if (val == LIBBPF_PIN_BY_NAME) {
2175                                 err = build_map_pin_path(map, pin_root_path);
2176                                 if (err) {
2177                                         pr_warn("map '%s': couldn't build pin path.\n",
2178                                                 map->name);
2179                                         return err;
2180                                 }
2181                         }
2182                 } else {
2183                         if (strict) {
2184                                 pr_warn("map '%s': unknown field '%s'.\n",
2185                                         map->name, name);
2186                                 return -ENOTSUP;
2187                         }
2188                         pr_debug("map '%s': ignoring unknown field '%s'.\n",
2189                                  map->name, name);
2190                 }
2191         }
2192
2193         if (map->def.type == BPF_MAP_TYPE_UNSPEC) {
2194                 pr_warn("map '%s': map type isn't specified.\n", map->name);
2195                 return -EINVAL;
2196         }
2197
2198         return 0;
2199 }
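
/* Editor's note: for illustration, the BPF-side declarations that the
 * "values" and "pinning" members parsed above correspond to. bpf_helpers.h
 * defines __array(name, val) as `typeof(val) *name[]`, which yields exactly
 * the zero-sized array of pointers to struct checked for above (sizes and
 * names are examples):
 *
 *      struct inner {
 *              __uint(type, BPF_MAP_TYPE_ARRAY);
 *              __uint(max_entries, 1);
 *              __type(key, __u32);
 *              __type(value, __u32);
 *      };
 *
 *      struct {
 *              __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
 *              __uint(max_entries, 8);
 *              __uint(pinning, LIBBPF_PIN_BY_NAME);
 *              __type(key, __u32);
 *              __array(values, struct inner);
 *      } outer SEC(".maps");
 */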
2200
2201 static int bpf_object__init_user_btf_map(struct bpf_object *obj,
2202                                          const struct btf_type *sec,
2203                                          int var_idx, int sec_idx,
2204                                          const Elf_Data *data, bool strict,
2205                                          const char *pin_root_path)
2206 {
2207         const struct btf_type *var, *def;
2208         const struct btf_var_secinfo *vi;
2209         const struct btf_var *var_extra;
2210         const char *map_name;
2211         struct bpf_map *map;
2212
2213         vi = btf_var_secinfos(sec) + var_idx;
2214         var = btf__type_by_id(obj->btf, vi->type);
2215         var_extra = btf_var(var);
2216         map_name = btf__name_by_offset(obj->btf, var->name_off);
2217
2218         if (map_name == NULL || map_name[0] == '\0') {
2219                 pr_warn("map #%d: empty name.\n", var_idx);
2220                 return -EINVAL;
2221         }
2222         if ((__u64)vi->offset + vi->size > data->d_size) {
2223                 pr_warn("map '%s' BTF data is corrupted.\n", map_name);
2224                 return -EINVAL;
2225         }
2226         if (!btf_is_var(var)) {
2227                 pr_warn("map '%s': unexpected var kind %s.\n",
2228                         map_name, btf_kind_str(var));
2229                 return -EINVAL;
2230         }
2231         if (var_extra->linkage != BTF_VAR_GLOBAL_ALLOCATED &&
2232             var_extra->linkage != BTF_VAR_STATIC) {
2233                 pr_warn("map '%s': unsupported var linkage %u.\n",
2234                         map_name, var_extra->linkage);
2235                 return -EOPNOTSUPP;
2236         }
2237
2238         def = skip_mods_and_typedefs(obj->btf, var->type, NULL);
2239         if (!btf_is_struct(def)) {
2240                 pr_warn("map '%s': unexpected def kind %s.\n",
2241                         map_name, btf_kind_str(def));
2242                 return -EINVAL;
2243         }
2244         if (def->size > vi->size) {
2245                 pr_warn("map '%s': invalid def size.\n", map_name);
2246                 return -EINVAL;
2247         }
2248
2249         map = bpf_object__add_map(obj);
2250         if (IS_ERR(map))
2251                 return PTR_ERR(map);
2252         map->name = strdup(map_name);
2253         if (!map->name) {
2254                 pr_warn("map '%s': failed to alloc map name.\n", map_name);
2255                 return -ENOMEM;
2256         }
2257         map->libbpf_type = LIBBPF_MAP_UNSPEC;
2258         map->def.type = BPF_MAP_TYPE_UNSPEC;
2259         map->sec_idx = sec_idx;
2260         map->sec_offset = vi->offset;
2261         map->btf_var_idx = var_idx;
2262         pr_debug("map '%s': at sec_idx %d, offset %zu.\n",
2263                  map_name, map->sec_idx, map->sec_offset);
2264
2265         return parse_btf_map_def(obj, map, def, strict, false, pin_root_path);
2266 }
2267
2268 static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict,
2269                                           const char *pin_root_path)
2270 {
2271         const struct btf_type *sec = NULL;
2272         int nr_types, i, vlen, err;
2273         const struct btf_type *t;
2274         const char *name;
2275         Elf_Data *data = NULL;
2276         Elf_Scn *scn;
2277
2278         if (obj->efile.btf_maps_shndx < 0)
2279                 return 0;
2280
2281         scn = elf_getscn(obj->efile.elf, obj->efile.btf_maps_shndx);
2282         if (scn)
2283                 data = elf_getdata(scn, NULL);
2284         if (!scn || !data) {
2285                 pr_warn("failed to get Elf_Data from map section %d (%s)\n",
2286                         obj->efile.btf_maps_shndx, MAPS_ELF_SEC);
2287                 return -EINVAL;
2288         }
2289
2290         nr_types = btf__get_nr_types(obj->btf);
2291         for (i = 1; i <= nr_types; i++) {
2292                 t = btf__type_by_id(obj->btf, i);
2293                 if (!btf_is_datasec(t))
2294                         continue;
2295                 name = btf__name_by_offset(obj->btf, t->name_off);
2296                 if (strcmp(name, MAPS_ELF_SEC) == 0) {
2297                         sec = t;
2298                         obj->efile.btf_maps_sec_btf_id = i;
2299                         break;
2300                 }
2301         }
2302
2303         if (!sec) {
2304                 pr_warn("DATASEC '%s' not found.\n", MAPS_ELF_SEC);
2305                 return -ENOENT;
2306         }
2307
2308         vlen = btf_vlen(sec);
2309         for (i = 0; i < vlen; i++) {
2310                 err = bpf_object__init_user_btf_map(obj, sec, i,
2311                                                     obj->efile.btf_maps_shndx,
2312                                                     data, strict,
2313                                                     pin_root_path);
2314                 if (err)
2315                         return err;
2316         }
2317
2318         return 0;
2319 }
2320
2321 static int bpf_object__init_maps(struct bpf_object *obj,
2322                                  const struct bpf_object_open_opts *opts)
2323 {
2324         const char *pin_root_path;
2325         bool strict;
2326         int err;
2327
2328         strict = !OPTS_GET(opts, relaxed_maps, false);
2329         pin_root_path = OPTS_GET(opts, pin_root_path, NULL);
2330
2331         err = bpf_object__init_user_maps(obj, strict);
2332         err = err ?: bpf_object__init_user_btf_maps(obj, strict, pin_root_path);
2333         err = err ?: bpf_object__init_global_data_maps(obj);
2334         err = err ?: bpf_object__init_kconfig_map(obj);
2335         err = err ?: bpf_object__init_struct_ops_maps(obj);
2336         if (err)
2337                 return err;
2338
2339         return 0;
2340 }
2341
2342 static bool section_have_execinstr(struct bpf_object *obj, int idx)
2343 {
2344         Elf_Scn *scn;
2345         GElf_Shdr sh;
2346
2347         scn = elf_getscn(obj->efile.elf, idx);
2348         if (!scn)
2349                 return false;
2350
2351         if (gelf_getshdr(scn, &sh) != &sh)
2352                 return false;
2353
2354         if (sh.sh_flags & SHF_EXECINSTR)
2355                 return true;
2356
2357         return false;
2358 }
2359
2360 static bool btf_needs_sanitization(struct bpf_object *obj)
2361 {
2362         bool has_func_global = kernel_supports(FEAT_BTF_GLOBAL_FUNC);
2363         bool has_datasec = kernel_supports(FEAT_BTF_DATASEC);
2364         bool has_func = kernel_supports(FEAT_BTF_FUNC);
2365
2366         return !has_func || !has_datasec || !has_func_global;
2367 }
2368
2369 static void bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf)
2370 {
2371         bool has_func_global = kernel_supports(FEAT_BTF_GLOBAL_FUNC);
2372         bool has_datasec = kernel_supports(FEAT_BTF_DATASEC);
2373         bool has_func = kernel_supports(FEAT_BTF_FUNC);
2374         struct btf_type *t;
2375         int i, j, vlen;
2376
2377         for (i = 1; i <= btf__get_nr_types(btf); i++) {
2378                 t = (struct btf_type *)btf__type_by_id(btf, i);
2379
2380                 if (!has_datasec && btf_is_var(t)) {
2381                         /* replace VAR with INT */
2382                         t->info = BTF_INFO_ENC(BTF_KIND_INT, 0, 0);
2383                         /*
2384                          * using size = 1 is the safest choice, 4 will be too
2385                          * big and cause kernel BTF validation failure if
2386                          * original variable took less than 4 bytes
2387                          */
2388                         t->size = 1;
2389                         *(int *)(t + 1) = BTF_INT_ENC(0, 0, 8);
2390                 } else if (!has_datasec && btf_is_datasec(t)) {
2391                         /* replace DATASEC with STRUCT */
2392                         const struct btf_var_secinfo *v = btf_var_secinfos(t);
2393                         struct btf_member *m = btf_members(t);
2394                         struct btf_type *vt;
2395                         char *name;
2396
2397                         name = (char *)btf__name_by_offset(btf, t->name_off);
2398                         while (*name) {
2399                                 if (*name == '.')
2400                                         *name = '_';
2401                                 name++;
2402                         }
2403
2404                         vlen = btf_vlen(t);
2405                         t->info = BTF_INFO_ENC(BTF_KIND_STRUCT, 0, vlen);
2406                         for (j = 0; j < vlen; j++, v++, m++) {
2407                                 /* order of field assignments is important */
2408                                 m->offset = v->offset * 8;
2409                                 m->type = v->type;
2410                                 /* preserve variable name as member name */
2411                                 vt = (void *)btf__type_by_id(btf, v->type);
2412                                 m->name_off = vt->name_off;
2413                         }
2414                 } else if (!has_func && btf_is_func_proto(t)) {
2415                         /* replace FUNC_PROTO with ENUM */
2416                         vlen = btf_vlen(t);
2417                         t->info = BTF_INFO_ENC(BTF_KIND_ENUM, 0, vlen);
2418                         t->size = sizeof(__u32); /* kernel enforced */
2419                 } else if (!has_func && btf_is_func(t)) {
2420                         /* replace FUNC with TYPEDEF */
2421                         t->info = BTF_INFO_ENC(BTF_KIND_TYPEDEF, 0, 0);
2422                 } else if (!has_func_global && btf_is_func(t)) {
2423                         /* replace BTF_FUNC_GLOBAL with BTF_FUNC_STATIC */
2424                         t->info = BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0);
2425                 }
2426         }
2427 }
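
/* Editor's note: an illustrative before/after of the DATASEC downgrade above,
 * in bpftool-like dump notation (names, ids, and sizes are examples). On a
 * kernel without BTF_KIND_DATASEC support,
 *
 *      DATASEC '.data' size=8 vlen=1
 *              type_id=5 offset=0 size=8
 *
 * is rewritten in place into
 *
 *      STRUCT '_data' size=8 vlen=1
 *              'my_var' type_id=5 bits_offset=0
 *
 * which older kernels accept, at the cost of less precise type information.
 */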
2428
2429 static bool libbpf_needs_btf(const struct bpf_object *obj)
2430 {
2431         return obj->efile.btf_maps_shndx >= 0 ||
2432                obj->efile.st_ops_shndx >= 0 ||
2433                obj->nr_extern > 0;
2434 }
2435
2436 static bool kernel_needs_btf(const struct bpf_object *obj)
2437 {
2438         return obj->efile.st_ops_shndx >= 0;
2439 }
2440
2441 static int bpf_object__init_btf(struct bpf_object *obj,
2442                                 Elf_Data *btf_data,
2443                                 Elf_Data *btf_ext_data)
2444 {
2445         int err = -ENOENT;
2446
2447         if (btf_data) {
2448                 obj->btf = btf__new(btf_data->d_buf, btf_data->d_size);
2449                 if (IS_ERR(obj->btf)) {
2450                         err = PTR_ERR(obj->btf);
2451                         obj->btf = NULL;
2452                         pr_warn("Error loading ELF section %s: %d.\n",
2453                                 BTF_ELF_SEC, err);
2454                         goto out;
2455                 }
2456                 /* enforce 8-byte pointers for BPF-targeted BTFs */
2457                 btf__set_pointer_size(obj->btf, 8);
2458                 err = 0;
2459         }
2460         if (btf_ext_data) {
2461                 if (!obj->btf) {
2462                         pr_debug("Ignoring ELF section %s: the ELF section %s it depends on was not found.\n",
2463                                  BTF_EXT_ELF_SEC, BTF_ELF_SEC);
2464                         goto out;
2465                 }
2466                 obj->btf_ext = btf_ext__new(btf_ext_data->d_buf,
2467                                             btf_ext_data->d_size);
2468                 if (IS_ERR(obj->btf_ext)) {
2469                         pr_warn("Error loading ELF section %s: %ld. Ignoring it and continuing.\n",
2470                                 BTF_EXT_ELF_SEC, PTR_ERR(obj->btf_ext));
2471                         obj->btf_ext = NULL;
2472                         goto out;
2473                 }
2474         }
2475 out:
2476         if (err && libbpf_needs_btf(obj)) {
2477                 pr_warn("BTF is required, but is missing or corrupted.\n");
2478                 return err;
2479         }
2480         return 0;
2481 }
2482
2483 static int bpf_object__finalize_btf(struct bpf_object *obj)
2484 {
2485         int err;
2486
2487         if (!obj->btf)
2488                 return 0;
2489
2490         err = btf__finalize_data(obj, obj->btf);
2491         if (err) {
2492                 pr_warn("Error finalizing %s: %d.\n", BTF_ELF_SEC, err);
2493                 return err;
2494         }
2495
2496         return 0;
2497 }
2498
2499 static inline bool libbpf_prog_needs_vmlinux_btf(struct bpf_program *prog)
2500 {
2501         if (prog->type == BPF_PROG_TYPE_STRUCT_OPS ||
2502             prog->type == BPF_PROG_TYPE_LSM)
2503                 return true;
2504
2505         /* BPF_PROG_TYPE_TRACING programs which do not attach to other programs
2506          * also need vmlinux BTF
2507          */
2508         if (prog->type == BPF_PROG_TYPE_TRACING && !prog->attach_prog_fd)
2509                 return true;
2510
2511         return false;
2512 }
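
/* Editor's note: an illustrative BPF-side program for which the check above
 * returns true: SEC("fentry/...") produces a BPF_PROG_TYPE_TRACING program
 * with no attach_prog_fd, so vmlinux BTF is needed to resolve the attach
 * target (the kernel function name is an example):
 *
 *      SEC("fentry/do_unlinkat")
 *      int BPF_PROG(trace_unlink, int dfd, struct filename *name)
 *      {
 *              return 0;
 *      }
 */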
2513
2514 static int bpf_object__load_vmlinux_btf(struct bpf_object *obj)
2515 {
2516         bool need_vmlinux_btf = false;
2517         struct bpf_program *prog;
2518         int err;
2519
2520         /* CO-RE relocations need kernel BTF */
2521         if (obj->btf_ext && obj->btf_ext->core_relo_info.len)
2522                 need_vmlinux_btf = true;
2523
2524         bpf_object__for_each_program(prog, obj) {
2525                 if (!prog->load)
2526                         continue;
2527                 if (libbpf_prog_needs_vmlinux_btf(prog)) {
2528                         need_vmlinux_btf = true;
2529                         break;
2530                 }
2531         }
2532
2533         if (!need_vmlinux_btf)
2534                 return 0;
2535
2536         obj->btf_vmlinux = libbpf_find_kernel_btf();
2537         if (IS_ERR(obj->btf_vmlinux)) {
2538                 err = PTR_ERR(obj->btf_vmlinux);
2539                 pr_warn("Error loading vmlinux BTF: %d\n", err);
2540                 obj->btf_vmlinux = NULL;
2541                 return err;
2542         }
2543         return 0;
2544 }
2545
2546 static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj)
2547 {
2548         struct btf *kern_btf = obj->btf;
2549         bool btf_mandatory, sanitize;
2550         int err = 0;
2551
2552         if (!obj->btf)
2553                 return 0;
2554
2555         if (!kernel_supports(FEAT_BTF)) {
2556                 if (kernel_needs_btf(obj)) {
2557                         err = -EOPNOTSUPP;
2558                         goto report;
2559                 }
2560                 pr_debug("Kernel doesn't support BTF, skipping BTF upload.\n");
2561                 return 0;
2562         }
2563
2564         sanitize = btf_needs_sanitization(obj);
2565         if (sanitize) {
2566                 const void *raw_data;
2567                 __u32 sz;
2568
2569                 /* clone BTF to sanitize a copy and leave the original intact */
2570                 raw_data = btf__get_raw_data(obj->btf, &sz);
2571                 kern_btf = btf__new(raw_data, sz);
2572                 if (IS_ERR(kern_btf))
2573                         return PTR_ERR(kern_btf);
2574
2575                 /* enforce 8-byte pointers for BPF-targeted BTFs */
2576                 btf__set_pointer_size(obj->btf, 8);
2577                 bpf_object__sanitize_btf(obj, kern_btf);
2578         }
2579
2580         err = btf__load(kern_btf);
2581         if (sanitize) {
2582                 if (!err) {
2583                         /* move fd to libbpf's BTF */
2584                         btf__set_fd(obj->btf, btf__fd(kern_btf));
2585                         btf__set_fd(kern_btf, -1);
2586                 }
2587                 btf__free(kern_btf);
2588         }
2589 report:
2590         if (err) {
2591                 btf_mandatory = kernel_needs_btf(obj);
2592                 pr_warn("Error loading .BTF into kernel: %d. %s\n", err,
2593                         btf_mandatory ? "BTF is mandatory, can't proceed."
2594                                       : "BTF is optional, ignoring.");
2595                 if (!btf_mandatory)
2596                         err = 0;
2597         }
2598         return err;
2599 }
2600
2601 static int bpf_object__elf_collect(struct bpf_object *obj)
2602 {
2603         Elf *elf = obj->efile.elf;
2604         GElf_Ehdr *ep = &obj->efile.ehdr;
2605         Elf_Data *btf_ext_data = NULL;
2606         Elf_Data *btf_data = NULL;
2607         Elf_Scn *scn = NULL;
2608         int idx = 0, err = 0;
2609
2610         /* Elf is corrupted/truncated, avoid calling elf_strptr. */
2611         if (!elf_rawdata(elf_getscn(elf, ep->e_shstrndx), NULL)) {
2612                 pr_warn("failed to get e_shstrndx from %s\n", obj->path);
2613                 return -LIBBPF_ERRNO__FORMAT;
2614         }
2615
2616         while ((scn = elf_nextscn(elf, scn)) != NULL) {
2617                 char *name;
2618                 GElf_Shdr sh;
2619                 Elf_Data *data;
2620
2621                 idx++;
2622                 if (gelf_getshdr(scn, &sh) != &sh) {
2623                         pr_warn("failed to get section(%d) header from %s\n",
2624                                 idx, obj->path);
2625                         return -LIBBPF_ERRNO__FORMAT;
2626                 }
2627
2628                 name = elf_strptr(elf, ep->e_shstrndx, sh.sh_name);
2629                 if (!name) {
2630                         pr_warn("failed to get section(%d) name from %s\n",
2631                                 idx, obj->path);
2632                         return -LIBBPF_ERRNO__FORMAT;
2633                 }
2634
2635                 data = elf_getdata(scn, 0);
2636                 if (!data) {
2637                         pr_warn("failed to get section(%d) data from %s(%s)\n",
2638                                 idx, name, obj->path);
2639                         return -LIBBPF_ERRNO__FORMAT;
2640                 }
2641                 pr_debug("section(%d) %s, size %ld, link %d, flags %lx, type=%d\n",
2642                          idx, name, (unsigned long)data->d_size,
2643                          (int)sh.sh_link, (unsigned long)sh.sh_flags,
2644                          (int)sh.sh_type);
2645
2646                 if (strcmp(name, "license") == 0) {
2647                         err = bpf_object__init_license(obj,
2648                                                        data->d_buf,
2649                                                        data->d_size);
2650                         if (err)
2651                                 return err;
2652                 } else if (strcmp(name, "version") == 0) {
2653                         err = bpf_object__init_kversion(obj,
2654                                                         data->d_buf,
2655                                                         data->d_size);
2656                         if (err)
2657                                 return err;
2658                 } else if (strcmp(name, "maps") == 0) {
2659                         obj->efile.maps_shndx = idx;
2660                 } else if (strcmp(name, MAPS_ELF_SEC) == 0) {
2661                         obj->efile.btf_maps_shndx = idx;
2662                 } else if (strcmp(name, BTF_ELF_SEC) == 0) {
2663                         btf_data = data;
2664                 } else if (strcmp(name, BTF_EXT_ELF_SEC) == 0) {
2665                         btf_ext_data = data;
2666                 } else if (sh.sh_type == SHT_SYMTAB) {
2667                         if (obj->efile.symbols) {
2668                                 pr_warn("bpf: multiple SYMTAB in %s\n",
2669                                         obj->path);
2670                                 return -LIBBPF_ERRNO__FORMAT;
2671                         }
2672                         obj->efile.symbols = data;
2673                         obj->efile.symbols_shndx = idx;
2674                         obj->efile.strtabidx = sh.sh_link;
2675                 } else if (sh.sh_type == SHT_PROGBITS && data->d_size > 0) {
2676                         if (sh.sh_flags & SHF_EXECINSTR) {
2677                                 if (strcmp(name, ".text") == 0)
2678                                         obj->efile.text_shndx = idx;
2679                                 err = bpf_object__add_program(obj, data->d_buf,
2680                                                               data->d_size,
2681                                                               name, idx);
2682                                 if (err) {
2683                                         char errmsg[STRERR_BUFSIZE];
2684                                         char *cp;
2685
2686                                         cp = libbpf_strerror_r(-err, errmsg,
2687                                                                sizeof(errmsg));
2688                                         pr_warn("failed to alloc program %s (%s): %s\n",
2689                                                 name, obj->path, cp);
2690                                         return err;
2691                                 }
2692                         } else if (strcmp(name, DATA_SEC) == 0) {
2693                                 obj->efile.data = data;
2694                                 obj->efile.data_shndx = idx;
2695                         } else if (strcmp(name, RODATA_SEC) == 0) {
2696                                 obj->efile.rodata = data;
2697                                 obj->efile.rodata_shndx = idx;
2698                         } else if (strcmp(name, STRUCT_OPS_SEC) == 0) {
2699                                 obj->efile.st_ops_data = data;
2700                                 obj->efile.st_ops_shndx = idx;
2701                         } else {
2702                                 pr_debug("skip section(%d) %s\n", idx, name);
2703                         }
2704                 } else if (sh.sh_type == SHT_REL) {
2705                         int nr_sects = obj->efile.nr_reloc_sects;
2706                         void *sects = obj->efile.reloc_sects;
2707                         int sec = sh.sh_info; /* points to other section */
2708
2709                         /* Only do relo for section with exec instructions */
2710                         if (!section_have_execinstr(obj, sec) &&
2711                             strcmp(name, ".rel" STRUCT_OPS_SEC) &&
2712                             strcmp(name, ".rel" MAPS_ELF_SEC)) {
2713                                 pr_debug("skip relo %s(%d) for section(%d)\n",
2714                                          name, idx, sec);
2715                                 continue;
2716                         }
2717
2718                         sects = libbpf_reallocarray(sects, nr_sects + 1,
2719                                                     sizeof(*obj->efile.reloc_sects));
2720                         if (!sects) {
2721                                 pr_warn("reloc_sects realloc failed\n");
2722                                 return -ENOMEM;
2723                         }
2724
2725                         obj->efile.reloc_sects = sects;
2726                         obj->efile.nr_reloc_sects++;
2727
2728                         obj->efile.reloc_sects[nr_sects].shdr = sh;
2729                         obj->efile.reloc_sects[nr_sects].data = data;
2730                 } else if (sh.sh_type == SHT_NOBITS &&
2731                            strcmp(name, BSS_SEC) == 0) {
2732                         obj->efile.bss = data;
2733                         obj->efile.bss_shndx = idx;
2734                 } else {
2735                         pr_debug("skip section(%d) %s\n", idx, name);
2736                 }
2737         }
2738
2739         if (!obj->efile.strtabidx || obj->efile.strtabidx > idx) {
2740                 pr_warn("Corrupted ELF file: index of strtab invalid\n");
2741                 return -LIBBPF_ERRNO__FORMAT;
2742         }
2743         return bpf_object__init_btf(obj, btf_data, btf_ext_data);
2744 }
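
/* Editor's note: for illustration, an abbreviated readelf-style section list
 * of a typical BPF object, annotated with how the loop above classifies each
 * section (names and indexes are examples):
 *
 *      [ 3] kprobe/sys_open   PROGBITS  AX  -> bpf_object__add_program()
 *      [ 4] .relkprobe/...    REL           -> obj->efile.reloc_sects[]
 *      [ 5] .maps             PROGBITS      -> obj->efile.btf_maps_shndx
 *      [ 6] .rodata           PROGBITS  A   -> obj->efile.rodata
 *      [ 7] .bss              NOBITS    WA  -> obj->efile.bss
 *      [ 8] .BTF              PROGBITS      -> btf_data
 *      [ 9] .BTF.ext          PROGBITS      -> btf_ext_data
 *      [10] .symtab           SYMTAB        -> obj->efile.symbols
 */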
2745
2746 static bool sym_is_extern(const GElf_Sym *sym)
2747 {
2748         int bind = GELF_ST_BIND(sym->st_info);
2749         /* externs are symbols w/ type=NOTYPE, bind=GLOBAL|WEAK, section=UND */
2750         return sym->st_shndx == SHN_UNDEF &&
2751                (bind == STB_GLOBAL || bind == STB_WEAK) &&
2752                GELF_ST_TYPE(sym->st_info) == STT_NOTYPE;
2753 }
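
/* Editor's note: an illustrative `readelf -s` line for a symbol matching the
 * predicate above (symbol index and name are examples):
 *
 *      12: 0000000000000000  0 NOTYPE  GLOBAL DEFAULT  UND CONFIG_HZ
 */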
2754
2755 static int find_extern_btf_id(const struct btf *btf, const char *ext_name)
2756 {
2757         const struct btf_type *t;
2758         const char *var_name;
2759         int i, n;
2760
2761         if (!btf)
2762                 return -ESRCH;
2763
2764         n = btf__get_nr_types(btf);
2765         for (i = 1; i <= n; i++) {
2766                 t = btf__type_by_id(btf, i);
2767
2768                 if (!btf_is_var(t))
2769                         continue;
2770
2771                 var_name = btf__name_by_offset(btf, t->name_off);
2772                 if (strcmp(var_name, ext_name))
2773                         continue;
2774
2775                 if (btf_var(t)->linkage != BTF_VAR_GLOBAL_EXTERN)
2776                         return -EINVAL;
2777
2778                 return i;
2779         }
2780
2781         return -ENOENT;
2782 }
2783
2784 static int find_extern_sec_btf_id(struct btf *btf, int ext_btf_id)
     {
2785         const struct btf_var_secinfo *vs;
2786         const struct btf_type *t;
2787         int i, j, n;
2788
2789         if (!btf)
2790                 return -ESRCH;
2791
2792         n = btf__get_nr_types(btf);
2793         for (i = 1; i <= n; i++) {
2794                 t = btf__type_by_id(btf, i);
2795
2796                 if (!btf_is_datasec(t))
2797                         continue;
2798
2799                 vs = btf_var_secinfos(t);
2800                 for (j = 0; j < btf_vlen(t); j++, vs++) {
2801                         if (vs->type == ext_btf_id)
2802                                 return i;
2803                 }
2804         }
2805
2806         return -ENOENT;
2807 }
2808
2809 static enum kcfg_type find_kcfg_type(const struct btf *btf, int id,
2810                                      bool *is_signed)
2811 {
2812         const struct btf_type *t;
2813         const char *name;
2814
2815         t = skip_mods_and_typedefs(btf, id, NULL);
2816         name = btf__name_by_offset(btf, t->name_off);
2817
2818         if (is_signed)
2819                 *is_signed = false;
2820         switch (btf_kind(t)) {
2821         case BTF_KIND_INT: {
2822                 int enc = btf_int_encoding(t);
2823
2824                 if (enc & BTF_INT_BOOL)
2825                         return t->size == 1 ? KCFG_BOOL : KCFG_UNKNOWN;
2826                 if (is_signed)
2827                         *is_signed = enc & BTF_INT_SIGNED;
2828                 if (t->size == 1)
2829                         return KCFG_CHAR;
2830                 if (t->size < 1 || t->size > 8 || (t->size & (t->size - 1)))
2831                         return KCFG_UNKNOWN;
2832                 return KCFG_INT;
2833         }
2834         case BTF_KIND_ENUM:
2835                 if (t->size != 4)
2836                         return KCFG_UNKNOWN;
2837                 if (strcmp(name, "libbpf_tristate"))
2838                         return KCFG_UNKNOWN;
2839                 return KCFG_TRISTATE;
2840         case BTF_KIND_ARRAY:
2841                 if (btf_array(t)->nelems == 0)
2842                         return KCFG_UNKNOWN;
2843                 if (find_kcfg_type(btf, btf_array(t)->type, NULL) != KCFG_CHAR)
2844                         return KCFG_UNKNOWN;
2845                 return KCFG_CHAR_ARR;
2846         default:
2847                 return KCFG_UNKNOWN;
2848         }
2849 }
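
/* Editor's note: illustrative BPF-side extern declarations and the kcfg_type
 * each one resolves to via find_kcfg_type(). CONFIG_* names are examples;
 * enum libbpf_tristate and __kconfig come from bpf_helpers.h:
 *
 *      extern bool CONFIG_BPF_SYSCALL __kconfig;              KCFG_BOOL
 *      extern enum libbpf_tristate CONFIG_MODULES __kconfig;  KCFG_TRISTATE
 *      extern char CONFIG_CC_VERSION_TEXT[64] __kconfig;      KCFG_CHAR_ARR
 *      extern int CONFIG_HZ __kconfig;                        KCFG_INT
 */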
2850
2851 static int cmp_externs(const void *_a, const void *_b)
2852 {
2853         const struct extern_desc *a = _a;
2854         const struct extern_desc *b = _b;
2855
2856         if (a->type != b->type)
2857                 return a->type < b->type ? -1 : 1;
2858
2859         if (a->type == EXT_KCFG) {
2860                 /* descending order by alignment requirements */
2861                 if (a->kcfg.align != b->kcfg.align)
2862                         return a->kcfg.align > b->kcfg.align ? -1 : 1;
2863                 /* ascending order by size, within same alignment class */
2864                 if (a->kcfg.sz != b->kcfg.sz)
2865                         return a->kcfg.sz < b->kcfg.sz ? -1 : 1;
2866         }
2867
2868         /* resolve ties by name */
2869         return strcmp(a->name, b->name);
2870 }
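
/* Editor's note: given hypothetical kcfg externs, the comparator above
 * would order them like so:
 *
 *   unsigned long CONFIG_A;        // align 8, size 8
 *   int CONFIG_B;                  // align 4, size 4
 *   enum libbpf_tristate CONFIG_C; // align 4, size 4 (tie broken by name)
 *   bool CONFIG_D;                 // align 1, size 1
 *
 * Descending alignment first means the offset assignment loop in
 * bpf_object__collect_externs() below packs .kconfig densely, with
 * minimal padding from roundup().
 */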
2871
2872 static int find_int_btf_id(const struct btf *btf)
2873 {
2874         const struct btf_type *t;
2875         int i, n;
2876
2877         n = btf__get_nr_types(btf);
2878         for (i = 1; i <= n; i++) {
2879                 t = btf__type_by_id(btf, i);
2880
2881                 if (btf_is_int(t) && btf_int_bits(t) == 32)
2882                         return i;
2883         }
2884
2885         return 0;
2886 }
2887
2888 static int bpf_object__collect_externs(struct bpf_object *obj)
2889 {
2890         struct btf_type *sec, *kcfg_sec = NULL, *ksym_sec = NULL;
2891         const struct btf_type *t;
2892         struct extern_desc *ext;
2893         int i, n, off;
2894         const char *ext_name, *sec_name;
2895         Elf_Scn *scn;
2896         GElf_Shdr sh;
2897
2898         if (!obj->efile.symbols)
2899                 return 0;
2900
2901         scn = elf_getscn(obj->efile.elf, obj->efile.symbols_shndx);
2902         if (!scn)
2903                 return -LIBBPF_ERRNO__FORMAT;
2904         if (gelf_getshdr(scn, &sh) != &sh)
2905                 return -LIBBPF_ERRNO__FORMAT;
2906         n = sh.sh_size / sh.sh_entsize;
2907
2908         pr_debug("looking for externs among %d symbols...\n", n);
2909         for (i = 0; i < n; i++) {
2910                 GElf_Sym sym;
2911
2912                 if (!gelf_getsym(obj->efile.symbols, i, &sym))
2913                         return -LIBBPF_ERRNO__FORMAT;
2914                 if (!sym_is_extern(&sym))
2915                         continue;
2916                 ext_name = elf_strptr(obj->efile.elf, obj->efile.strtabidx,
2917                                       sym.st_name);
2918                 if (!ext_name || !ext_name[0])
2919                         continue;
2920
2921                 ext = obj->externs;
2922                 ext = libbpf_reallocarray(ext, obj->nr_extern + 1, sizeof(*ext));
2923                 if (!ext)
2924                         return -ENOMEM;
2925                 obj->externs = ext;
2926                 ext = &ext[obj->nr_extern];
2927                 memset(ext, 0, sizeof(*ext));
2928                 obj->nr_extern++;
2929
2930                 ext->btf_id = find_extern_btf_id(obj->btf, ext_name);
2931                 if (ext->btf_id <= 0) {
2932                         pr_warn("failed to find BTF for extern '%s': %d\n",
2933                                 ext_name, ext->btf_id);
2934                         return ext->btf_id;
2935                 }
2936                 t = btf__type_by_id(obj->btf, ext->btf_id);
2937                 ext->name = btf__name_by_offset(obj->btf, t->name_off);
2938                 ext->sym_idx = i;
2939                 ext->is_weak = GELF_ST_BIND(sym.st_info) == STB_WEAK;
2940
2941                 ext->sec_btf_id = find_extern_sec_btf_id(obj->btf, ext->btf_id);
2942                 if (ext->sec_btf_id <= 0) {
2943                         pr_warn("failed to find BTF for extern '%s' [%d] section: %d\n",
2944                                 ext_name, ext->btf_id, ext->sec_btf_id);
2945                         return ext->sec_btf_id;
2946                 }
2947                 sec = (void *)btf__type_by_id(obj->btf, ext->sec_btf_id);
2948                 sec_name = btf__name_by_offset(obj->btf, sec->name_off);
2949
2950                 if (strcmp(sec_name, KCONFIG_SEC) == 0) {
2951                         kcfg_sec = sec;
2952                         ext->type = EXT_KCFG;
2953                         ext->kcfg.sz = btf__resolve_size(obj->btf, t->type);
2954                         if (ext->kcfg.sz <= 0) {
2955                                 pr_warn("failed to resolve size of extern (kcfg) '%s': %d\n",
2956                                         ext_name, ext->kcfg.sz);
2957                                 return ext->kcfg.sz;
2958                         }
2959                         ext->kcfg.align = btf__align_of(obj->btf, t->type);
2960                         if (ext->kcfg.align <= 0) {
2961                                 pr_warn("failed to determine alignment of extern (kcfg) '%s': %d\n",
2962                                         ext_name, ext->kcfg.align);
2963                                 return -EINVAL;
2964                         }
2965                         ext->kcfg.type = find_kcfg_type(obj->btf, t->type,
2966                                                         &ext->kcfg.is_signed);
2967                         if (ext->kcfg.type == KCFG_UNKNOWN) {
2968                                 pr_warn("extern (kcfg) '%s' type is unsupported\n", ext_name);
2969                                 return -ENOTSUP;
2970                         }
2971                 } else if (strcmp(sec_name, KSYMS_SEC) == 0) {
2972                         const struct btf_type *vt;
2973
2974                         ksym_sec = sec;
2975                         ext->type = EXT_KSYM;
2976
2977                         vt = skip_mods_and_typedefs(obj->btf, t->type, NULL);
2978                         if (!btf_is_void(vt)) {
2979                                 pr_warn("extern (ksym) '%s' is not typeless (void)\n", ext_name);
2980                                 return -ENOTSUP;
2981                         }
2982                 } else {
2983                         pr_warn("unrecognized extern section '%s'\n", sec_name);
2984                         return -ENOTSUP;
2985                 }
2986         }
2987         pr_debug("collected %d externs total\n", obj->nr_extern);
2988
2989         if (!obj->nr_extern)
2990                 return 0;
2991
2992         /* sort externs by type, for kcfg ones also by (align, size, name) */
2993         qsort(obj->externs, obj->nr_extern, sizeof(*ext), cmp_externs);
2994
2995         /* for .ksyms section, we need to turn all externs into allocated
2996          * variables in BTF to pass kernel verification; we do this by
2997          * pretending that each extern is an int-sized (4-byte) variable
2998          */
2999         if (ksym_sec) {
3000                 /* find existing 4-byte integer type in BTF to use for fake
3001                  * extern variables in DATASEC
3002                  */
3003                 int int_btf_id = find_int_btf_id(obj->btf);
3004
3005                 for (i = 0; i < obj->nr_extern; i++) {
3006                         ext = &obj->externs[i];
3007                         if (ext->type != EXT_KSYM)
3008                                 continue;
3009                         pr_debug("extern (ksym) #%d: symbol %d, name %s\n",
3010                                  i, ext->sym_idx, ext->name);
3011                 }
3012
3013                 sec = ksym_sec;
3014                 n = btf_vlen(sec);
3015                 for (i = 0, off = 0; i < n; i++, off += sizeof(int)) {
3016                         struct btf_var_secinfo *vs = btf_var_secinfos(sec) + i;
3017                         struct btf_type *vt;
3018
3019                         vt = (void *)btf__type_by_id(obj->btf, vs->type);
3020                         ext_name = btf__name_by_offset(obj->btf, vt->name_off);
3021                         ext = find_extern_by_name(obj, ext_name);
3022                         if (!ext) {
3023                                 pr_warn("failed to find extern definition for BTF var '%s'\n",
3024                                         ext_name);
3025                                 return -ESRCH;
3026                         }
3027                         btf_var(vt)->linkage = BTF_VAR_GLOBAL_ALLOCATED;
3028                         vt->type = int_btf_id;
3029                         vs->offset = off;
3030                         vs->size = sizeof(int);
3031                 }
3032                 sec->size = off;
3033         }
3034
3035         if (kcfg_sec) {
3036                 sec = kcfg_sec;
3037                 /* for kcfg externs calculate their offsets within a .kconfig map */
3038                 off = 0;
3039                 for (i = 0; i < obj->nr_extern; i++) {
3040                         ext = &obj->externs[i];
3041                         if (ext->type != EXT_KCFG)
3042                                 continue;
3043
3044                         ext->kcfg.data_off = roundup(off, ext->kcfg.align);
3045                         off = ext->kcfg.data_off + ext->kcfg.sz;
3046                         pr_debug("extern (kcfg) #%d: symbol %d, off %u, name %s\n",
3047                                  i, ext->sym_idx, ext->kcfg.data_off, ext->name);
3048                 }
3049                 sec->size = off;
3050                 n = btf_vlen(sec);
3051                 for (i = 0; i < n; i++) {
3052                         struct btf_var_secinfo *vs = btf_var_secinfos(sec) + i;
3053
3054                         t = btf__type_by_id(obj->btf, vs->type);
3055                         ext_name = btf__name_by_offset(obj->btf, t->name_off);
3056                         ext = find_extern_by_name(obj, ext_name);
3057                         if (!ext) {
3058                                 pr_warn("failed to find extern definition for BTF var '%s'\n",
3059                                         ext_name);
3060                                 return -ESRCH;
3061                         }
3062                         btf_var(t)->linkage = BTF_VAR_GLOBAL_ALLOCATED;
3063                         vs->offset = ext->kcfg.data_off;
3064                 }
3065         }
3066         return 0;
3067 }
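
/* Editor's note: conceptually, the .ksyms fixup above rewrites the BTF
 * for a single (illustrative) extern from
 *
 *   DATASEC .ksyms: VAR 'bpf_prog_active' (extern, type void)
 *
 * into something the kernel verifier will accept:
 *
 *   DATASEC .ksyms, size 4: VAR 'bpf_prog_active' (global-alloc, type int),
 *                           offset 0, size 4
 *
 * This is the editor's reading of the code above, not an exact BTF dump.
 */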
3068
3069 static struct bpf_program *
3070 bpf_object__find_prog_by_idx(struct bpf_object *obj, int idx)
3071 {
3072         struct bpf_program *prog;
3073         size_t i;
3074
3075         for (i = 0; i < obj->nr_programs; i++) {
3076                 prog = &obj->programs[i];
3077                 if (prog->idx == idx)
3078                         return prog;
3079         }
3080         return NULL;
3081 }
3082
3083 struct bpf_program *
3084 bpf_object__find_program_by_title(const struct bpf_object *obj,
3085                                   const char *title)
3086 {
3087         struct bpf_program *pos;
3088
3089         bpf_object__for_each_program(pos, obj) {
3090                 if (pos->section_name && !strcmp(pos->section_name, title))
3091                         return pos;
3092         }
3093         return NULL;
3094 }
3095
3096 struct bpf_program *
3097 bpf_object__find_program_by_name(const struct bpf_object *obj,
3098                                  const char *name)
3099 {
3100         struct bpf_program *prog;
3101
3102         bpf_object__for_each_program(prog, obj) {
3103                 if (!strcmp(prog->name, name))
3104                         return prog;
3105         }
3106         return NULL;
3107 }
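
/* Editor's note: a minimal usage sketch (object and names are
 * hypothetical). The two lookups differ: by_title() matches the ELF
 * section name, while by_name() matches the program's C function name:
 *
 *   struct bpf_program *p1, *p2;
 *
 *   p1 = bpf_object__find_program_by_title(obj, "tp/sched/sched_switch");
 *   p2 = bpf_object__find_program_by_name(obj, "handle_switch");
 *
 * Both return NULL when no program matches.
 */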
3108
3109 static bool bpf_object__shndx_is_data(const struct bpf_object *obj,
3110                                       int shndx)
3111 {
3112         return shndx == obj->efile.data_shndx ||
3113                shndx == obj->efile.bss_shndx ||
3114                shndx == obj->efile.rodata_shndx;
3115 }
3116
3117 static bool bpf_object__shndx_is_maps(const struct bpf_object *obj,
3118                                       int shndx)
3119 {
3120         return shndx == obj->efile.maps_shndx ||
3121                shndx == obj->efile.btf_maps_shndx;
3122 }
3123
3124 static enum libbpf_map_type
3125 bpf_object__section_to_libbpf_map_type(const struct bpf_object *obj, int shndx)
3126 {
3127         if (shndx == obj->efile.data_shndx)
3128                 return LIBBPF_MAP_DATA;
3129         else if (shndx == obj->efile.bss_shndx)
3130                 return LIBBPF_MAP_BSS;
3131         else if (shndx == obj->efile.rodata_shndx)
3132                 return LIBBPF_MAP_RODATA;
3133         else if (shndx == obj->efile.symbols_shndx)
3134                 return LIBBPF_MAP_KCONFIG;
3135         else
3136                 return LIBBPF_MAP_UNSPEC;
3137 }
3138
3139 static int bpf_program__record_reloc(struct bpf_program *prog,
3140                                      struct reloc_desc *reloc_desc,
3141                                      __u32 insn_idx, const char *name,
3142                                      const GElf_Sym *sym, const GElf_Rel *rel)
3143 {
3144         struct bpf_insn *insn = &prog->insns[insn_idx];
3145         size_t map_idx, nr_maps = prog->obj->nr_maps;
3146         struct bpf_object *obj = prog->obj;
3147         __u32 shdr_idx = sym->st_shndx;
3148         enum libbpf_map_type type;
3149         struct bpf_map *map;
3150
3151         /* sub-program call relocation */
3152         if (insn->code == (BPF_JMP | BPF_CALL)) {
3153                 if (insn->src_reg != BPF_PSEUDO_CALL) {
3154                         pr_warn("incorrect bpf_call opcode\n");
3155                         return -LIBBPF_ERRNO__RELOC;
3156                 }
3157                 /* text_shndx can be 0 if no default "main" program exists */
3158                 if (!shdr_idx || shdr_idx != obj->efile.text_shndx) {
3159                         pr_warn("bad call relo against section %u\n", shdr_idx);
3160                         return -LIBBPF_ERRNO__RELOC;
3161                 }
3162                 if (sym->st_value % 8) {
3163                         pr_warn("bad call relo offset: %zu\n",
3164                                 (size_t)sym->st_value);
3165                         return -LIBBPF_ERRNO__RELOC;
3166                 }
3167                 reloc_desc->type = RELO_CALL;
3168                 reloc_desc->insn_idx = insn_idx;
3169                 reloc_desc->sym_off = sym->st_value;
3170                 obj->has_pseudo_calls = true;
3171                 return 0;
3172         }
3173
3174         if (insn->code != (BPF_LD | BPF_IMM | BPF_DW)) {
3175                 pr_warn("invalid relo for insns[%d].code 0x%x\n",
3176                         insn_idx, insn->code);
3177                 return -LIBBPF_ERRNO__RELOC;
3178         }
3179
3180         if (sym_is_extern(sym)) {
3181                 int sym_idx = GELF_R_SYM(rel->r_info);
3182                 int i, n = obj->nr_extern;
3183                 struct extern_desc *ext;
3184
3185                 for (i = 0; i < n; i++) {
3186                         ext = &obj->externs[i];
3187                         if (ext->sym_idx == sym_idx)
3188                                 break;
3189                 }
3190                 if (i >= n) {
3191                         pr_warn("extern relo failed to find extern for sym %d\n",
3192                                 sym_idx);
3193                         return -LIBBPF_ERRNO__RELOC;
3194                 }
3195                 pr_debug("found extern #%d '%s' (sym %d) for insn %u\n",
3196                          i, ext->name, ext->sym_idx, insn_idx);
3197                 reloc_desc->type = RELO_EXTERN;
3198                 reloc_desc->insn_idx = insn_idx;
3199                 reloc_desc->sym_off = i; /* sym_off stores extern index */
3200                 return 0;
3201         }
3202
3203         if (!shdr_idx || shdr_idx >= SHN_LORESERVE) {
3204                 pr_warn("invalid relo for '%s' in special section 0x%x; forgot to initialize global var?\n",
3205                         name, shdr_idx);
3206                 return -LIBBPF_ERRNO__RELOC;
3207         }
3208
3209         type = bpf_object__section_to_libbpf_map_type(obj, shdr_idx);
3210
3211         /* generic map reference relocation */
3212         if (type == LIBBPF_MAP_UNSPEC) {
3213                 if (!bpf_object__shndx_is_maps(obj, shdr_idx)) {
3214                         pr_warn("bad map relo against section %u\n",
3215                                 shdr_idx);
3216                         return -LIBBPF_ERRNO__RELOC;
3217                 }
3218                 for (map_idx = 0; map_idx < nr_maps; map_idx++) {
3219                         map = &obj->maps[map_idx];
3220                         if (map->libbpf_type != type ||
3221                             map->sec_idx != sym->st_shndx ||
3222                             map->sec_offset != sym->st_value)
3223                                 continue;
3224                         pr_debug("found map %zd (%s, sec %d, off %zu) for insn %u\n",
3225                                  map_idx, map->name, map->sec_idx,
3226                                  map->sec_offset, insn_idx);
3227                         break;
3228                 }
3229                 if (map_idx >= nr_maps) {
3230                         pr_warn("map relo failed to find map for sec %u, off %zu\n",
3231                                 shdr_idx, (size_t)sym->st_value);
3232                         return -LIBBPF_ERRNO__RELOC;
3233                 }
3234                 reloc_desc->type = RELO_LD64;
3235                 reloc_desc->insn_idx = insn_idx;
3236                 reloc_desc->map_idx = map_idx;
3237                 reloc_desc->sym_off = 0; /* sym->st_value determines map_idx */
3238                 return 0;
3239         }
3240
3241         /* global data map relocation */
3242         if (!bpf_object__shndx_is_data(obj, shdr_idx)) {
3243                 pr_warn("bad data relo against section %u\n", shdr_idx);
3244                 return -LIBBPF_ERRNO__RELOC;
3245         }
3246         for (map_idx = 0; map_idx < nr_maps; map_idx++) {
3247                 map = &obj->maps[map_idx];
3248                 if (map->libbpf_type != type)
3249                         continue;
3250                 pr_debug("found data map %zd (%s, sec %d, off %zu) for insn %u\n",
3251                          map_idx, map->name, map->sec_idx, map->sec_offset,
3252                          insn_idx);
3253                 break;
3254         }
3255         if (map_idx >= nr_maps) {
3256                 pr_warn("data relo failed to find map for sec %u\n",
3257                         shdr_idx);
3258                 return -LIBBPF_ERRNO__RELOC;
3259         }
3260
3261         reloc_desc->type = RELO_DATA;
3262         reloc_desc->insn_idx = insn_idx;
3263         reloc_desc->map_idx = map_idx;
3264         reloc_desc->sym_off = sym->st_value;
3265         return 0;
3266 }
3267
3268 static int
3269 bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr,
3270                            Elf_Data *data, struct bpf_object *obj)
3271 {
3272         Elf_Data *symbols = obj->efile.symbols;
3273         int err, i, nrels;
3274
3275         pr_debug("collecting relocation info for: '%s'\n", prog->section_name);
3276         nrels = shdr->sh_size / shdr->sh_entsize;
3277
3278         prog->reloc_desc = malloc(sizeof(*prog->reloc_desc) * nrels);
3279         if (!prog->reloc_desc) {
3280                 pr_warn("failed to alloc memory in relocation\n");
3281                 return -ENOMEM;
3282         }
3283         prog->nr_reloc = nrels;
3284
3285         for (i = 0; i < nrels; i++) {
3286                 const char *name;
3287                 __u32 insn_idx;
3288                 GElf_Sym sym;
3289                 GElf_Rel rel;
3290
3291                 if (!gelf_getrel(data, i, &rel)) {
3292                         pr_warn("relocation: failed to get %d reloc\n", i);
3293                         return -LIBBPF_ERRNO__FORMAT;
3294                 }
3295                 if (!gelf_getsym(symbols, GELF_R_SYM(rel.r_info), &sym)) {
3296                         pr_warn("relocation: symbol %"PRIx64" not found\n",
3297                                 GELF_R_SYM(rel.r_info));
3298                         return -LIBBPF_ERRNO__FORMAT;
3299                 }
3300                 if (rel.r_offset % sizeof(struct bpf_insn))
3301                         return -LIBBPF_ERRNO__FORMAT;
3302
3303                 insn_idx = rel.r_offset / sizeof(struct bpf_insn);
3304                 name = elf_strptr(obj->efile.elf, obj->efile.strtabidx,
3305                                   sym.st_name) ? : "<?>";
3306
3307                 pr_debug("relo for shdr %u, symb %zu, value %zu, type %d, bind %d, name %d ('%s'), insn %u\n",
3308                          (__u32)sym.st_shndx, (size_t)GELF_R_SYM(rel.r_info),
3309                          (size_t)sym.st_value, GELF_ST_TYPE(sym.st_info),
3310                          GELF_ST_BIND(sym.st_info), sym.st_name, name,
3311                          insn_idx);
3312
3313                 err = bpf_program__record_reloc(prog, &prog->reloc_desc[i],
3314                                                 insn_idx, name, &sym, &rel);
3315                 if (err)
3316                         return err;
3317         }
3318         return 0;
3319 }
3320
3321 static int bpf_map_find_btf_info(struct bpf_object *obj, struct bpf_map *map)
3322 {
3323         struct bpf_map_def *def = &map->def;
3324         __u32 key_type_id = 0, value_type_id = 0;
3325         int ret;
3326
3327         /* if it's a BTF-defined map, we don't need to search for type IDs.
3328          * A struct_ops map does not need btf_key_type_id or
3329          * btf_value_type_id either.
3330          */
3331         if (map->sec_idx == obj->efile.btf_maps_shndx ||
3332             bpf_map__is_struct_ops(map))
3333                 return 0;
3334
3335         if (!bpf_map__is_internal(map)) {
3336                 ret = btf__get_map_kv_tids(obj->btf, map->name, def->key_size,
3337                                            def->value_size, &key_type_id,
3338                                            &value_type_id);
3339         } else {
3340                 /*
3341                  * LLVM annotates global data differently in BTF, that is,
3342                  * only as '.data', '.bss' or '.rodata'.
3343                  */
3344                 ret = btf__find_by_name(obj->btf,
3345                                 libbpf_type_to_btf_name[map->libbpf_type]);
3346         }
3347         if (ret < 0)
3348                 return ret;
3349
3350         map->btf_key_type_id = key_type_id;
3351         map->btf_value_type_id = bpf_map__is_internal(map) ?
3352                                  ret : value_type_id;
3353         return 0;
3354 }
3355
3356 int bpf_map__reuse_fd(struct bpf_map *map, int fd)
3357 {
3358         struct bpf_map_info info = {};
3359         __u32 len = sizeof(info);
3360         int new_fd, err;
3361         char *new_name;
3362
3363         err = bpf_obj_get_info_by_fd(fd, &info, &len);
3364         if (err)
3365                 return err;
3366
3367         new_name = strdup(info.name);
3368         if (!new_name)
3369                 return -errno;
3370
3371         new_fd = open("/", O_RDONLY | O_CLOEXEC);
3372         if (new_fd < 0) {
3373                 err = -errno;
3374                 goto err_free_new_name;
3375         }
3376
3377         new_fd = dup3(fd, new_fd, O_CLOEXEC);
3378         if (new_fd < 0) {
3379                 err = -errno;
3380                 goto err_close_new_fd;
3381         }
3382
3383         err = zclose(map->fd);
3384         if (err) {
3385                 err = -errno;
3386                 goto err_close_new_fd;
3387         }
3388         free(map->name);
3389
3390         map->fd = new_fd;
3391         map->name = new_name;
3392         map->def.type = info.type;
3393         map->def.key_size = info.key_size;
3394         map->def.value_size = info.value_size;
3395         map->def.max_entries = info.max_entries;
3396         map->def.map_flags = info.map_flags;
3397         map->btf_key_type_id = info.btf_key_type_id;
3398         map->btf_value_type_id = info.btf_value_type_id;
3399         map->reused = true;
3400
3401         return 0;
3402
3403 err_close_new_fd:
3404         close(new_fd);
3405 err_free_new_name:
3406         free(new_name);
3407         return err;
3408 }
3409
3410 __u32 bpf_map__max_entries(const struct bpf_map *map)
3411 {
3412         return map->def.max_entries;
3413 }
3414
3415 int bpf_map__set_max_entries(struct bpf_map *map, __u32 max_entries)
3416 {
3417         if (map->fd >= 0)
3418                 return -EBUSY;
3419         map->def.max_entries = max_entries;
3420         return 0;
3421 }
3422
3423 int bpf_map__resize(struct bpf_map *map, __u32 max_entries)
3424 {
3425         if (!map || !max_entries)
3426                 return -EINVAL;
3427
3428         return bpf_map__set_max_entries(map, max_entries);
3429 }
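
/* Editor's note: a hedged usage sketch. Resizing only works before the
 * map is created, i.e. before bpf_object__load(); afterwards the fd check
 * above makes it fail with -EBUSY. The map name is hypothetical:
 *
 *   struct bpf_map *map = bpf_object__find_map_by_name(obj, "events");
 *
 *   if (map)
 *           bpf_map__resize(map, 4096);
 */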
3430
3431 static int
3432 bpf_object__probe_loading(struct bpf_object *obj)
3433 {
3434         struct bpf_load_program_attr attr;
3435         char *cp, errmsg[STRERR_BUFSIZE];
3436         struct bpf_insn insns[] = {
3437                 BPF_MOV64_IMM(BPF_REG_0, 0),
3438                 BPF_EXIT_INSN(),
3439         };
3440         int ret;
3441
3442         /* make sure basic loading works */
3443
3444         memset(&attr, 0, sizeof(attr));
3445         attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
3446         attr.insns = insns;
3447         attr.insns_cnt = ARRAY_SIZE(insns);
3448         attr.license = "GPL";
3449
3450         ret = bpf_load_program_xattr(&attr, NULL, 0);
3451         if (ret < 0) {
3452                 ret = errno;
3453                 cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg));
3454                 pr_warn("Error in %s():%s(%d). Couldn't load trivial BPF "
3455                         "program. Make sure your kernel supports BPF "
3456                         "(CONFIG_BPF_SYSCALL=y) and/or that RLIMIT_MEMLOCK is "
3457                         "set to a big enough value.\n", __func__, cp, ret);
3458                 return -ret;
3459         }
3460         close(ret);
3461
3462         return 0;
3463 }
3464
3465 static int probe_fd(int fd)
3466 {
3467         if (fd >= 0)
3468                 close(fd);
3469         return fd >= 0;
3470 }
3471
3472 static int probe_kern_prog_name(void)
3473 {
3474         struct bpf_load_program_attr attr;
3475         struct bpf_insn insns[] = {
3476                 BPF_MOV64_IMM(BPF_REG_0, 0),
3477                 BPF_EXIT_INSN(),
3478         };
3479         int ret;
3480
3481         /* make sure loading with name works */
3482
3483         memset(&attr, 0, sizeof(attr));
3484         attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
3485         attr.insns = insns;
3486         attr.insns_cnt = ARRAY_SIZE(insns);
3487         attr.license = "GPL";
3488         attr.name = "test";
3489         ret = bpf_load_program_xattr(&attr, NULL, 0);
3490         return probe_fd(ret);
3491 }
3492
3493 static int probe_kern_global_data(void)
3494 {
3495         struct bpf_load_program_attr prg_attr;
3496         struct bpf_create_map_attr map_attr;
3497         char *cp, errmsg[STRERR_BUFSIZE];
3498         struct bpf_insn insns[] = {
3499                 BPF_LD_MAP_VALUE(BPF_REG_1, 0, 16),
3500                 BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 42),
3501                 BPF_MOV64_IMM(BPF_REG_0, 0),
3502                 BPF_EXIT_INSN(),
3503         };
3504         int ret, map;
3505
3506         memset(&map_attr, 0, sizeof(map_attr));
3507         map_attr.map_type = BPF_MAP_TYPE_ARRAY;
3508         map_attr.key_size = sizeof(int);
3509         map_attr.value_size = 32;
3510         map_attr.max_entries = 1;
3511
3512         map = bpf_create_map_xattr(&map_attr);
3513         if (map < 0) {
3514                 ret = -errno;
3515                 cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg));
3516                 pr_warn("Error in %s():%s(%d). Couldn't create simple array map.\n",
3517                         __func__, cp, -ret);
3518                 return ret;
3519         }
3520
3521         insns[0].imm = map;
3522
3523         memset(&prg_attr, 0, sizeof(prg_attr));
3524         prg_attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
3525         prg_attr.insns = insns;
3526         prg_attr.insns_cnt = ARRAY_SIZE(insns);
3527         prg_attr.license = "GPL";
3528
3529         ret = bpf_load_program_xattr(&prg_attr, NULL, 0);
3530         close(map);
3531         return probe_fd(ret);
3532 }
3533
3534 static int probe_kern_btf(void)
3535 {
3536         static const char strs[] = "\0int";
3537         __u32 types[] = {
3538                 /* int */
3539                 BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4),
3540         };
3541
3542         return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
3543                                              strs, sizeof(strs)));
3544 }
3545
3546 static int probe_kern_btf_func(void)
3547 {
3548         static const char strs[] = "\0int\0x\0a";
3549         /* void x(int a) {} */
3550         __u32 types[] = {
3551                 /* int */
3552                 BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4),  /* [1] */
3553                 /* FUNC_PROTO */                                /* [2] */
3554                 BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 0),
3555                 BTF_PARAM_ENC(7, 1),
3556                 /* FUNC x */                                    /* [3] */
3557                 BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0), 2),
3558         };
3559
3560         return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
3561                                              strs, sizeof(strs)));
3562 }
3563
3564 static int probe_kern_btf_func_global(void)
3565 {
3566         static const char strs[] = "\0int\0x\0a";
3567         /* void x(int a) {} -- same as above, but with global linkage */
3568         __u32 types[] = {
3569                 /* int */
3570                 BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4),  /* [1] */
3571                 /* FUNC_PROTO */                                /* [2] */
3572                 BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 0),
3573                 BTF_PARAM_ENC(7, 1),
3574                 /* FUNC x BTF_FUNC_GLOBAL */                    /* [3] */
3575                 BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, BTF_FUNC_GLOBAL), 2),
3576         };
3577
3578         return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
3579                                              strs, sizeof(strs)));
3580 }
3581
3582 static int probe_kern_btf_datasec(void)
3583 {
3584         static const char strs[] = "\0x\0.data";
3585         /* static int x; */
3586         __u32 types[] = {
3587                 /* int */
3588                 BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),  /* [1] */
3589                 /* VAR x */                                     /* [2] */
3590                 BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1),
3591                 BTF_VAR_STATIC,
3592                 /* DATASEC .data */                             /* [3] */
3593                 BTF_TYPE_ENC(3, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4),
3594                 BTF_VAR_SECINFO_ENC(2, 0, 4),
3595         };
3596
3597         return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
3598                                              strs, sizeof(strs)));
3599 }
3600
3601 static int probe_kern_array_mmap(void)
3602 {
3603         struct bpf_create_map_attr attr = {
3604                 .map_type = BPF_MAP_TYPE_ARRAY,
3605                 .map_flags = BPF_F_MMAPABLE,
3606                 .key_size = sizeof(int),
3607                 .value_size = sizeof(int),
3608                 .max_entries = 1,
3609         };
3610
3611         return probe_fd(bpf_create_map_xattr(&attr));
3612 }
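
/* Editor's note: BPF_F_MMAPABLE is what allows user space to mmap() an
 * ARRAY map's value area directly; libbpf relies on it for .data/.bss/
 * .rodata/.kconfig access via map->mmaped. A rough sketch (mmap_sz and
 * map_fd are assumptions, not values computed here):
 *
 *   void *mem = mmap(NULL, mmap_sz, PROT_READ | PROT_WRITE, MAP_SHARED,
 *                    map_fd, 0);
 */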
3613
3614 static int probe_kern_exp_attach_type(void)
3615 {
3616         struct bpf_load_program_attr attr;
3617         struct bpf_insn insns[] = {
3618                 BPF_MOV64_IMM(BPF_REG_0, 0),
3619                 BPF_EXIT_INSN(),
3620         };
3621
3622         memset(&attr, 0, sizeof(attr));
3623         /* use any valid combination of program type and (optional)
3624          * non-zero expected attach type (i.e., not BPF_CGROUP_INET_INGRESS,
3625          * which is zero) to check whether the kernel supports the
3626          * expected_attach_type field of the BPF_PROG_LOAD command
3627          */
3628         attr.prog_type = BPF_PROG_TYPE_CGROUP_SOCK;
3629         attr.expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE;
3630         attr.insns = insns;
3631         attr.insns_cnt = ARRAY_SIZE(insns);
3632         attr.license = "GPL";
3633
3634         return probe_fd(bpf_load_program_xattr(&attr, NULL, 0));
3635 }
3636
3637 static int probe_kern_probe_read_kernel(void)
3638 {
3639         struct bpf_load_program_attr attr;
3640         struct bpf_insn insns[] = {
3641                 BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),   /* r1 = r10 (fp) */
3642                 BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),  /* r1 += -8 */
3643                 BPF_MOV64_IMM(BPF_REG_2, 8),            /* r2 = 8 */
3644                 BPF_MOV64_IMM(BPF_REG_3, 0),            /* r3 = 0 */
3645                 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_probe_read_kernel),
3646                 BPF_EXIT_INSN(),
3647         };
3648
3649         memset(&attr, 0, sizeof(attr));
3650         attr.prog_type = BPF_PROG_TYPE_KPROBE;
3651         attr.insns = insns;
3652         attr.insns_cnt = ARRAY_SIZE(insns);
3653         attr.license = "GPL";
3654
3655         return probe_fd(bpf_load_program_xattr(&attr, NULL, 0));
3656 }
3657
3658 enum kern_feature_result {
3659         FEAT_UNKNOWN = 0,
3660         FEAT_SUPPORTED = 1,
3661         FEAT_MISSING = 2,
3662 };
3663
3664 typedef int (*feature_probe_fn)(void);
3665
3666 static struct kern_feature_desc {
3667         const char *desc;
3668         feature_probe_fn probe;
3669         enum kern_feature_result res;
3670 } feature_probes[__FEAT_CNT] = {
3671         [FEAT_PROG_NAME] = {
3672                 "BPF program name", probe_kern_prog_name,
3673         },
3674         [FEAT_GLOBAL_DATA] = {
3675                 "global variables", probe_kern_global_data,
3676         },
3677         [FEAT_BTF] = {
3678                 "minimal BTF", probe_kern_btf,
3679         },
3680         [FEAT_BTF_FUNC] = {
3681                 "BTF functions", probe_kern_btf_func,
3682         },
3683         [FEAT_BTF_GLOBAL_FUNC] = {
3684                 "BTF global function", probe_kern_btf_func_global,
3685         },
3686         [FEAT_BTF_DATASEC] = {
3687                 "BTF data section and variable", probe_kern_btf_datasec,
3688         },
3689         [FEAT_ARRAY_MMAP] = {
3690                 "ARRAY map mmap()", probe_kern_array_mmap,
3691         },
3692         [FEAT_EXP_ATTACH_TYPE] = {
3693                 "BPF_PROG_LOAD expected_attach_type attribute",
3694                 probe_kern_exp_attach_type,
3695         },
3696         [FEAT_PROBE_READ_KERN] = {
3697                 "bpf_probe_read_kernel() helper", probe_kern_probe_read_kernel,
3698         }
3699 };
3700
3701 static bool kernel_supports(enum kern_feature_id feat_id)
3702 {
3703         struct kern_feature_desc *feat = &feature_probes[feat_id];
3704         int ret;
3705
3706         if (READ_ONCE(feat->res) == FEAT_UNKNOWN) {
3707                 ret = feat->probe();
3708                 if (ret > 0) {
3709                         WRITE_ONCE(feat->res, FEAT_SUPPORTED);
3710                 } else if (ret == 0) {
3711                         WRITE_ONCE(feat->res, FEAT_MISSING);
3712                 } else {
3713                         pr_warn("Detection of kernel %s support failed: %d\n", feat->desc, ret);
3714                         WRITE_ONCE(feat->res, FEAT_MISSING);
3715                 }
3716         }
3717
3718         return READ_ONCE(feat->res) == FEAT_SUPPORTED;
3719 }
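
/* Editor's note: the typical call pattern, as used later in this file
 * when deciding whether to pass names to the kernel:
 *
 *   if (kernel_supports(FEAT_PROG_NAME))
 *           create_attr.name = map->name;
 *
 * Probe results are cached in feature_probes[], so each feature is
 * detected at most once per process.
 */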
3720
3721 static bool map_is_reuse_compat(const struct bpf_map *map, int map_fd)
3722 {
3723         struct bpf_map_info map_info = {};
3724         char msg[STRERR_BUFSIZE];
3725         __u32 map_info_len;
3726
3727         map_info_len = sizeof(map_info);
3728
3729         if (bpf_obj_get_info_by_fd(map_fd, &map_info, &map_info_len)) {
3730                 pr_warn("failed to get map info for map FD %d: %s\n",
3731                         map_fd, libbpf_strerror_r(errno, msg, sizeof(msg)));
3732                 return false;
3733         }
3734
3735         return (map_info.type == map->def.type &&
3736                 map_info.key_size == map->def.key_size &&
3737                 map_info.value_size == map->def.value_size &&
3738                 map_info.max_entries == map->def.max_entries &&
3739                 map_info.map_flags == map->def.map_flags);
3740 }
3741
3742 static int
3743 bpf_object__reuse_map(struct bpf_map *map)
3744 {
3745         char *cp, errmsg[STRERR_BUFSIZE];
3746         int err, pin_fd;
3747
3748         pin_fd = bpf_obj_get(map->pin_path);
3749         if (pin_fd < 0) {
3750                 err = -errno;
3751                 if (err == -ENOENT) {
3752                         pr_debug("found no pinned map to reuse at '%s'\n",
3753                                  map->pin_path);
3754                         return 0;
3755                 }
3756
3757                 cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg));
3758                 pr_warn("couldn't retrieve pinned map '%s': %s\n",
3759                         map->pin_path, cp);
3760                 return err;
3761         }
3762
3763         if (!map_is_reuse_compat(map, pin_fd)) {
3764                 pr_warn("couldn't reuse pinned map at '%s': parameter mismatch\n",
3765                         map->pin_path);
3766                 close(pin_fd);
3767                 return -EINVAL;
3768         }
3769
3770         err = bpf_map__reuse_fd(map, pin_fd);
3771         if (err) {
3772                 close(pin_fd);
3773                 return err;
3774         }
3775         map->pinned = true;
3776         pr_debug("reused pinned map at '%s'\n", map->pin_path);
3777
3778         return 0;
3779 }
3780
3781 static int
3782 bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map)
3783 {
3784         enum libbpf_map_type map_type = map->libbpf_type;
3785         char *cp, errmsg[STRERR_BUFSIZE];
3786         int err, zero = 0;
3787
3788         err = bpf_map_update_elem(map->fd, &zero, map->mmaped, 0);
3789         if (err) {
3790                 err = -errno;
3791                 cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
3792                 pr_warn("Error setting initial map(%s) contents: %s\n",
3793                         map->name, cp);
3794                 return err;
3795         }
3796
3797         /* Freeze .rodata and .kconfig map as read-only from syscall side. */
3798         if (map_type == LIBBPF_MAP_RODATA || map_type == LIBBPF_MAP_KCONFIG) {
3799                 err = bpf_map_freeze(map->fd);
3800                 if (err) {
3801                         err = -errno;
3802                         cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
3803                         pr_warn("Error freezing map(%s) as read-only: %s\n",
3804                                 map->name, cp);
3805                         return err;
3806                 }
3807         }
3808         return 0;
3809 }
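
/* Editor's note: internal maps (.data/.bss/.rodata/.kconfig) are
 * single-entry ARRAY maps whose one value is the entire section image,
 * so the update above effectively amounts to:
 *
 *   bpf_map_update_elem(fd, &zero, whole_section_blob, 0);
 *
 * Freezing then makes .rodata/.kconfig immutable from the syscall side,
 * while BPF programs can still read them.
 */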
3810
3811 static void bpf_map__destroy(struct bpf_map *map);
3812
3813 static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map)
3814 {
3815         struct bpf_create_map_attr create_attr;
3816         struct bpf_map_def *def = &map->def;
3817
3818         memset(&create_attr, 0, sizeof(create_attr));
3819
3820         if (kernel_supports(FEAT_PROG_NAME))
3821                 create_attr.name = map->name;
3822         create_attr.map_ifindex = map->map_ifindex;
3823         create_attr.map_type = def->type;
3824         create_attr.map_flags = def->map_flags;
3825         create_attr.key_size = def->key_size;
3826         create_attr.value_size = def->value_size;
3827         create_attr.numa_node = map->numa_node;
3828
3829         if (def->type == BPF_MAP_TYPE_PERF_EVENT_ARRAY && !def->max_entries) {
3830                 int nr_cpus;
3831
3832                 nr_cpus = libbpf_num_possible_cpus();
3833                 if (nr_cpus < 0) {
3834                         pr_warn("map '%s': failed to determine number of system CPUs: %d\n",
3835                                 map->name, nr_cpus);
3836                         return nr_cpus;
3837                 }
3838                 pr_debug("map '%s': setting size to %d\n", map->name, nr_cpus);
3839                 create_attr.max_entries = nr_cpus;
3840         } else {
3841                 create_attr.max_entries = def->max_entries;
3842         }
3843
3844         if (bpf_map__is_struct_ops(map))
3845                 create_attr.btf_vmlinux_value_type_id =
3846                         map->btf_vmlinux_value_type_id;
3847
3848         create_attr.btf_fd = 0;
3849         create_attr.btf_key_type_id = 0;
3850         create_attr.btf_value_type_id = 0;
3851         if (obj->btf && btf__fd(obj->btf) >= 0 && !bpf_map_find_btf_info(obj, map)) {
3852                 create_attr.btf_fd = btf__fd(obj->btf);
3853                 create_attr.btf_key_type_id = map->btf_key_type_id;
3854                 create_attr.btf_value_type_id = map->btf_value_type_id;
3855         }
3856
3857         if (bpf_map_type__is_map_in_map(def->type)) {
3858                 if (map->inner_map) {
3859                         int err;
3860
3861                         err = bpf_object__create_map(obj, map->inner_map);
3862                         if (err) {
3863                                 pr_warn("map '%s': failed to create inner map: %d\n",
3864                                         map->name, err);
3865                                 return err;
3866                         }
3867                         map->inner_map_fd = bpf_map__fd(map->inner_map);
3868                 }
3869                 if (map->inner_map_fd >= 0)
3870                         create_attr.inner_map_fd = map->inner_map_fd;
3871         }
3872
3873         map->fd = bpf_create_map_xattr(&create_attr);
3874         if (map->fd < 0 && (create_attr.btf_key_type_id ||
3875                             create_attr.btf_value_type_id)) {
3876                 char *cp, errmsg[STRERR_BUFSIZE];
3877                 int err = -errno;
3878
3879                 cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
3880                 pr_warn("Error in bpf_create_map_xattr(%s):%s(%d). Retrying without BTF.\n",
3881                         map->name, cp, err);
3882                 create_attr.btf_fd = 0;
3883                 create_attr.btf_key_type_id = 0;
3884                 create_attr.btf_value_type_id = 0;
3885                 map->btf_key_type_id = 0;
3886                 map->btf_value_type_id = 0;
3887                 map->fd = bpf_create_map_xattr(&create_attr);
3888         }
3889
3890         if (map->fd < 0)
3891                 return -errno;
3892
3893         if (bpf_map_type__is_map_in_map(def->type) && map->inner_map) {
3894                 bpf_map__destroy(map->inner_map);
3895                 zfree(&map->inner_map);
3896         }
3897
3898         return 0;
3899 }
3900
3901 static int
3902 bpf_object__create_maps(struct bpf_object *obj)
3903 {
3904         struct bpf_map *map;
3905         char *cp, errmsg[STRERR_BUFSIZE];
3906         unsigned int i, j;
3907         int err;
3908
3909         for (i = 0; i < obj->nr_maps; i++) {
3910                 map = &obj->maps[i];
3911
3912                 if (map->pin_path) {
3913                         err = bpf_object__reuse_map(map);
3914                         if (err) {
3915                                 pr_warn("map '%s': error reusing pinned map\n",
3916                                         map->name);
3917                                 goto err_out;
3918                         }
3919                 }
3920
3921                 if (map->fd >= 0) {
3922                         pr_debug("map '%s': skipping creation (preset fd=%d)\n",
3923                                  map->name, map->fd);
3924                         continue;
3925                 }
3926
3927                 err = bpf_object__create_map(obj, map);
3928                 if (err)
3929                         goto err_out;
3930
3931                 pr_debug("map '%s': created successfully, fd=%d\n", map->name,
3932                          map->fd);
3933
3934                 if (bpf_map__is_internal(map)) {
3935                         err = bpf_object__populate_internal_map(obj, map);
3936                         if (err < 0) {
3937                                 zclose(map->fd);
3938                                 goto err_out;
3939                         }
3940                 }
3941
3942                 if (map->init_slots_sz) {
3943                         for (j = 0; j < map->init_slots_sz; j++) {
3944                                 const struct bpf_map *targ_map;
3945                                 int fd;
3946
3947                                 if (!map->init_slots[j])
3948                                         continue;
3949
3950                                 targ_map = map->init_slots[j];
3951                                 fd = bpf_map__fd(targ_map);
3952                                 err = bpf_map_update_elem(map->fd, &j, &fd, 0);
3953                                 if (err) {
3954                                         err = -errno;
3955                                         pr_warn("map '%s': failed to initialize slot [%d] to map '%s' fd=%d: %d\n",
3956                                                 map->name, j, targ_map->name,
3957                                                 fd, err);
3958                                         goto err_out;
3959                                 }
3960                                 pr_debug("map '%s': slot [%d] set to map '%s' fd=%d\n",
3961                                          map->name, j, targ_map->name, fd);
3962                         }
3963                         zfree(&map->init_slots);
3964                         map->init_slots_sz = 0;
3965                 }
3966
3967                 if (map->pin_path && !map->pinned) {
3968                         err = bpf_map__pin(map, NULL);
3969                         if (err) {
3970                                 pr_warn("map '%s': failed to auto-pin at '%s': %d\n",
3971                                         map->name, map->pin_path, err);
3972                                 zclose(map->fd);
3973                                 goto err_out;
3974                         }
3975                 }
3976         }
3977
3978         return 0;
3979
3980 err_out:
3981         cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
3982         pr_warn("map '%s': failed to create: %s(%d)\n", map->name, cp, err);
3983         pr_perm_msg(err);
3984         for (j = 0; j < i; j++)
3985                 zclose(obj->maps[j].fd);
3986         return err;
3987 }
3988
3989 static int
3990 check_btf_ext_reloc_err(struct bpf_program *prog, int err,
3991                         void *btf_prog_info, const char *info_name)
3992 {
3993         if (err != -ENOENT) {
3994                 pr_warn("Error loading %s for sec %s.\n",
3995                         info_name, prog->section_name);
3996                 return err;
3997         }
3998
3999         /* err == -ENOENT (i.e. prog->section_name not found in btf_ext) */
4000
4001         if (btf_prog_info) {
4002                 /*
4003                  * Some info has already been found, but the last
4004                  * btf_ext reloc failed, so we must error out.
4005                  */
4006                 pr_warn("Error in relocating %s for sec %s.\n",
4007                         info_name, prog->section_name);
4008                 return err;
4009         }
4010
4011         /* Failed to load the very first info; ignore the rest. */
4012         pr_warn("Cannot find %s for main program sec %s. Ignoring all %s.\n",
4013                 info_name, prog->section_name, info_name);
4014         return 0;
4015 }
4016
4017 static int
4018 bpf_program_reloc_btf_ext(struct bpf_program *prog, struct bpf_object *obj,
4019                           const char *section_name,  __u32 insn_offset)
4020 {
4021         int err;
4022
4023         if (!insn_offset || prog->func_info) {
4024                 /*
4025                  * !insn_offset => main program
4026                  *
4027                  * For sub prog, the main program's func_info has to
4028                  * be loaded first (i.e. prog->func_info != NULL)
4029                  */
4030                 err = btf_ext__reloc_func_info(obj->btf, obj->btf_ext,
4031                                                section_name, insn_offset,
4032                                                &prog->func_info,
4033                                                &prog->func_info_cnt);
4034                 if (err)
4035                         return check_btf_ext_reloc_err(prog, err,
4036                                                        prog->func_info,
4037                                                        "bpf_func_info");
4038
4039                 prog->func_info_rec_size = btf_ext__func_info_rec_size(obj->btf_ext);
4040         }
4041
4042         if (!insn_offset || prog->line_info) {
4043                 err = btf_ext__reloc_line_info(obj->btf, obj->btf_ext,
4044                                                section_name, insn_offset,
4045                                                &prog->line_info,
4046                                                &prog->line_info_cnt);
4047                 if (err)
4048                         return check_btf_ext_reloc_err(prog, err,
4049                                                        prog->line_info,
4050                                                        "bpf_line_info");
4051
4052                 prog->line_info_rec_size = btf_ext__line_info_rec_size(obj->btf_ext);
4053         }
4054
4055         return 0;
4056 }
4057
4058 #define BPF_CORE_SPEC_MAX_LEN 64
4059
4060 /* represents BPF CO-RE field or array element accessor */
4061 struct bpf_core_accessor {
4062         __u32 type_id;          /* struct/union type or array element type */
4063         __u32 idx;              /* field index or array index */
4064         const char *name;       /* field name or NULL for array accessor */
4065 };
4066
4067 struct bpf_core_spec {
4068         const struct btf *btf;
4069         /* high-level spec: named fields and array indices only */
4070         struct bpf_core_accessor spec[BPF_CORE_SPEC_MAX_LEN];
4071         /* original unresolved (no skip_mods_and_typedefs) root type ID */
4072         __u32 root_type_id;
4073         /* CO-RE relocation kind */
4074         enum bpf_core_relo_kind relo_kind;
4075         /* high-level spec length */
4076         int len;
4077         /* raw, low-level spec: 1-to-1 with accessor spec string */
4078         int raw_spec[BPF_CORE_SPEC_MAX_LEN];
4079         /* raw spec length */
4080         int raw_len;
4081         /* field bit offset represented by spec */
4082         __u32 bit_offset;
4083 };
4084
4085 static bool str_is_empty(const char *s)
4086 {
4087         return !s || !s[0];
4088 }
4089
4090 static bool is_flex_arr(const struct btf *btf,
4091                         const struct bpf_core_accessor *acc,
4092                         const struct btf_array *arr)
4093 {
4094         const struct btf_type *t;
4095
4096         /* not a flexible array if it's not inside a struct or has a non-zero size */
4097         if (!acc->name || arr->nelems > 0)
4098                 return false;
4099
4100         /* has to be the last member of enclosing struct */
4101         t = btf__type_by_id(btf, acc->type_id);
4102         return acc->idx == btf_vlen(t) - 1;
4103 }
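
/* Editor's note: an illustrative flexible array for reference:
 *
 *   struct sample {
 *           int cnt;
 *           int data[];   // nelems == 0 and last member -> flex array
 *   };
 *
 * CO-RE permits out-of-range indices only for such trailing arrays.
 */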
4104
4105 static const char *core_relo_kind_str(enum bpf_core_relo_kind kind)
4106 {
4107         switch (kind) {
4108         case BPF_FIELD_BYTE_OFFSET: return "byte_off";
4109         case BPF_FIELD_BYTE_SIZE: return "byte_sz";
4110         case BPF_FIELD_EXISTS: return "field_exists";
4111         case BPF_FIELD_SIGNED: return "signed";
4112         case BPF_FIELD_LSHIFT_U64: return "lshift_u64";
4113         case BPF_FIELD_RSHIFT_U64: return "rshift_u64";
4114         default: return "unknown";
4115         }
4116 }
4117
4118 static bool core_relo_is_field_based(enum bpf_core_relo_kind kind)
4119 {
4120         switch (kind) {
4121         case BPF_FIELD_BYTE_OFFSET:
4122         case BPF_FIELD_BYTE_SIZE:
4123         case BPF_FIELD_EXISTS:
4124         case BPF_FIELD_SIGNED:
4125         case BPF_FIELD_LSHIFT_U64:
4126         case BPF_FIELD_RSHIFT_U64:
4127                 return true;
4128         default:
4129                 return false;
4130         }
4131 }
4132
4133 /*
4134  * Turn bpf_core_relo into a low- and high-level spec representation,
4135  * validating correctness along the way, as well as calculating resulting
4136  * field bit offset, specified by accessor string. Low-level spec captures
4137  * every single level of nestedness, including traversing anonymous
4138  * struct/union members. High-level one only captures semantically meaningful
4139  * "turning points": named fields and array indices.
4140  * E.g., for this case:
4141  *
4142  *   struct sample {
4143  *       int __unimportant;
4144  *       struct {
4145  *           int __1;
4146  *           int __2;
4147  *           int a[7];
4148  *       };
4149  *   };
4150  *
4151  *   struct sample *s = ...;
4152  *
4153  *   int *x = &s->a[3]; // access string = '0:1:2:3'
4154  *
4155  * Low-level spec has 1:1 mapping with each element of access string (it's
4156  * just a parsed access string representation): [0, 1, 2, 3].
4157  *
4158  * High-level spec will capture only 3 points:
4159  *   - initial zero-index access by pointer (&s->... is the same as &s[0]...);
4160  *   - field 'a' access (corresponds to '2' in low-level spec);
4161  *   - array element #3 access (corresponds to '3' in low-level spec).
4162  *
4163  */
4164 static int bpf_core_parse_spec(const struct btf *btf,
4165                                __u32 type_id,
4166                                const char *spec_str,
4167                                enum bpf_core_relo_kind relo_kind,
4168                                struct bpf_core_spec *spec)
4169 {
4170         int access_idx, parsed_len, i;
4171         struct bpf_core_accessor *acc;
4172         const struct btf_type *t;
4173         const char *name;
4174         __u32 id;
4175         __s64 sz;
4176
4177         if (str_is_empty(spec_str) || *spec_str == ':')
4178                 return -EINVAL;
4179
4180         memset(spec, 0, sizeof(*spec));
4181         spec->btf = btf;
4182         spec->root_type_id = type_id;
4183         spec->relo_kind = relo_kind;
4184
4185         /* parse spec_str="0:1:2:3:4" into array raw_spec=[0, 1, 2, 3, 4] */
4186         while (*spec_str) {
4187                 if (*spec_str == ':')
4188                         ++spec_str;
4189                 if (sscanf(spec_str, "%d%n", &access_idx, &parsed_len) != 1)
4190                         return -EINVAL;
4191                 if (spec->raw_len == BPF_CORE_SPEC_MAX_LEN)
4192                         return -E2BIG;
4193                 spec_str += parsed_len;
4194                 spec->raw_spec[spec->raw_len++] = access_idx;
4195         }
4196
4197         if (spec->raw_len == 0)
4198                 return -EINVAL;
4199
4200         /* first spec value is always reloc type array index */
4201         t = skip_mods_and_typedefs(btf, type_id, &id);
4202         if (!t)
4203                 return -EINVAL;
4204
4205         access_idx = spec->raw_spec[0];
4206         spec->spec[0].type_id = id;
4207         spec->spec[0].idx = access_idx;
4208         spec->len++;
4209
4210         if (!core_relo_is_field_based(relo_kind))
4211                 return -EINVAL;
4212
4213         sz = btf__resolve_size(btf, id);
4214         if (sz < 0)
4215                 return sz;
4216         spec->bit_offset = access_idx * sz * 8;
4217
4218         for (i = 1; i < spec->raw_len; i++) {
4219                 t = skip_mods_and_typedefs(btf, id, &id);
4220                 if (!t)
4221                         return -EINVAL;
4222
4223                 access_idx = spec->raw_spec[i];
4224                 acc = &spec->spec[spec->len];
4225
4226                 if (btf_is_composite(t)) {
4227                         const struct btf_member *m;
4228                         __u32 bit_offset;
4229
4230                         if (access_idx >= btf_vlen(t))
4231                                 return -EINVAL;
4232
4233                         bit_offset = btf_member_bit_offset(t, access_idx);
4234                         spec->bit_offset += bit_offset;
4235
4236                         m = btf_members(t) + access_idx;
4237                         if (m->name_off) {
4238                                 name = btf__name_by_offset(btf, m->name_off);
4239                                 if (str_is_empty(name))
4240                                         return -EINVAL;
4241
4242                                 acc->type_id = id;
4243                                 acc->idx = access_idx;
4244                                 acc->name = name;
4245                                 spec->len++;
4246                         }
4247
4248                         id = m->type;
4249                 } else if (btf_is_array(t)) {
4250                         const struct btf_array *a = btf_array(t);
4251                         bool flex;
4252
4253                         t = skip_mods_and_typedefs(btf, a->type, &id);
4254                         if (!t)
4255                                 return -EINVAL;
4256
4257                         flex = is_flex_arr(btf, acc - 1, a);
4258                         if (!flex && access_idx >= a->nelems)
4259                                 return -EINVAL;
4260
4261                         spec->spec[spec->len].type_id = id;
4262                         spec->spec[spec->len].idx = access_idx;
4263                         spec->len++;
4264
4265                         sz = btf__resolve_size(btf, id);
4266                         if (sz < 0)
4267                                 return sz;
4268                         spec->bit_offset += access_idx * sz * 8;
4269                 } else {
4270                         pr_warn("relo for [%u] %s (at idx %d) captures type [%d] of unexpected kind %s\n",
4271                                 type_id, spec_str, i, id, btf_kind_str(t));
4272                         return -EINVAL;
4273                 }
4274         }
4275
4276         return 0;
4277 }
4278
4279 static bool bpf_core_is_flavor_sep(const char *s)
4280 {
4281         /* check X___Y name pattern, where X and Y are not underscores */
4282         return s[0] != '_' &&                                 /* X */
4283                s[1] == '_' && s[2] == '_' && s[3] == '_' &&   /* ___ */
4284                s[4] != '_';                                   /* Y */
4285 }
4286
4287 /* Given 'some_struct_name___with_flavor' return the length of the name
4288  * prefix before the last triple underscore. The struct name part after the
4289  * last triple underscore is ignored by BPF CO-RE during relocation matching.
4290  */
4291 static size_t bpf_core_essential_name_len(const char *name)
4292 {
4293         size_t n = strlen(name);
4294         int i;
4295
4296         for (i = n - 5; i >= 0; i--) {
4297                 if (bpf_core_is_flavor_sep(name + i))
4298                         return i + 1;
4299         }
4300         return n;
4301 }
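/* E.g. (the second name is a hypothetical flavor):
 *   bpf_core_essential_name_len("task_struct")        -> 11 ("task_struct")
 *   bpf_core_essential_name_len("task_struct___v5_8") -> 11 ("task_struct")
 */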
4302
4303 /* dynamically sized list of type IDs */
4304 struct ids_vec {
4305         __u32 *data;
4306         int len;
4307 };
4308
4309 static void bpf_core_free_cands(struct ids_vec *cand_ids)
4310 {
4311         free(cand_ids->data);
4312         free(cand_ids);
4313 }
4314
4315 static struct ids_vec *bpf_core_find_cands(const struct btf *local_btf,
4316                                            __u32 local_type_id,
4317                                            const struct btf *targ_btf)
4318 {
4319         size_t local_essent_len, targ_essent_len;
4320         const char *local_name, *targ_name, *targ_kind;
4321         const struct btf_type *t, *local_t;
4322         struct ids_vec *cand_ids;
4323         __u32 *new_ids;
4324         int i, err, n;
4325
4326         local_t = btf__type_by_id(local_btf, local_type_id);
4327         if (!local_t)
4328                 return ERR_PTR(-EINVAL);
4329
4330         local_name = btf__name_by_offset(local_btf, local_t->name_off);
4331         if (str_is_empty(local_name))
4332                 return ERR_PTR(-EINVAL);
4333         local_essent_len = bpf_core_essential_name_len(local_name);
4334
4335         cand_ids = calloc(1, sizeof(*cand_ids));
4336         if (!cand_ids)
4337                 return ERR_PTR(-ENOMEM);
4338
4339         n = btf__get_nr_types(targ_btf);
4340         for (i = 1; i <= n; i++) {
4341                 t = btf__type_by_id(targ_btf, i);
4342                 targ_name = btf__name_by_offset(targ_btf, t->name_off);
4343                 if (str_is_empty(targ_name))
4344                         continue;
4345                 targ_kind = btf_kind_str(t);
4346
4347                 t = skip_mods_and_typedefs(targ_btf, i, NULL);
4348                 if (!btf_is_composite(t) && !btf_is_array(t))
4349                         continue;
4350
4351                 targ_essent_len = bpf_core_essential_name_len(targ_name);
4352                 if (targ_essent_len != local_essent_len)
4353                         continue;
4354
4355                 if (strncmp(local_name, targ_name, local_essent_len) == 0) {
4356                         pr_debug("CO-RE relocating [%d] %s %s: found target candidate [%d] %s %s\n",
4357                                  local_type_id, btf_kind_str(local_t),
4358                                  local_name, i, targ_kind, targ_name);
4359                         new_ids = libbpf_reallocarray(cand_ids->data,
4360                                                       cand_ids->len + 1,
4361                                                       sizeof(*cand_ids->data));
4362                         if (!new_ids) {
4363                                 err = -ENOMEM;
4364                                 goto err_out;
4365                         }
4366                         cand_ids->data = new_ids;
4367                         cand_ids->data[cand_ids->len++] = i;
4368                 }
4369         }
4370         return cand_ids;
4371 err_out:
4372         bpf_core_free_cands(cand_ids);
4373         return ERR_PTR(err);
4374 }
4375
4376 /* Check two types for compatibility, skipping const/volatile/restrict and
4377  * typedefs, to ensure we are relocating compatible entities:
4378  *   - any two STRUCTs/UNIONs are compatible and can be mixed;
4379  *   - any two FWDs are compatible, if their names match (modulo flavor suffix);
4380  *   - any two PTRs are always compatible;
4381  *   - for ENUMs, names should be the same (ignoring flavor suffix) or at
4382  *     least one of the enums should be anonymous; sizes are not checked
4383  *     (the code below treats ENUMs the same as FWDs);
4384  *   - for INT, size and signedness are ignored;
4385  *   - for ARRAY, dimensionality is ignored, element types are checked for
4386  *     compatibility recursively;
4387  *   - everything else shouldn't be ever a target of relocation.
4388  * These rules are not set in stone and probably will be adjusted as we get
4389  * more experience with using BPF CO-RE relocations.
4390  */
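/* A sketch of how the rules above apply to hypothetical local/target pairs:
 *
 *   struct s {...}   vs  union u {...}    -> compatible (both composite)
 *   enum e___v1 {..} vs  enum e___v2 {..} -> compatible (same essential name)
 *   int              vs  long             -> compatible (INT size ignored)
 *   int a[4]         vs  int b[8]         -> compatible (ARRAY dims ignored)
 *   int              vs  struct s {...}   -> not compatible (kind mismatch)
 */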
4391 static int bpf_core_fields_are_compat(const struct btf *local_btf,
4392                                       __u32 local_id,
4393                                       const struct btf *targ_btf,
4394                                       __u32 targ_id)
4395 {
4396         const struct btf_type *local_type, *targ_type;
4397
4398 recur:
4399         local_type = skip_mods_and_typedefs(local_btf, local_id, &local_id);
4400         targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id);
4401         if (!local_type || !targ_type)
4402                 return -EINVAL;
4403
4404         if (btf_is_composite(local_type) && btf_is_composite(targ_type))
4405                 return 1;
4406         if (btf_kind(local_type) != btf_kind(targ_type))
4407                 return 0;
4408
4409         switch (btf_kind(local_type)) {
4410         case BTF_KIND_PTR:
4411                 return 1;
4412         case BTF_KIND_FWD:
4413         case BTF_KIND_ENUM: {
4414                 const char *local_name, *targ_name;
4415                 size_t local_len, targ_len;
4416
4417                 local_name = btf__name_by_offset(local_btf,
4418                                                  local_type->name_off);
4419                 targ_name = btf__name_by_offset(targ_btf, targ_type->name_off);
4420                 local_len = bpf_core_essential_name_len(local_name);
4421                 targ_len = bpf_core_essential_name_len(targ_name);
4422                 /* one of them is anonymous or both w/ same flavor-less names */
4423                 return local_len == 0 || targ_len == 0 ||
4424                        (local_len == targ_len &&
4425                         strncmp(local_name, targ_name, local_len) == 0);
4426         }
4427         case BTF_KIND_INT:
4428                 /* just reject deprecated bitfield-like integers; all other
4429                  * integers are by default compatible between each other
4430                  */
4431                 return btf_int_offset(local_type) == 0 &&
4432                        btf_int_offset(targ_type) == 0;
4433         case BTF_KIND_ARRAY:
4434                 local_id = btf_array(local_type)->type;
4435                 targ_id = btf_array(targ_type)->type;
4436                 goto recur;
4437         default:
4438                 pr_warn("unexpected kind %d relocated, local [%d], target [%d]\n",
4439                         btf_kind(local_type), local_id, targ_id);
4440                 return 0;
4441         }
4442 }
4443
4444 /*
4445  * Given a single high-level named field accessor in a local type, find the
4446  * corresponding high-level accessor in a target type. Along the way,
4447  * maintain the low-level spec for the target as well. Also keep updating
4448  * the target bit offset.
4449  *
4450  * Searching is performed through recursive exhaustive enumeration of all
4451  * fields of a struct/union. If there are any anonymous (embedded)
4452  * structs/unions, they are recursively searched as well. If a field with
4453  * the desired name is found, local and target types are checked for
4454  * compatibility before the result is returned.
4455  *
4456  * 1 is returned if a compatible field is found.
4457  * 0 is returned if no compatible field is found.
4458  * <0 is returned on error.
4459  */
4460 static int bpf_core_match_member(const struct btf *local_btf,
4461                                  const struct bpf_core_accessor *local_acc,
4462                                  const struct btf *targ_btf,
4463                                  __u32 targ_id,
4464                                  struct bpf_core_spec *spec,
4465                                  __u32 *next_targ_id)
4466 {
4467         const struct btf_type *local_type, *targ_type;
4468         const struct btf_member *local_member, *m;
4469         const char *local_name, *targ_name;
4470         __u32 local_id;
4471         int i, n, found;
4472
4473         targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id);
4474         if (!targ_type)
4475                 return -EINVAL;
4476         if (!btf_is_composite(targ_type))
4477                 return 0;
4478
4479         local_id = local_acc->type_id;
4480         local_type = btf__type_by_id(local_btf, local_id);
4481         local_member = btf_members(local_type) + local_acc->idx;
4482         local_name = btf__name_by_offset(local_btf, local_member->name_off);
4483
4484         n = btf_vlen(targ_type);
4485         m = btf_members(targ_type);
4486         for (i = 0; i < n; i++, m++) {
4487                 __u32 bit_offset;
4488
4489                 bit_offset = btf_member_bit_offset(targ_type, i);
4490
4491                 /* too deep struct/union/array nesting */
4492                 if (spec->raw_len == BPF_CORE_SPEC_MAX_LEN)
4493                         return -E2BIG;
4494
4495                 /* speculate this member will be the right one */
4496                 spec->bit_offset += bit_offset;
4497                 spec->raw_spec[spec->raw_len++] = i;
4498
4499                 targ_name = btf__name_by_offset(targ_btf, m->name_off);
4500                 if (str_is_empty(targ_name)) {
4501                         /* embedded struct/union, we need to go deeper */
4502                         found = bpf_core_match_member(local_btf, local_acc,
4503                                                       targ_btf, m->type,
4504                                                       spec, next_targ_id);
4505                         if (found) /* either found or error */
4506                                 return found;
4507                 } else if (strcmp(local_name, targ_name) == 0) {
4508                         /* matching named field */
4509                         struct bpf_core_accessor *targ_acc;
4510
4511                         targ_acc = &spec->spec[spec->len++];
4512                         targ_acc->type_id = targ_id;
4513                         targ_acc->idx = i;
4514                         targ_acc->name = targ_name;
4515
4516                         *next_targ_id = m->type;
4517                         found = bpf_core_fields_are_compat(local_btf,
4518                                                            local_member->type,
4519                                                            targ_btf, m->type);
4520                         if (!found)
4521                                 spec->len--; /* pop accessor */
4522                         return found;
4523                 }
4524                 /* member turned out not to be what we looked for */
4525                 spec->bit_offset -= bit_offset;
4526                 spec->raw_len--;
4527         }
4528
4529         return 0;
4530 }
4531
4532 /*
4533  * Try to match local spec to a target type and, if successful, produce full
4534  * target spec (high-level, low-level + bit offset).
4535  */
4536 static int bpf_core_spec_match(struct bpf_core_spec *local_spec,
4537                                const struct btf *targ_btf, __u32 targ_id,
4538                                struct bpf_core_spec *targ_spec)
4539 {
4540         const struct btf_type *targ_type;
4541         const struct bpf_core_accessor *local_acc;
4542         struct bpf_core_accessor *targ_acc;
4543         int i, sz, matched;
4544
4545         memset(targ_spec, 0, sizeof(*targ_spec));
4546         targ_spec->btf = targ_btf;
4547         targ_spec->root_type_id = targ_id;
4548         targ_spec->relo_kind = local_spec->relo_kind;
4549
4550         local_acc = &local_spec->spec[0];
4551         targ_acc = &targ_spec->spec[0];
4552
4553         for (i = 0; i < local_spec->len; i++, local_acc++, targ_acc++) {
4554                 targ_type = skip_mods_and_typedefs(targ_spec->btf, targ_id,
4555                                                    &targ_id);
4556                 if (!targ_type)
4557                         return -EINVAL;
4558
4559                 if (local_acc->name) {
4560                         matched = bpf_core_match_member(local_spec->btf,
4561                                                         local_acc,
4562                                                         targ_btf, targ_id,
4563                                                         targ_spec, &targ_id);
4564                         if (matched <= 0)
4565                                 return matched;
4566                 } else {
4567                         /* for i=0, targ_id is already treated as array element
4568                          * type (because it's the original struct), for others
4569                          * we should find array element type first
4570                          */
4571                         if (i > 0) {
4572                                 const struct btf_array *a;
4573                                 bool flex;
4574
4575                                 if (!btf_is_array(targ_type))
4576                                         return 0;
4577
4578                                 a = btf_array(targ_type);
4579                                 flex = is_flex_arr(targ_btf, targ_acc - 1, a);
4580                                 if (!flex && local_acc->idx >= a->nelems)
4581                                         return 0;
4582                                 if (!skip_mods_and_typedefs(targ_btf, a->type,
4583                                                             &targ_id))
4584                                         return -EINVAL;
4585                         }
4586
4587                         /* too deep struct/union/array nesting */
4588                         if (targ_spec->raw_len == BPF_CORE_SPEC_MAX_LEN)
4589                                 return -E2BIG;
4590
4591                         targ_acc->type_id = targ_id;
4592                         targ_acc->idx = local_acc->idx;
4593                         targ_acc->name = NULL;
4594                         targ_spec->len++;
4595                         targ_spec->raw_spec[targ_spec->raw_len] = targ_acc->idx;
4596                         targ_spec->raw_len++;
4597
4598                         sz = btf__resolve_size(targ_btf, targ_id);
4599                         if (sz < 0)
4600                                 return sz;
4601                         targ_spec->bit_offset += local_acc->idx * sz * 8;
4602                 }
4603         }
4604
4605         return 1;
4606 }
4607
4608 static int bpf_core_calc_field_relo(const struct bpf_program *prog,
4609                                     const struct bpf_core_relo *relo,
4610                                     const struct bpf_core_spec *spec,
4611                                     __u32 *val, bool *validate)
4612 {
4613         const struct bpf_core_accessor *acc;
4614         const struct btf_type *t;
4615         __u32 byte_off, byte_sz, bit_off, bit_sz;
4616         const struct btf_member *m;
4617         const struct btf_type *mt;
4618         bool bitfield;
4619         __s64 sz;
4620
4621         if (relo->kind == BPF_FIELD_EXISTS) {
4622                 *val = spec ? 1 : 0;
4623                 return 0;
4624         }
4625
4626         if (!spec)
4627                 return -EUCLEAN; /* request instruction poisoning */
4628
4629         acc = &spec->spec[spec->len - 1];
4630         t = btf__type_by_id(spec->btf, acc->type_id);
4631
4632         /* a[n] accessor needs special handling */
4633         if (!acc->name) {
4634                 if (relo->kind == BPF_FIELD_BYTE_OFFSET) {
4635                         *val = spec->bit_offset / 8;
4636                 } else if (relo->kind == BPF_FIELD_BYTE_SIZE) {
4637                         sz = btf__resolve_size(spec->btf, acc->type_id);
4638                         if (sz < 0)
4639                                 return -EINVAL;
4640                         *val = sz;
4641                 } else {
4642                         pr_warn("prog '%s': relo %d at insn #%d can't be applied to array access\n",
4643                                 bpf_program__title(prog, false),
4644                                 relo->kind, relo->insn_off / 8);
4645                         return -EINVAL;
4646                 }
4647                 if (validate)
4648                         *validate = true;
4649                 return 0;
4650         }
4651
4652         m = btf_members(t) + acc->idx;
4653         mt = skip_mods_and_typedefs(spec->btf, m->type, NULL);
4654         bit_off = spec->bit_offset;
4655         bit_sz = btf_member_bitfield_size(t, acc->idx);
4656
4657         bitfield = bit_sz > 0;
4658         if (bitfield) {
4659                 byte_sz = mt->size;
4660                 byte_off = bit_off / 8 / byte_sz * byte_sz;
4661                 /* figure out smallest int size necessary for bitfield load */
4662                 while (bit_off + bit_sz - byte_off * 8 > byte_sz * 8) {
4663                         if (byte_sz >= 8) {
4664                                 /* bitfield can't be read with 64-bit read */
4665                                 pr_warn("prog '%s': relo %d at insn #%d can't be satisfied for bitfield\n",
4666                                         bpf_program__title(prog, false),
4667                                         relo->kind, relo->insn_off / 8);
4668                                 return -E2BIG;
4669                         }
4670                         byte_sz *= 2;
4671                         byte_off = bit_off / 8 / byte_sz * byte_sz;
4672                 }
4673         } else {
4674                 sz = btf__resolve_size(spec->btf, m->type);
4675                 if (sz < 0)
4676                         return -EINVAL;
4677                 byte_sz = sz;
4678                 byte_off = spec->bit_offset / 8;
4679                 bit_sz = byte_sz * 8;
4680         }
4681
4682         /* for bitfields, all the relocatable aspects are ambiguous and we
4683          * might disagree with the compiler, so turn off validation of expected
4684          * value, except for signedness
4685          */
4686         if (validate)
4687                 *validate = !bitfield;
4688
4689         switch (relo->kind) {
4690         case BPF_FIELD_BYTE_OFFSET:
4691                 *val = byte_off;
4692                 break;
4693         case BPF_FIELD_BYTE_SIZE:
4694                 *val = byte_sz;
4695                 break;
4696         case BPF_FIELD_SIGNED:
4697                 /* enums will be assumed unsigned */
4698                 *val = btf_is_enum(mt) ||
4699                        (btf_int_encoding(mt) & BTF_INT_SIGNED);
4700                 if (validate)
4701                         *validate = true; /* signedness is never ambiguous */
4702                 break;
4703         case BPF_FIELD_LSHIFT_U64:
4704 #if __BYTE_ORDER == __LITTLE_ENDIAN
4705                 *val = 64 - (bit_off + bit_sz - byte_off * 8);
4706 #else
4707                 *val = (8 - byte_sz) * 8 + (bit_off - byte_off * 8);
4708 #endif
4709                 break;
4710         case BPF_FIELD_RSHIFT_U64:
4711                 *val = 64 - bit_sz;
4712                 if (validate)
4713                         *validate = true; /* right shift is never ambiguous */
4714                 break;
4715         case BPF_FIELD_EXISTS:
4716         default:
4717                 return -EOPNOTSUPP;
4718         }
4719
4720         return 0;
4721 }
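/* Worked example (a sketch, little-endian): for a bitfield with bit_off = 13
 * and bit_sz = 3 inside a 4-byte int member:
 *   byte_off = 13 / 8 / 4 * 4 = 0, byte_sz = 4 (fits, no widening needed);
 *   BPF_FIELD_LSHIFT_U64 = 64 - (13 + 3 - 0 * 8) = 48;
 *   BPF_FIELD_RSHIFT_U64 = 64 - 3 = 61;
 * i.e., load 4 bytes at offset 0, shift left by 48, then right by 61
 * (arithmetic shift if signed) to extract the 3-bit field.
 */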
4722
4723 struct bpf_core_relo_res
4724 {
4725         /* expected value in the instruction, unless validate == false */
4726         __u32 orig_val;
4727         /* new value that needs to be patched up to */
4728         __u32 new_val;
4729         /* relocation unsuccessful, poison instruction, but don't fail load */
4730         bool poison;
4731         /* some relocations can't be validated against orig_val */
4732         bool validate;
4733 };
4734
4735 /* Calculate original and target relocation values, given local and target
4736  * specs and relocation kind. These values are calculated for each candidate.
4737  * If there are multiple candidates, resulting values should all be consistent
4738  * with each other. Otherwise, libbpf will refuse to proceed due to ambiguity.
4739  * If instruction has to be poisoned, *poison will be set to true.
4740  */
4741 static int bpf_core_calc_relo(const struct bpf_program *prog,
4742                               const struct bpf_core_relo *relo,
4743                               int relo_idx,
4744                               const struct bpf_core_spec *local_spec,
4745                               const struct bpf_core_spec *targ_spec,
4746                               struct bpf_core_relo_res *res)
4747 {
4748         int err = -EOPNOTSUPP;
4749
4750         res->orig_val = 0;
4751         res->new_val = 0;
4752         res->poison = false;
4753         res->validate = true;
4754
4755         if (core_relo_is_field_based(relo->kind)) {
4756                 err = bpf_core_calc_field_relo(prog, relo, local_spec, &res->orig_val, &res->validate);
4757                 err = err ?: bpf_core_calc_field_relo(prog, relo, targ_spec, &res->new_val, NULL);
4758         }
4759
4760         if (err == -EUCLEAN) {
4761                 /* EUCLEAN is used to signal instruction poisoning request */
4762                 res->poison = true;
4763                 err = 0;
4764         } else if (err == -EOPNOTSUPP) {
4765                 /* EOPNOTSUPP means unknown/unsupported relocation */
4766                 pr_warn("prog '%s': relo #%d: unrecognized CO-RE relocation %s (%d) at insn #%d\n",
4767                         bpf_program__title(prog, false), relo_idx,
4768                         core_relo_kind_str(relo->kind), relo->kind, relo->insn_off / 8);
4769         }
4770
4771         return err;
4772 }
4773
4774 /*
4775  * Turn an instruction for which CO-RE relocation failed into an invalid one
4776  * with a distinct signature.
4777  */
4778 static void bpf_core_poison_insn(struct bpf_program *prog, int relo_idx,
4779                                  int insn_idx, struct bpf_insn *insn)
4780 {
4781         pr_debug("prog '%s': relo #%d: substituting insn #%d w/ invalid insn\n",
4782                  bpf_program__title(prog, false), relo_idx, insn_idx);
4783         insn->code = BPF_JMP | BPF_CALL;
4784         insn->dst_reg = 0;
4785         insn->src_reg = 0;
4786         insn->off = 0;
4787         /* if this instruction is reachable (not dead code),
4788          * verifier will complain with the following message:
4789          * invalid func unknown#195896080
4790          */
4791         insn->imm = 195896080; /* => 0xbad2310 => "bad relo" */
4792 }
4793
4794 /*
4795  * Patch relocatable BPF instruction.
4796  *
4797  * Patched value is determined by relocation kind and target specification.
4798  * For existence relocations, target spec will be NULL if field is not found.
4799  * The expected insn->imm (or insn->off) value is determined using relocation
4800  * kind and local spec, and is checked before patching the instruction. If
4801  * the actual value is wrong, bail out with an error.
4802  *
4803  * Currently three kinds of BPF instructions are supported:
4804  * 1. rX = <imm> (assignment with immediate operand);
4805  * 2. rX += <imm> (arithmetic operations with immediate operand);
4806  * 3. rX = *(rY + <off>) / *(rX + <off>) = rY (load/store <off> operand).
4807 static int bpf_core_patch_insn(struct bpf_program *prog,
4808                                const struct bpf_core_relo *relo,
4809                                int relo_idx,
4810                                const struct bpf_core_relo_res *res)
4811 {
4812         __u32 orig_val, new_val;
4813         struct bpf_insn *insn;
4814         int insn_idx;
4815         __u8 class;
4816
4817         if (relo->insn_off % sizeof(struct bpf_insn))
4818                 return -EINVAL;
4819         insn_idx = relo->insn_off / sizeof(struct bpf_insn);
4820         insn = &prog->insns[insn_idx];
4821         class = BPF_CLASS(insn->code);
4822
4823         if (res->poison) {
4824                 bpf_core_poison_insn(prog, relo_idx, insn_idx, insn);
4825                 return 0;
4826         }
4827
4828         orig_val = res->orig_val;
4829         new_val = res->new_val;
4830
4831         switch (class) {
4832         case BPF_ALU:
4833         case BPF_ALU64:
4834                 if (BPF_SRC(insn->code) != BPF_K)
4835                         return -EINVAL;
4836                 if (res->validate && insn->imm != orig_val) {
4837                         pr_warn("prog '%s': relo #%d: unexpected insn #%d (ALU/ALU64) value: got %u, exp %u -> %u\n",
4838                                 bpf_program__title(prog, false), relo_idx,
4839                                 insn_idx, insn->imm, orig_val, new_val);
4840                         return -EINVAL;
4841                 }
4842                 orig_val = insn->imm;
4843                 insn->imm = new_val;
4844                 pr_debug("prog '%s': relo #%d: patched insn #%d (ALU/ALU64) imm %u -> %u\n",
4845                          bpf_program__title(prog, false), relo_idx, insn_idx,
4846                          orig_val, new_val);
4847                 break;
4848         case BPF_LDX:
4849         case BPF_ST:
4850         case BPF_STX:
4851                 if (res->validate && insn->off != orig_val) {
4852                         pr_warn("prog '%s': relo #%d: unexpected insn #%d (LD/LDX/ST/STX) value: got %u, exp %u -> %u\n",
4853                                 bpf_program__title(prog, false), relo_idx,
4854                                 insn_idx, insn->off, orig_val, new_val);
4855                         return -EINVAL;
4856                 }
4857                 if (new_val > SHRT_MAX) {
4858                         pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) value too big: %u\n",
4859                                 bpf_program__title(prog, false), relo_idx,
4860                                 insn_idx, new_val);
4861                         return -ERANGE;
4862                 }
4863                 orig_val = insn->off;
4864                 insn->off = new_val;
4865                 pr_debug("prog '%s': relo #%d: patched insn #%d (LDX/ST/STX) off %u -> %u\n",
4866                          bpf_program__title(prog, false), relo_idx, insn_idx,
4867                          orig_val, new_val);
4868                 break;
4869         default:
4870                 pr_warn("prog '%s': relo #%d: trying to relocate unrecognized insn #%d, code:%x, src:%x, dst:%x, off:%x, imm:%x\n",
4871                         bpf_program__title(prog, false), relo_idx,
4872                         insn_idx, insn->code, insn->src_reg, insn->dst_reg,
4873                         insn->off, insn->imm);
4874                 return -EINVAL;
4875         }
4876
4877         return 0;
4878 }
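/* E.g. (a sketch): for a BPF_FIELD_BYTE_OFFSET relocation where a field sits
 * at byte 16 in local BTF but at byte 24 in the target kernel's BTF, an insn
 *
 *   r1 = *(u32 *)(r2 + 16)    // BPF_LDX, off == 16
 *
 * is validated against orig_val == 16 and patched to off == 24.
 */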
4879
4880 /* Output spec definition in the format:
4881  * [<type-id>] <kind> <type-name><spec> (<raw-spec> @ offset <byte>[.<bit>]),
4882  * where <spec> is a C-syntax view of recorded field access, e.g.: .a[3].b
4883  */
4884 static void bpf_core_dump_spec(int level, const struct bpf_core_spec *spec)
4885 {
4886         const struct btf_type *t;
4887         const char *s;
4888         __u32 type_id;
4889         int i;
4890
4891         type_id = spec->root_type_id;
4892         t = btf__type_by_id(spec->btf, type_id);
4893         s = btf__name_by_offset(spec->btf, t->name_off);
4894
4895         libbpf_print(level, "[%u] %s %s", type_id, btf_kind_str(t), str_is_empty(s) ? "<anon>" : s);
4896
4897         if (core_relo_is_field_based(spec->relo_kind)) {
4898                 for (i = 0; i < spec->len; i++) {
4899                         if (spec->spec[i].name)
4900                                 libbpf_print(level, ".%s", spec->spec[i].name);
4901                         else if (i > 0 || spec->spec[i].idx > 0)
4902                                 libbpf_print(level, "[%u]", spec->spec[i].idx);
4903                 }
4904
4905                 libbpf_print(level, " (");
4906                 for (i = 0; i < spec->raw_len; i++)
4907                         libbpf_print(level, "%s%d", i == 0 ? "" : ":", spec->raw_spec[i]);
4908
4909                 if (spec->bit_offset % 8)
4910                         libbpf_print(level, " @ offset %u.%u)",
4911                                      spec->bit_offset / 8, spec->bit_offset % 8);
4912                 else
4913                         libbpf_print(level, " @ offset %u)", spec->bit_offset / 8);
4914         }
4915 }
4916
4917 static size_t bpf_core_hash_fn(const void *key, void *ctx)
4918 {
4919         return (size_t)key;
4920 }
4921
4922 static bool bpf_core_equal_fn(const void *k1, const void *k2, void *ctx)
4923 {
4924         return k1 == k2;
4925 }
4926
4927 static void *u32_as_hash_key(__u32 x)
4928 {
4929         return (void *)(uintptr_t)x;
4930 }
4931
4932 /*
4933  * CO-RE relocate single instruction.
4934  *
4935  * The outline and important points of the algorithm:
4936  * 1. For given local type, find corresponding candidate target types.
4937  *    Candidate type is a type with the same "essential" name, ignoring
4938  *    everything after last triple underscore (___). E.g., `sample`,
4939  *    `sample___flavor_one`, `sample___flavor_another_one`, are all candidates
4940  *    for each other. Names with triple underscore are referred to as
4941  *    "flavors" and are useful, among other things, for specifying/supporting
4942  *    incompatible variations of the same kernel struct, which
4943  *    might differ between different kernel versions and/or build
4944  *    configurations.
4945  *
4946  *    N.B. Struct "flavors" can be generated by bpftool's BTF-to-C
4947  *    converter when deduplicated BTF of a kernel still contains more than
4948  *    one type with the same name. In that case, ___2, ___3, etc. are
4949  *    appended starting from the second name conflict. But struct flavors
4950  *    are also useful when defined "locally", in a BPF program, to extract
4951  *    the same data despite incompatible changes between different kernel
4952  *    versions/configurations. For instance, to handle a field rename
4953  *    between kernel versions, one can define two flavors of the struct
4954  *    with the same common name and use conditional relocations to extract
4955  *    that field, depending on the target kernel version.
4956  * 2. For each candidate type, try to match local specification to this
4957  *    candidate target type. Matching involves finding corresponding
4958  *    high-level spec accessors, meaning that all named fields should match
4959  *    and all array accesses should be within the actual array bounds. Also,
4960  *    types should be compatible (see bpf_core_fields_are_compat for details).
4961  * 3. It is supported and expected that there might be multiple flavors
4962  *    matching the spec. As long as all the specs resolve to the same set of
4963  *    offsets across all candidates, there is no error. If there is any
4964  *    ambiguity, CO-RE relocation will fail. This is necessary to accommodate
4965  *    imperfections of BTF deduplication, which can cause slight duplication of
4966  *    the same BTF type, if some directly or indirectly referenced (by
4967  *    pointer) type gets resolved to different actual types in different
4968  *    object files. If such situation occurs, deduplicated BTF will end up
4969  *    with two (or more) structurally identical types, which differ only in
4970  *    types they refer to through pointer. This should be OK in most cases and
4971  *    is not an error.
4972  * 4. Candidate types search is performed by linearly scanning through all
4973  *    types in target BTF. It is anticipated that this is overall more
4974  *    efficient memory-wise and not significantly worse (if not better)
4975  *    CPU-wise compared to prebuilding a map from all local type names to
4976  *    a list of candidate type names. It's also sped up by caching resolved
4977  *    list of matching candidates for each local "root" type ID that has at
4978  *    least one bpf_core_relo associated with it. This list is shared
4979  *    between multiple relocations for the same type ID and is updated as some
4980  *    of the candidates are pruned due to structural incompatibility.
4981  */
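/* A sketch of the local "flavor" technique described above, handling a
 * hypothetical field rename (type and field names are illustrative only):
 *
 *   struct kern_sock___old { int rcv_buf; } *o = ctx;
 *   struct kern_sock___new { int rcv_buffer; } *n = ctx;
 *   int val;
 *
 *   if (bpf_core_field_exists(o->rcv_buf))
 *           val = BPF_CORE_READ(o, rcv_buf);
 *   else
 *           val = BPF_CORE_READ(n, rcv_buffer);
 *
 * bpf_core_field_exists() compiles to a BPF_FIELD_EXISTS relocation and the
 * reads carry byte-offset relocations, all resolved against the target
 * kernel's BTF by the code below.
 */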
4982 static int bpf_core_reloc_field(struct bpf_program *prog,
4983                                  const struct bpf_core_relo *relo,
4984                                  int relo_idx,
4985                                  const struct btf *local_btf,
4986                                  const struct btf *targ_btf,
4987                                  struct hashmap *cand_cache)
4988 {
4989         const char *prog_name = bpf_program__title(prog, false);
4990         struct bpf_core_spec local_spec, cand_spec, targ_spec;
4991         const void *type_key = u32_as_hash_key(relo->type_id);
4992         struct bpf_core_relo_res cand_res, targ_res;
4993         const struct btf_type *local_type;
4994         const char *local_name;
4995         struct ids_vec *cand_ids;
4996         __u32 local_id, cand_id;
4997         const char *spec_str;
4998         int i, j, err;
4999
5000         local_id = relo->type_id;
5001         local_type = btf__type_by_id(local_btf, local_id);
5002         if (!local_type)
5003                 return -EINVAL;
5004
5005         local_name = btf__name_by_offset(local_btf, local_type->name_off);
5006         if (str_is_empty(local_name))
5007                 return -EINVAL;
5008
5009         spec_str = btf__name_by_offset(local_btf, relo->access_str_off);
5010         if (str_is_empty(spec_str))
5011                 return -EINVAL;
5012
5013         err = bpf_core_parse_spec(local_btf, local_id, spec_str, relo->kind, &local_spec);
5014         if (err) {
5015                 pr_warn("prog '%s': relo #%d: parsing [%d] %s %s + %s failed: %d\n",
5016                         prog_name, relo_idx, local_id, btf_kind_str(local_type),
5017                         local_name, spec_str, err);
5018                 return -EINVAL;
5019         }
5020
5021         pr_debug("prog '%s': relo #%d: kind <%s> (%d), spec is ", prog_name,
5022                  relo_idx, core_relo_kind_str(relo->kind), relo->kind);
5023         bpf_core_dump_spec(LIBBPF_DEBUG, &local_spec);
5024         libbpf_print(LIBBPF_DEBUG, "\n");
5025
5026         if (!hashmap__find(cand_cache, type_key, (void **)&cand_ids)) {
5027                 cand_ids = bpf_core_find_cands(local_btf, local_id, targ_btf);
5028                 if (IS_ERR(cand_ids)) {
5029                         pr_warn("prog '%s': relo #%d: target candidate search failed for [%d] %s %s: %ld\n",
5030                                 prog_name, relo_idx, local_id, btf_kind_str(local_type), local_name,
5031                                 PTR_ERR(cand_ids));
5032                         return PTR_ERR(cand_ids);
5033                 }
5034                 err = hashmap__set(cand_cache, type_key, cand_ids, NULL, NULL);
5035                 if (err) {
5036                         bpf_core_free_cands(cand_ids);
5037                         return err;
5038                 }
5039         }
5040
5041         for (i = 0, j = 0; i < cand_ids->len; i++) {
5042                 cand_id = cand_ids->data[i];
5043                 err = bpf_core_spec_match(&local_spec, targ_btf, cand_id, &cand_spec);
5044                 if (err < 0) {
5045                         pr_warn("prog '%s': relo #%d: error matching candidate #%d ",
5046                                 prog_name, relo_idx, i);
5047                         bpf_core_dump_spec(LIBBPF_WARN, &cand_spec);
5048                         libbpf_print(LIBBPF_WARN, ": %d\n", err);
5049                         return err;
5050                 }
5051
5052                 pr_debug("prog '%s': relo #%d: %s candidate #%d ", prog_name,
5053                          relo_idx, err == 0 ? "non-matching" : "matching", i);
5054                 bpf_core_dump_spec(LIBBPF_DEBUG, &cand_spec);
5055                 libbpf_print(LIBBPF_DEBUG, "\n");
5056
5057                 if (err == 0)
5058                         continue;
5059
5060                 err = bpf_core_calc_relo(prog, relo, relo_idx, &local_spec, &cand_spec, &cand_res);
5061                 if (err)
5062                         return err;
5063
5064                 if (j == 0) {
5065                         targ_res = cand_res;
5066                         targ_spec = cand_spec;
5067                 } else if (cand_spec.bit_offset != targ_spec.bit_offset) {
5068                         /* if there are many field relo candidates, they
5069                          * should all resolve to the same bit offset
5070                          */
5071                         pr_warn("prog '%s': relo #%d: field offset ambiguity: %u != %u\n",
5072                                 prog_name, relo_idx, cand_spec.bit_offset,
5073                                 targ_spec.bit_offset);
5074                         return -EINVAL;
5075                 } else if (cand_res.poison != targ_res.poison || cand_res.new_val != targ_res.new_val) {
5076                         /* all candidates should result in the same relocation
5077                          * decision and value, otherwise it's dangerous to
5078                          * proceed due to ambiguity
5079                          */
5080                         pr_warn("prog '%s': relo #%d: relocation decision ambiguity: %s %u != %s %u\n",
5081                                 prog_name, relo_idx,
5082                                 cand_res.poison ? "failure" : "success", cand_res.new_val,
5083                                 targ_res.poison ? "failure" : "success", targ_res.new_val);
5084                         return -EINVAL;
5085                 }
5086
5087                 cand_ids->data[j++] = cand_spec.spec[0].type_id;
5088         }
5089
5090         /*
5091          * For a BPF_FIELD_EXISTS relo, or when the BPF program has field
5092          * existence checks or kernel version/config checks, it's expected
5093          * that we might not find any candidates. In this case, if the field
5094          * wasn't found in any candidate, the list of candidates shouldn't
5095          * change at all; we'll just handle the relocation appropriately,
5096          * depending on the relo's kind.
5097          */
5098         if (j > 0)
5099                 cand_ids->len = j;
5100
5101         /*
5102          * If no candidates were found, it might be either a programmer error
5103          * or an expected case, depending on whether the instruction with the
5104          * relocation is guarded in some way that makes it unreachable (dead
5105          * code) if the relocation can't be resolved. This is handled uniformly
5106          * by bpf_core_patch_insn(), which replaces that instruction with a
5107          * BPF helper call insn (using an invalid helper ID). If the instruction
5108          * is indeed unreachable, it will be ignored and eliminated by the
5109          * verifier. If it was an error, the verifier will complain and point
5110          * to a specific instruction number in its log.
5111          */
5112         if (j == 0) {
5113                 pr_debug("prog '%s': relo #%d: no matching targets found\n",
5114                          prog_name, relo_idx);
5115
5116                 /* calculate single target relo result explicitly */
5117                 err = bpf_core_calc_relo(prog, relo, relo_idx, &local_spec, NULL, &targ_res);
5118                 if (err)
5119                         return err;
5120         }
5121
5122         /* bpf_core_patch_insn() should know how to handle missing targ_spec */
5123         err = bpf_core_patch_insn(prog, relo, relo_idx, &targ_res);
5124         if (err) {
5125                 pr_warn("prog '%s': relo #%d: failed to patch insn at offset %d: %d\n",
5126                         prog_name, relo_idx, relo->insn_off, err);
5127                 return -EINVAL;
5128         }
5129
5130         return 0;
5131 }
5132
5133 static int
5134 bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path)
5135 {
5136         const struct btf_ext_info_sec *sec;
5137         const struct bpf_core_relo *rec;
5138         const struct btf_ext_info *seg;
5139         struct hashmap_entry *entry;
5140         struct hashmap *cand_cache = NULL;
5141         struct bpf_program *prog;
5142         struct btf *targ_btf;
5143         const char *sec_name;
5144         int i, err = 0;
5145
5146         if (obj->btf_ext->core_relo_info.len == 0)
5147                 return 0;
5148
5149         if (targ_btf_path)
5150                 targ_btf = btf__parse_elf(targ_btf_path, NULL);
5151         else
5152                 targ_btf = obj->btf_vmlinux;
5153         if (IS_ERR_OR_NULL(targ_btf)) {
5154                 pr_warn("failed to get target BTF: %ld\n", PTR_ERR(targ_btf));
5155                 return PTR_ERR(targ_btf);
5156         }
5157
5158         cand_cache = hashmap__new(bpf_core_hash_fn, bpf_core_equal_fn, NULL);
5159         if (IS_ERR(cand_cache)) {
5160                 err = PTR_ERR(cand_cache);
5161                 goto out;
5162         }
5163
5164         seg = &obj->btf_ext->core_relo_info;
5165         for_each_btf_ext_sec(seg, sec) {
5166                 sec_name = btf__name_by_offset(obj->btf, sec->sec_name_off);
5167                 if (str_is_empty(sec_name)) {
5168                         err = -EINVAL;
5169                         goto out;
5170                 }
5171                 prog = NULL;
5172                 for (i = 0; i < obj->nr_programs; i++) {
5173                         if (!strcmp(obj->programs[i].section_name, sec_name)) {
5174                                 prog = &obj->programs[i];
5175                                 break;
5176                         }
5177                 }
5178                 if (!prog) {
5179                         pr_warn("failed to find program '%s' for CO-RE offset relocation\n",
5180                                 sec_name);
5181                         err = -EINVAL;
5182                         goto out;
5183                 }
5184
5185                 pr_debug("prog '%s': performing %d CO-RE offset relocs\n",
5186                          sec_name, sec->num_info);
5187
5188                 for_each_btf_ext_rec(seg, sec, i, rec) {
5189                         err = bpf_core_reloc_field(prog, rec, i, obj->btf,
5190                                                    targ_btf, cand_cache);
5191                         if (err) {
5192                                 pr_warn("prog '%s': relo #%d: failed to relocate: %d\n",
5193                                         sec_name, i, err);
5194                                 goto out;
5195                         }
5196                 }
5197         }
5198
5199 out:
5200         /* obj->btf_vmlinux is freed at the end of object load phase */
5201         if (targ_btf != obj->btf_vmlinux)
5202                 btf__free(targ_btf);
5203         if (!IS_ERR_OR_NULL(cand_cache)) {
5204                 hashmap__for_each_entry(cand_cache, entry, i) {
5205                         bpf_core_free_cands(entry->value);
5206                 }
5207                 hashmap__free(cand_cache);
5208         }
5209         return err;
5210 }
5211
5212 static int
5213 bpf_program__reloc_text(struct bpf_program *prog, struct bpf_object *obj,
5214                         struct reloc_desc *relo)
5215 {
5216         struct bpf_insn *insn, *new_insn;
5217         struct bpf_program *text;
5218         size_t new_cnt;
5219         int err;
5220
5221         if (prog->idx != obj->efile.text_shndx && prog->main_prog_cnt == 0) {
5222                 text = bpf_object__find_prog_by_idx(obj, obj->efile.text_shndx);
5223                 if (!text) {
5224                         pr_warn("no .text section found, yet relocations into .text exist\n");
5225                         return -LIBBPF_ERRNO__RELOC;
5226                 }
5227                 new_cnt = prog->insns_cnt + text->insns_cnt;
5228                 new_insn = libbpf_reallocarray(prog->insns, new_cnt, sizeof(*insn));
5229                 if (!new_insn) {
5230                         pr_warn("oom in prog realloc\n");
5231                         return -ENOMEM;
5232                 }
5233                 prog->insns = new_insn;
5234
5235                 if (obj->btf_ext) {
5236                         err = bpf_program_reloc_btf_ext(prog, obj,
5237                                                         text->section_name,
5238                                                         prog->insns_cnt);
5239                         if (err)
5240                                 return err;
5241                 }
5242
5243                 memcpy(new_insn + prog->insns_cnt, text->insns,
5244                        text->insns_cnt * sizeof(*insn));
5245                 prog->main_prog_cnt = prog->insns_cnt;
5246                 prog->insns_cnt = new_cnt;
5247                 pr_debug("added %zd insn from %s to prog %s\n",
5248                          text->insns_cnt, text->section_name,
5249                          prog->section_name);
5250         }
5251
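        /* a pseudo-call's imm is relative to the instruction following the
         * call; shift it by where the appended .text copy starts
         * (main_prog_cnt) plus the callee's offset within .text (sym_off / 8),
         * minus the call site's own index, preserving whatever relative bias
         * imm already carried
         */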
5252         insn = &prog->insns[relo->insn_idx];
5253         insn->imm += relo->sym_off / 8 + prog->main_prog_cnt - relo->insn_idx;
5254         return 0;
5255 }
5256
5257 static int
5258 bpf_program__relocate(struct bpf_program *prog, struct bpf_object *obj)
5259 {
5260         int i, err;
5261
5262         if (!prog)
5263                 return 0;
5264
5265         if (obj->btf_ext) {
5266                 err = bpf_program_reloc_btf_ext(prog, obj,
5267                                                 prog->section_name, 0);
5268                 if (err)
5269                         return err;
5270         }
5271
5272         if (!prog->reloc_desc)
5273                 return 0;
5274
5275         for (i = 0; i < prog->nr_reloc; i++) {
5276                 struct reloc_desc *relo = &prog->reloc_desc[i];
5277                 struct bpf_insn *insn = &prog->insns[relo->insn_idx];
5278                 struct extern_desc *ext;
5279
5280                 if (relo->insn_idx + 1 >= (int)prog->insns_cnt) {
5281                         pr_warn("relocation out of range: '%s'\n",
5282                                 prog->section_name);
5283                         return -LIBBPF_ERRNO__RELOC;
5284                 }
5285
5286                 switch (relo->type) {
5287                 case RELO_LD64:
5288                         insn[0].src_reg = BPF_PSEUDO_MAP_FD;
5289                         insn[0].imm = obj->maps[relo->map_idx].fd;
5290                         break;
5291                 case RELO_DATA:
5292                         insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
5293                         insn[1].imm = insn[0].imm + relo->sym_off;
5294                         insn[0].imm = obj->maps[relo->map_idx].fd;
5295                         break;
5296                 case RELO_EXTERN:
5297                         ext = &obj->externs[relo->sym_off];
5298                         if (ext->type == EXT_KCFG) {
5299                                 insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
5300                                 insn[0].imm = obj->maps[obj->kconfig_map_idx].fd;
5301                                 insn[1].imm = ext->kcfg.data_off;
5302                         } else /* EXT_KSYM */ {
5303                                 insn[0].imm = (__u32)ext->ksym.addr;
5304                                 insn[1].imm = ext->ksym.addr >> 32;
5305                         }
5306                         break;
5307                 case RELO_CALL:
5308                         err = bpf_program__reloc_text(prog, obj, relo);
5309                         if (err)
5310                                 return err;
5311                         break;
5312                 default:
5313                         pr_warn("relo #%d: bad relo type %d\n", i, relo->type);
5314                         return -EINVAL;
5315                 }
5316         }
5317
5318         zfree(&prog->reloc_desc);
5319         prog->nr_reloc = 0;
5320         return 0;
5321 }
5322
5323 static int
5324 bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path)
5325 {
5326         struct bpf_program *prog;
5327         size_t i;
5328         int err;
5329
5330         if (obj->btf_ext) {
5331                 err = bpf_object__relocate_core(obj, targ_btf_path);
5332                 if (err) {
5333                         pr_warn("failed to perform CO-RE relocations: %d\n",
5334                                 err);
5335                         return err;
5336                 }
5337         }
5338         /* ensure .text is relocated first, as it's going to be copied as-is
5339          * later for sub-program calls
5340          */
5341         for (i = 0; i < obj->nr_programs; i++) {
5342                 prog = &obj->programs[i];
5343                 if (prog->idx != obj->efile.text_shndx)
5344                         continue;
5345
5346                 err = bpf_program__relocate(prog, obj);
5347                 if (err) {
5348                         pr_warn("failed to relocate '%s'\n", prog->section_name);
5349                         return err;
5350                 }
5351                 break;
5352         }
5353         /* now relocate everything but .text, which by now is relocated
5354          * properly, so we can safely copy raw sub-program instructions as-is
5355          */
5356         for (i = 0; i < obj->nr_programs; i++) {
5357                 prog = &obj->programs[i];
5358                 if (prog->idx == obj->efile.text_shndx)
5359                         continue;
5360
5361                 err = bpf_program__relocate(prog, obj);
5362                 if (err) {
5363                         pr_warn("failed to relocate '%s'\n", prog->section_name);
5364                         return err;
5365                 }
5366         }
5367         return 0;
5368 }
5369
5370 static int bpf_object__collect_st_ops_relos(struct bpf_object *obj,
5371                                             GElf_Shdr *shdr, Elf_Data *data);
5372
5373 static int bpf_object__collect_map_relos(struct bpf_object *obj,
5374                                          GElf_Shdr *shdr, Elf_Data *data)
5375 {
5376         const int bpf_ptr_sz = 8, host_ptr_sz = sizeof(void *);
5377         int i, j, nrels, new_sz;
5378         const struct btf_var_secinfo *vi = NULL;
5379         const struct btf_type *sec, *var, *def;
5380         const struct btf_member *member;
5381         struct bpf_map *map, *targ_map;
5382         const char *name, *mname;
5383         Elf_Data *symbols;
5384         unsigned int moff;
5385         GElf_Sym sym;
5386         GElf_Rel rel;
5387         void *tmp;
5388
5389         if (!obj->efile.btf_maps_sec_btf_id || !obj->btf)
5390                 return -EINVAL;
5391         sec = btf__type_by_id(obj->btf, obj->efile.btf_maps_sec_btf_id);
5392         if (!sec)
5393                 return -EINVAL;
5394
5395         symbols = obj->efile.symbols;
5396         nrels = shdr->sh_size / shdr->sh_entsize;
5397         for (i = 0; i < nrels; i++) {
5398                 if (!gelf_getrel(data, i, &rel)) {
5399                         pr_warn(".maps relo #%d: failed to get ELF relo\n", i);
5400                         return -LIBBPF_ERRNO__FORMAT;
5401                 }
5402                 if (!gelf_getsym(symbols, GELF_R_SYM(rel.r_info), &sym)) {
5403                         pr_warn(".maps relo #%d: symbol %zx not found\n",
5404                                 i, (size_t)GELF_R_SYM(rel.r_info));
5405                         return -LIBBPF_ERRNO__FORMAT;
5406                 }
5407                 name = elf_strptr(obj->efile.elf, obj->efile.strtabidx,
5408                                   sym.st_name) ? : "<?>";
5409                 if (sym.st_shndx != obj->efile.btf_maps_shndx) {
5410                         pr_warn(".maps relo #%d: '%s' isn't a BTF-defined map\n",
5411                                 i, name);
5412                         return -LIBBPF_ERRNO__RELOC;
5413                 }
5414
5415                 pr_debug(".maps relo #%d: for %zd value %zd rel.r_offset %zu name %d ('%s')\n",
5416                          i, (ssize_t)(rel.r_info >> 32), (size_t)sym.st_value,
5417                          (size_t)rel.r_offset, sym.st_name, name);
5418
5419                 for (j = 0; j < obj->nr_maps; j++) {
5420                         map = &obj->maps[j];
5421                         if (map->sec_idx != obj->efile.btf_maps_shndx)
5422                                 continue;
5423
5424                         vi = btf_var_secinfos(sec) + map->btf_var_idx;
5425                         if (vi->offset <= rel.r_offset &&
5426                             rel.r_offset + bpf_ptr_sz <= vi->offset + vi->size)
5427                                 break;
5428                 }
5429                 if (j == obj->nr_maps) {
5430                         pr_warn(".maps relo #%d: cannot find map '%s' at rel.r_offset %zu\n",
5431                                 i, name, (size_t)rel.r_offset);
5432                         return -EINVAL;
5433                 }
5434
5435                 if (!bpf_map_type__is_map_in_map(map->def.type))
5436                         return -EINVAL;
5437                 if (map->def.type == BPF_MAP_TYPE_HASH_OF_MAPS &&
5438                     map->def.key_size != sizeof(int)) {
5439                         pr_warn(".maps relo #%d: hash-of-maps '%s' should have key size %zu.\n",
5440                                 i, map->name, sizeof(int));
5441                         return -EINVAL;
5442                 }
5443
5444                 targ_map = bpf_object__find_map_by_name(obj, name);
5445                 if (!targ_map)
5446                         return -ESRCH;
5447
5448                 var = btf__type_by_id(obj->btf, vi->type);
5449                 def = skip_mods_and_typedefs(obj->btf, var->type, NULL);
5450                 if (btf_vlen(def) == 0)
5451                         return -EINVAL;
5452                 member = btf_members(def) + btf_vlen(def) - 1;
5453                 mname = btf__name_by_offset(obj->btf, member->name_off);
5454                 if (strcmp(mname, "values"))
5455                         return -EINVAL;
5456
5457                 moff = btf_member_bit_offset(def, btf_vlen(def) - 1) / 8;
5458                 if (rel.r_offset - vi->offset < moff)
5459                         return -EINVAL;
5460
5461                 moff = rel.r_offset - vi->offset - moff;
5462                 /* use the BPF pointer size, which is always 64-bit, since
5463                  * we are parsing an ELF built for the BPF target
5464                  */
5465                 if (moff % bpf_ptr_sz)
5466                         return -EINVAL;
5467                 moff /= bpf_ptr_sz;
5468                 if (moff >= map->init_slots_sz) {
5469                         new_sz = moff + 1;
5470                         tmp = libbpf_reallocarray(map->init_slots, new_sz, host_ptr_sz);
5471                         if (!tmp)
5472                                 return -ENOMEM;
5473                         map->init_slots = tmp;
5474                         memset(map->init_slots + map->init_slots_sz, 0,
5475                                (new_sz - map->init_slots_sz) * host_ptr_sz);
5476                         map->init_slots_sz = new_sz;
5477                 }
5478                 map->init_slots[moff] = targ_map;
5479
5480                 pr_debug(".maps relo #%d: map '%s' slot [%d] points to map '%s'\n",
5481                          i, map->name, moff, name);
5482         }
5483
5484         return 0;
5485 }
5486
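/* For reference, the BPF-side source that produces the .maps relocations
 * handled above looks roughly like this (a sketch using the __uint/__type/
 * __array convenience macros from bpf_helpers.h; all names illustrative):
 *
 *	struct inner_map {
 *		__uint(type, BPF_MAP_TYPE_ARRAY);
 *		__uint(max_entries, 1);
 *		__type(key, int);
 *		__type(value, int);
 *	} inner_a SEC(".maps"), inner_b SEC(".maps");
 *
 *	struct {
 *		__uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
 *		__uint(max_entries, 3);
 *		__type(key, int);
 *		__array(values, struct inner_map);
 *	} outer SEC(".maps") = {
 *		.values = { [0] = &inner_a, [2] = &inner_b },
 *	};
 *
 * Each address taken in the .values initializer becomes an ELF relocation
 * against the inner map's symbol, which is what ends up in init_slots.
 */
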
5487 static int bpf_object__collect_reloc(struct bpf_object *obj)
5488 {
5489         int i, err;
5490
5491         if (!obj_elf_valid(obj)) {
5492                 pr_warn("Internal error: elf object is closed\n");
5493                 return -LIBBPF_ERRNO__INTERNAL;
5494         }
5495
5496         for (i = 0; i < obj->efile.nr_reloc_sects; i++) {
5497                 GElf_Shdr *shdr = &obj->efile.reloc_sects[i].shdr;
5498                 Elf_Data *data = obj->efile.reloc_sects[i].data;
5499                 int idx = shdr->sh_info;
5500                 struct bpf_program *prog;
5501
5502                 if (shdr->sh_type != SHT_REL) {
5503                         pr_warn("internal error at %d\n", __LINE__);
5504                         return -LIBBPF_ERRNO__INTERNAL;
5505                 }
5506
5507                 if (idx == obj->efile.st_ops_shndx) {
5508                         err = bpf_object__collect_st_ops_relos(obj, shdr, data);
5509                 } else if (idx == obj->efile.btf_maps_shndx) {
5510                         err = bpf_object__collect_map_relos(obj, shdr, data);
5511                 } else {
5512                         prog = bpf_object__find_prog_by_idx(obj, idx);
5513                         if (!prog) {
5514                                 pr_warn("relocation failed: no prog in section(%d)\n", idx);
5515                                 return -LIBBPF_ERRNO__RELOC;
5516                         }
5517                         err = bpf_program__collect_reloc(prog, shdr, data, obj);
5518                 }
5519                 if (err)
5520                         return err;
5521         }
5522         return 0;
5523 }
5524
5525 static bool insn_is_helper_call(struct bpf_insn *insn, enum bpf_func_id *func_id)
5526 {
5527         __u8 class = BPF_CLASS(insn->code);
5528
5529         if ((class == BPF_JMP || class == BPF_JMP32) &&
5530             BPF_OP(insn->code) == BPF_CALL &&
5531             BPF_SRC(insn->code) == BPF_K &&
5532             insn->src_reg == 0 && insn->dst_reg == 0) {
5533                 if (func_id)
5534                         *func_id = insn->imm;
5535                 return true;
5536         }
5537         return false;
5538 }
5539
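/* Illustrative only, not used by libbpf itself: build the kind of
 * instruction insn_is_helper_call() matches - a call out of the jump
 * class, with BPF_SRC == BPF_K and the helper ID in the immediate.
 */
static inline struct bpf_insn example_helper_call_insn(enum bpf_func_id func_id)
{
        struct bpf_insn insn = {
                .code = BPF_JMP | BPF_CALL,
                .dst_reg = 0,
                .src_reg = 0,   /* 0 distinguishes helper calls from BPF_PSEUDO_CALL */
                .off = 0,
                .imm = func_id, /* helper ID, e.g. BPF_FUNC_probe_read_kernel */
        };

        return insn;
}
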
5540 static int bpf_object__sanitize_prog(struct bpf_object* obj, struct bpf_program *prog)
5541 {
5542         struct bpf_insn *insn = prog->insns;
5543         enum bpf_func_id func_id;
5544         int i;
5545
5546         for (i = 0; i < prog->insns_cnt; i++, insn++) {
5547                 if (!insn_is_helper_call(insn, &func_id))
5548                         continue;
5549
5550                 /* on kernels that don't yet support the
5551                  * bpf_probe_read_{kernel,user}[_str] helpers, fall back to
5552                  * bpf_probe_read[_str](); identical signatures make the
5553                  * in-place insn->imm rewrite safe */
5554                 switch (func_id) {
5555                 case BPF_FUNC_probe_read_kernel:
5556                 case BPF_FUNC_probe_read_user:
5557                         if (!kernel_supports(FEAT_PROBE_READ_KERN))
5558                                 insn->imm = BPF_FUNC_probe_read;
5559                         break;
5560                 case BPF_FUNC_probe_read_kernel_str:
5561                 case BPF_FUNC_probe_read_user_str:
5562                         if (!kernel_supports(FEAT_PROBE_READ_KERN))
5563                                 insn->imm = BPF_FUNC_probe_read_str;
5564                         break;
5565                 default:
5566                         break;
5567                 }
5568         }
5569         return 0;
5570 }
5571
5572 static int
5573 load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt,
5574              char *license, __u32 kern_version, int *pfd)
5575 {
5576         struct bpf_load_program_attr load_attr;
5577         char *cp, errmsg[STRERR_BUFSIZE];
5578         size_t log_buf_size = 0;
5579         char *log_buf = NULL;
5580         int btf_fd, ret;
5581
5582         if (!insns || !insns_cnt)
5583                 return -EINVAL;
5584
5585         memset(&load_attr, 0, sizeof(struct bpf_load_program_attr));
5586         load_attr.prog_type = prog->type;
5587         /* old kernels might not support specifying expected_attach_type */
5588         if (!kernel_supports(FEAT_EXP_ATTACH_TYPE) && prog->sec_def &&
5589             prog->sec_def->is_exp_attach_type_optional)
5590                 load_attr.expected_attach_type = 0;
5591         else
5592                 load_attr.expected_attach_type = prog->expected_attach_type;
5593         if (kernel_supports(FEAT_PROG_NAME))
5594                 load_attr.name = prog->name;
5595         load_attr.insns = insns;
5596         load_attr.insns_cnt = insns_cnt;
5597         load_attr.license = license;
5598         if (prog->type == BPF_PROG_TYPE_STRUCT_OPS ||
5599             prog->type == BPF_PROG_TYPE_LSM) {
5600                 load_attr.attach_btf_id = prog->attach_btf_id;
5601         } else if (prog->type == BPF_PROG_TYPE_TRACING ||
5602                    prog->type == BPF_PROG_TYPE_EXT) {
5603                 load_attr.attach_prog_fd = prog->attach_prog_fd;
5604                 load_attr.attach_btf_id = prog->attach_btf_id;
5605         } else {
5606                 load_attr.kern_version = kern_version;
5607                 load_attr.prog_ifindex = prog->prog_ifindex;
5608         }
5609         /* specify func_info/line_info only if kernel supports them */
5610         btf_fd = bpf_object__btf_fd(prog->obj);
5611         if (btf_fd >= 0 && kernel_supports(FEAT_BTF_FUNC)) {
5612                 load_attr.prog_btf_fd = btf_fd;
5613                 load_attr.func_info = prog->func_info;
5614                 load_attr.func_info_rec_size = prog->func_info_rec_size;
5615                 load_attr.func_info_cnt = prog->func_info_cnt;
5616                 load_attr.line_info = prog->line_info;
5617                 load_attr.line_info_rec_size = prog->line_info_rec_size;
5618                 load_attr.line_info_cnt = prog->line_info_cnt;
5619         }
5620         load_attr.log_level = prog->log_level;
5621         load_attr.prog_flags = prog->prog_flags;
5622
5623 retry_load:
5624         if (log_buf_size) {
5625                 log_buf = malloc(log_buf_size);
5626                 if (!log_buf)
5627                         return -ENOMEM;
5628
5629                 *log_buf = 0;
5630         }
5631
5632         ret = bpf_load_program_xattr(&load_attr, log_buf, log_buf_size);
5633
5634         if (ret >= 0) {
5635                 if (log_buf && load_attr.log_level)
5636                         pr_debug("verifier log:\n%s", log_buf);
5637                 *pfd = ret;
5638                 ret = 0;
5639                 goto out;
5640         }
5641
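        /* the kernel reports -ENOSPC when the verifier log doesn't fit in
         * the buffer we passed in; grow it (to at least BPF_LOG_BUF_SIZE,
         * doubling after that) and retry the whole load
         */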
5642         if (!log_buf || errno == ENOSPC) {
5643                 log_buf_size = max((size_t)BPF_LOG_BUF_SIZE,
5644                                    log_buf_size << 1);
5645
5646                 free(log_buf);
5647                 goto retry_load;
5648         }
5649         ret = -errno;
5650         cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
5651         pr_warn("load bpf program failed: %s\n", cp);
5652         pr_perm_msg(ret);
5653
5654         if (log_buf && log_buf[0] != '\0') {
5655                 ret = -LIBBPF_ERRNO__VERIFY;
5656                 pr_warn("-- BEGIN DUMP LOG ---\n");
5657                 pr_warn("\n%s\n", log_buf);
5658                 pr_warn("-- END LOG --\n");
5659         } else if (load_attr.insns_cnt >= BPF_MAXINSNS) {
5660                 pr_warn("Program too large (%zu insns), at most %d insns\n",
5661                         load_attr.insns_cnt, BPF_MAXINSNS);
5662                 ret = -LIBBPF_ERRNO__PROG2BIG;
5663         } else if (load_attr.prog_type != BPF_PROG_TYPE_KPROBE) {
5664                 /* Wrong program type? */
5665                 int fd;
5666
5667                 load_attr.prog_type = BPF_PROG_TYPE_KPROBE;
5668                 load_attr.expected_attach_type = 0;
5669                 fd = bpf_load_program_xattr(&load_attr, NULL, 0);
5670                 if (fd >= 0) {
5671                         close(fd);
5672                         ret = -LIBBPF_ERRNO__PROGTYPE;
5673                         goto out;
5674                 }
5675         }
5676
5677 out:
5678         free(log_buf);
5679         return ret;
5680 }
5681
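/* Self-contained sketch (not used by libbpf itself) of the syscall wrapper
 * that load_program() feeds its attributes to: loading a trivial "return 0"
 * socket filter via bpf_load_program_xattr(). Real callers should go
 * through bpf_object__load()/bpf_program__load() instead.
 */
static inline int example_load_null_prog(void)
{
        struct bpf_insn insns[] = {
                BPF_MOV64_IMM(BPF_REG_0, 0),    /* r0 = 0 */
                BPF_EXIT_INSN(),                /* return r0 */
        };
        struct bpf_load_program_attr attr = {
                .prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
                .insns = insns,
                .insns_cnt = ARRAY_SIZE(insns),
                .license = "GPL",
        };

        /* returns a new prog FD on success, negative error on failure */
        return bpf_load_program_xattr(&attr, NULL, 0);
}
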
5682 static int libbpf_find_attach_btf_id(struct bpf_program *prog);
5683
5684 int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_ver)
5685 {
5686         int err = 0, fd, i, btf_id;
5687
5688         if (prog->obj->loaded) {
5689                 pr_warn("prog '%s'('%s'): can't load after object was loaded\n",
5690                         prog->name, prog->section_name);
5691                 return -EINVAL;
5692         }
5693
5694         if ((prog->type == BPF_PROG_TYPE_TRACING ||
5695              prog->type == BPF_PROG_TYPE_LSM ||
5696              prog->type == BPF_PROG_TYPE_EXT) && !prog->attach_btf_id) {
5697                 btf_id = libbpf_find_attach_btf_id(prog);
5698                 if (btf_id <= 0)
5699                         return btf_id;
5700                 prog->attach_btf_id = btf_id;
5701         }
5702
5703         if (prog->instances.nr < 0 || !prog->instances.fds) {
5704                 if (prog->preprocessor) {
5705                         pr_warn("Internal error: can't load program '%s'\n",
5706                                 prog->section_name);
5707                         return -LIBBPF_ERRNO__INTERNAL;
5708                 }
5709
5710                 prog->instances.fds = malloc(sizeof(int));
5711                 if (!prog->instances.fds) {
5712                         pr_warn("Not enough memory for BPF fds\n");
5713                         return -ENOMEM;
5714                 }
5715                 prog->instances.nr = 1;
5716                 prog->instances.fds[0] = -1;
5717         }
5718
5719         if (!prog->preprocessor) {
5720                 if (prog->instances.nr != 1) {
5721                         pr_warn("Program '%s' is inconsistent: nr(%d) != 1\n",
5722                                 prog->section_name, prog->instances.nr);
5723                 }
5724                 err = load_program(prog, prog->insns, prog->insns_cnt,
5725                                    license, kern_ver, &fd);
5726                 if (!err)
5727                         prog->instances.fds[0] = fd;
5728                 goto out;
5729         }
5730
5731         for (i = 0; i < prog->instances.nr; i++) {
5732                 struct bpf_prog_prep_result result;
5733                 bpf_program_prep_t preprocessor = prog->preprocessor;
5734
5735                 memset(&result, 0, sizeof(result));
5736                 err = preprocessor(prog, i, prog->insns,
5737                                    prog->insns_cnt, &result);
5738                 if (err) {
5739                         pr_warn("Preprocessing the %dth instance of program '%s' failed\n",
5740                                 i, prog->section_name);
5741                         goto out;
5742                 }
5743
5744                 if (!result.new_insn_ptr || !result.new_insn_cnt) {
5745                         pr_debug("Skip loading the %dth instance of program '%s'\n",
5746                                  i, prog->section_name);
5747                         prog->instances.fds[i] = -1;
5748                         if (result.pfd)
5749                                 *result.pfd = -1;
5750                         continue;
5751                 }
5752
5753                 err = load_program(prog, result.new_insn_ptr,
5754                                    result.new_insn_cnt, license, kern_ver, &fd);
5755                 if (err) {
5756                         pr_warn("Loading the %dth instance of program '%s' failed\n",
5757                                 i, prog->section_name);
5758                         goto out;
5759                 }
5760
5761                 if (result.pfd)
5762                         *result.pfd = fd;
5763                 prog->instances.fds[i] = fd;
5764         }
5765 out:
5766         if (err)
5767                 pr_warn("failed to load program '%s'\n", prog->section_name);
5768         zfree(&prog->insns);
5769         prog->insns_cnt = 0;
5770         return err;
5771 }
5772
5773 static bool bpf_program__is_function_storage(const struct bpf_program *prog,
5774                                              const struct bpf_object *obj)
5775 {
5776         return prog->idx == obj->efile.text_shndx && obj->has_pseudo_calls;
5777 }
5778
5779 static int
5780 bpf_object__load_progs(struct bpf_object *obj, int log_level)
5781 {
5782         struct bpf_program *prog;
5783         size_t i;
5784         int err;
5785
5786         for (i = 0; i < obj->nr_programs; i++) {
5787                 prog = &obj->programs[i];
5788                 err = bpf_object__sanitize_prog(obj, prog);
5789                 if (err)
5790                         return err;
5791         }
5792
5793         for (i = 0; i < obj->nr_programs; i++) {
5794                 prog = &obj->programs[i];
5795                 if (bpf_program__is_function_storage(prog, obj))
5796                         continue;
5797                 if (!prog->load) {
5798                         pr_debug("prog '%s'('%s'): skipped loading\n",
5799                                  prog->name, prog->section_name);
5800                         continue;
5801                 }
5802                 prog->log_level |= log_level;
5803                 err = bpf_program__load(prog, obj->license, obj->kern_version);
5804                 if (err)
5805                         return err;
5806         }
5807         return 0;
5808 }
5809
5810 static const struct bpf_sec_def *find_sec_def(const char *sec_name);
5811
5812 static struct bpf_object *
5813 __bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz,
5814                    const struct bpf_object_open_opts *opts)
5815 {
5816         const char *obj_name, *kconfig;
5817         struct bpf_program *prog;
5818         struct bpf_object *obj;
5819         char tmp_name[64];
5820         int err;
5821
5822         if (elf_version(EV_CURRENT) == EV_NONE) {
5823                 pr_warn("failed to init libelf for %s\n",
5824                         path ? : "(mem buf)");
5825                 return ERR_PTR(-LIBBPF_ERRNO__LIBELF);
5826         }
5827
5828         if (!OPTS_VALID(opts, bpf_object_open_opts))
5829                 return ERR_PTR(-EINVAL);
5830
5831         obj_name = OPTS_GET(opts, object_name, NULL);
5832         if (obj_buf) {
5833                 if (!obj_name) {
5834                         snprintf(tmp_name, sizeof(tmp_name), "%lx-%lx",
5835                                  (unsigned long)obj_buf,
5836                                  (unsigned long)obj_buf_sz);
5837                         obj_name = tmp_name;
5838                 }
5839                 path = obj_name;
5840                 pr_debug("loading object '%s' from buffer\n", obj_name);
5841         }
5842
5843         obj = bpf_object__new(path, obj_buf, obj_buf_sz, obj_name);
5844         if (IS_ERR(obj))
5845                 return obj;
5846
5847         kconfig = OPTS_GET(opts, kconfig, NULL);
5848         if (kconfig) {
5849                 obj->kconfig = strdup(kconfig);
5850                 if (!obj->kconfig)
5851                         return ERR_PTR(-ENOMEM);
5852         }
5853
5854         err = bpf_object__elf_init(obj);
5855         err = err ? : bpf_object__check_endianness(obj);
5856         err = err ? : bpf_object__elf_collect(obj);
5857         err = err ? : bpf_object__collect_externs(obj);
5858         err = err ? : bpf_object__finalize_btf(obj);
5859         err = err ? : bpf_object__init_maps(obj, opts);
5860         err = err ? : bpf_object__init_prog_names(obj);
5861         err = err ? : bpf_object__collect_reloc(obj);
5862         if (err)
5863                 goto out;
5864         bpf_object__elf_finish(obj);
5865
5866         bpf_object__for_each_program(prog, obj) {
5867                 prog->sec_def = find_sec_def(prog->section_name);
5868                 if (!prog->sec_def)
5869                         /* couldn't guess, but user might manually specify */
5870                         continue;
5871
5872                 bpf_program__set_type(prog, prog->sec_def->prog_type);
5873                 bpf_program__set_expected_attach_type(prog,
5874                                 prog->sec_def->expected_attach_type);
5875
5876                 if (prog->sec_def->prog_type == BPF_PROG_TYPE_TRACING ||
5877                     prog->sec_def->prog_type == BPF_PROG_TYPE_EXT)
5878                         prog->attach_prog_fd = OPTS_GET(opts, attach_prog_fd, 0);
5879         }
5880
5881         return obj;
5882 out:
5883         bpf_object__close(obj);
5884         return ERR_PTR(err);
5885 }
5886
5887 static struct bpf_object *
5888 __bpf_object__open_xattr(struct bpf_object_open_attr *attr, int flags)
5889 {
5890         DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts,
5891                 .relaxed_maps = flags & MAPS_RELAX_COMPAT,
5892         );
5893
5894         /* param validation */
5895         if (!attr->file)
5896                 return NULL;
5897
5898         pr_debug("loading %s\n", attr->file);
5899         return __bpf_object__open(attr->file, NULL, 0, &opts);
5900 }
5901
5902 struct bpf_object *bpf_object__open_xattr(struct bpf_object_open_attr *attr)
5903 {
5904         return __bpf_object__open_xattr(attr, 0);
5905 }
5906
5907 struct bpf_object *bpf_object__open(const char *path)
5908 {
5909         struct bpf_object_open_attr attr = {
5910                 .file           = path,
5911                 .prog_type      = BPF_PROG_TYPE_UNSPEC,
5912         };
5913
5914         return bpf_object__open_xattr(&attr);
5915 }
5916
5917 struct bpf_object *
5918 bpf_object__open_file(const char *path, const struct bpf_object_open_opts *opts)
5919 {
5920         if (!path)
5921                 return ERR_PTR(-EINVAL);
5922
5923         pr_debug("loading %s\n", path);
5924
5925         return __bpf_object__open(path, NULL, 0, opts);
5926 }
5927
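/* Typical caller-side flow (sketch; the object path and name are
 * placeholders):
 *
 *	DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts,
 *		.object_name = "demo",
 *	);
 *	struct bpf_object *obj;
 *	int err;
 *
 *	obj = bpf_object__open_file("prog.bpf.o", &opts);
 *	err = libbpf_get_error(obj);
 *	if (err)
 *		return err;
 *	err = bpf_object__load(obj);
 *	if (err)
 *		bpf_object__close(obj);
 */
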
5928 struct bpf_object *
5929 bpf_object__open_mem(const void *obj_buf, size_t obj_buf_sz,
5930                      const struct bpf_object_open_opts *opts)
5931 {
5932         if (!obj_buf || obj_buf_sz == 0)
5933                 return ERR_PTR(-EINVAL);
5934
5935         return __bpf_object__open(NULL, obj_buf, obj_buf_sz, opts);
5936 }
5937
5938 struct bpf_object *
5939 bpf_object__open_buffer(const void *obj_buf, size_t obj_buf_sz,
5940                         const char *name)
5941 {
5942         DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts,
5943                 .object_name = name,
5944                 /* wrong default, but backwards-compatible */
5945                 .relaxed_maps = true,
5946         );
5947
5948         /* returning NULL is wrong, but backwards-compatible */
5949         if (!obj_buf || obj_buf_sz == 0)
5950                 return NULL;
5951
5952         return bpf_object__open_mem(obj_buf, obj_buf_sz, &opts);
5953 }
5954
5955 int bpf_object__unload(struct bpf_object *obj)
5956 {
5957         size_t i;
5958
5959         if (!obj)
5960                 return -EINVAL;
5961
5962         for (i = 0; i < obj->nr_maps; i++) {
5963                 zclose(obj->maps[i].fd);
5964                 if (obj->maps[i].st_ops)
5965                         zfree(&obj->maps[i].st_ops->kern_vdata);
5966         }
5967
5968         for (i = 0; i < obj->nr_programs; i++)
5969                 bpf_program__unload(&obj->programs[i]);
5970
5971         return 0;
5972 }
5973
5974 static int bpf_object__sanitize_maps(struct bpf_object *obj)
5975 {
5976         struct bpf_map *m;
5977
5978         bpf_object__for_each_map(m, obj) {
5979                 if (!bpf_map__is_internal(m))
5980                         continue;
5981                 if (!kernel_supports(FEAT_GLOBAL_DATA)) {
5982                         pr_warn("kernel doesn't support global data\n");
5983                         return -ENOTSUP;
5984                 }
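                /* internal maps are created with BPF_F_MMAPABLE set, so
                 * the XOR below just clears that flag again on kernels
                 * that can't mmap() array maps
                 */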
5985                 if (!kernel_supports(FEAT_ARRAY_MMAP))
5986                         m->def.map_flags ^= BPF_F_MMAPABLE;
5987         }
5988
5989         return 0;
5990 }
5991
5992 static int bpf_object__read_kallsyms_file(struct bpf_object *obj)
5993 {
5994         char sym_type, sym_name[500];
5995         unsigned long long sym_addr;
5996         struct extern_desc *ext;
5997         int ret, err = 0;
5998         FILE *f;
5999
6000         f = fopen("/proc/kallsyms", "r");
6001         if (!f) {
6002                 err = -errno;
6003                 pr_warn("failed to open /proc/kallsyms: %d\n", err);
6004                 return err;
6005         }
6006
6007         while (true) {
6008                 ret = fscanf(f, "%llx %c %499s%*[^\n]\n",
6009                              &sym_addr, &sym_type, sym_name);
6010                 if (ret == EOF && feof(f))
6011                         break;
6012                 if (ret != 3) {
6013                         pr_warn("failed to read kallsyms entry: %d\n", ret);
6014                         err = -EINVAL;
6015                         goto out;
6016                 }
6017
6018                 ext = find_extern_by_name(obj, sym_name);
6019                 if (!ext || ext->type != EXT_KSYM)
6020                         continue;
6021
6022                 if (ext->is_set && ext->ksym.addr != sym_addr) {
6023                         pr_warn("extern (ksym) '%s' resolution is ambiguous: 0x%llx or 0x%llx\n",
6024                                 sym_name, ext->ksym.addr, sym_addr);
6025                         err = -EINVAL;
6026                         goto out;
6027                 }
6028                 if (!ext->is_set) {
6029                         ext->is_set = true;
6030                         ext->ksym.addr = sym_addr;
6031                         pr_debug("extern (ksym) %s=0x%llx\n", sym_name, sym_addr);
6032                 }
6033         }
6034
6035 out:
6036         fclose(f);
6037         return err;
6038 }
6039
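/* For reference, /proc/kallsyms lines have the form
 * "<hex address> <type> <symbol>[\t<module>]", e.g.:
 *
 *	ffffffff81000000 T _stext
 *	ffffffff81001234 t some_static_func	(address/name illustrative)
 *
 * which is exactly what the fscanf() format string above picks apart.
 */
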
6040 static int bpf_object__resolve_externs(struct bpf_object *obj,
6041                                        const char *extra_kconfig)
6042 {
6043         bool need_config = false, need_kallsyms = false;
6044         struct extern_desc *ext;
6045         void *kcfg_data = NULL;
6046         int err, i;
6047
6048         if (obj->nr_extern == 0)
6049                 return 0;
6050
6051         if (obj->kconfig_map_idx >= 0)
6052                 kcfg_data = obj->maps[obj->kconfig_map_idx].mmaped;
6053
6054         for (i = 0; i < obj->nr_extern; i++) {
6055                 ext = &obj->externs[i];
6056
6057                 if (ext->type == EXT_KCFG &&
6058                     strcmp(ext->name, "LINUX_KERNEL_VERSION") == 0) {
6059                         void *ext_val = kcfg_data + ext->kcfg.data_off;
6060                         __u32 kver = get_kernel_version();
6061
6062                         if (!kver) {
6063                                 pr_warn("failed to get kernel version\n");
6064                                 return -EINVAL;
6065                         }
6066                         err = set_kcfg_value_num(ext, ext_val, kver);
6067                         if (err)
6068                                 return err;
6069                         pr_debug("extern (kcfg) %s=0x%x\n", ext->name, kver);
6070                 } else if (ext->type == EXT_KCFG &&
6071                            strncmp(ext->name, "CONFIG_", 7) == 0) {
6072                         need_config = true;
6073                 } else if (ext->type == EXT_KSYM) {
6074                         need_kallsyms = true;
6075                 } else {
6076                         pr_warn("unrecognized extern '%s'\n", ext->name);
6077                         return -EINVAL;
6078                 }
6079         }
6080         if (need_config && extra_kconfig) {
6081                 err = bpf_object__read_kconfig_mem(obj, extra_kconfig, kcfg_data);
6082                 if (err)
6083                         return -EINVAL;
6084                 need_config = false;
6085                 for (i = 0; i < obj->nr_extern; i++) {
6086                         ext = &obj->externs[i];
6087                         if (ext->type == EXT_KCFG && !ext->is_set) {
6088                                 need_config = true;
6089                                 break;
6090                         }
6091                 }
6092         }
6093         if (need_config) {
6094                 err = bpf_object__read_kconfig_file(obj, kcfg_data);
6095                 if (err)
6096                         return -EINVAL;
6097         }
6098         if (need_kallsyms) {
6099                 err = bpf_object__read_kallsyms_file(obj);
6100                 if (err)
6101                         return -EINVAL;
6102         }
6103         for (i = 0; i < obj->nr_extern; i++) {
6104                 ext = &obj->externs[i];
6105
6106                 if (!ext->is_set && !ext->is_weak) {
6107                         pr_warn("extern %s (strong) not resolved\n", ext->name);
6108                         return -ESRCH;
6109                 } else if (!ext->is_set) {
6110                         pr_debug("extern %s (weak) not resolved, defaulting to zero\n",
6111                                  ext->name);
6112                 }
6113         }
6114
6115         return 0;
6116 }
6117
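/* The BPF-side declarations resolved above look roughly like this (a
 * sketch; __kconfig and __ksym are the bpf_helpers.h attribute macros):
 *
 *	extern int LINUX_KERNEL_VERSION __kconfig;
 *	extern unsigned int CONFIG_HZ __kconfig;
 *	extern const void bpf_prog_active __ksym;
 *
 * kconfig externs are filled from the running kernel's config, ksym
 * externs from /proc/kallsyms.
 */
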
6118 int bpf_object__load_xattr(struct bpf_object_load_attr *attr)
6119 {
6120         struct bpf_object *obj;
6121         int err, i;
6122
6123         if (!attr)
6124                 return -EINVAL;
6125         obj = attr->obj;
6126         if (!obj)
6127                 return -EINVAL;
6128
6129         if (obj->loaded) {
6130                 pr_warn("object '%s': load can't be attempted twice\n", obj->name);
6131                 return -EINVAL;
6132         }
6133
6134         err = bpf_object__probe_loading(obj);
6135         err = err ? : bpf_object__resolve_externs(obj, obj->kconfig);
6136         err = err ? : bpf_object__sanitize_and_load_btf(obj);
6137         err = err ? : bpf_object__sanitize_maps(obj);
6138         err = err ? : bpf_object__load_vmlinux_btf(obj);
6139         err = err ? : bpf_object__init_kern_struct_ops_maps(obj);
6140         err = err ? : bpf_object__create_maps(obj);
6141         err = err ? : bpf_object__relocate(obj, attr->target_btf_path);
6142         err = err ? : bpf_object__load_progs(obj, attr->log_level);
6143
6144         btf__free(obj->btf_vmlinux);
6145         obj->btf_vmlinux = NULL;
6146
6147         obj->loaded = true; /* set whether the load succeeded or not */
6148
6149         if (err)
6150                 goto out;
6151
6152         return 0;
6153 out:
6154         /* unpin any maps that were auto-pinned during load */
6155         for (i = 0; i < obj->nr_maps; i++)
6156                 if (obj->maps[i].pinned && !obj->maps[i].reused)
6157                         bpf_map__unpin(&obj->maps[i], NULL);
6158
6159         bpf_object__unload(obj);
6160         pr_warn("failed to load object '%s'\n", obj->path);
6161         return err;
6162 }
6163
6164 int bpf_object__load(struct bpf_object *obj)
6165 {
6166         struct bpf_object_load_attr attr = {
6167                 .obj = obj,
6168         };
6169
6170         return bpf_object__load_xattr(&attr);
6171 }
6172
6173 static int make_parent_dir(const char *path)
6174 {
6175         char *cp, errmsg[STRERR_BUFSIZE];
6176         char *dname, *dir;
6177         int err = 0;
6178
6179         dname = strdup(path);
6180         if (dname == NULL)
6181                 return -ENOMEM;
6182
6183         dir = dirname(dname);
6184         if (mkdir(dir, 0700) && errno != EEXIST)
6185                 err = -errno;
6186
6187         free(dname);
6188         if (err) {
6189                 cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg));
6190                 pr_warn("failed to mkdir %s: %s\n", path, cp);
6191         }
6192         return err;
6193 }
6194
6195 static int check_path(const char *path)
6196 {
6197         char *cp, errmsg[STRERR_BUFSIZE];
6198         struct statfs st_fs;
6199         char *dname, *dir;
6200         int err = 0;
6201
6202         if (path == NULL)
6203                 return -EINVAL;
6204
6205         dname = strdup(path);
6206         if (dname == NULL)
6207                 return -ENOMEM;
6208
6209         dir = dirname(dname);
6210         if (statfs(dir, &st_fs)) {
6211                 cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
6212                 pr_warn("failed to statfs %s: %s\n", dir, cp);
6213                 err = -errno;
6214         }
6215         free(dname);
6216
6217         if (!err && st_fs.f_type != BPF_FS_MAGIC) {
6218                 pr_warn("specified path %s is not on BPF FS\n", path);
6219                 err = -EINVAL;
6220         }
6221
6222         return err;
6223 }
6224
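/* The pinning helpers below require the target path to live on a BPF
 * filesystem (see the BPF_FS_MAGIC check in check_path() above); one is
 * conventionally mounted at /sys/fs/bpf:
 *
 *	mount -t bpf bpf /sys/fs/bpf
 */
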
6225 int bpf_program__pin_instance(struct bpf_program *prog, const char *path,
6226                               int instance)
6227 {
6228         char *cp, errmsg[STRERR_BUFSIZE];
6229         int err;
6230
6231         err = make_parent_dir(path);
6232         if (err)
6233                 return err;
6234
6235         err = check_path(path);
6236         if (err)
6237                 return err;
6238
6239         if (prog == NULL) {
6240                 pr_warn("invalid program pointer\n");
6241                 return -EINVAL;
6242         }
6243
6244         if (instance < 0 || instance >= prog->instances.nr) {
6245                 pr_warn("invalid prog instance %d of prog %s (max %d)\n",
6246                         instance, prog->section_name, prog->instances.nr);
6247                 return -EINVAL;
6248         }
6249
6250         if (bpf_obj_pin(prog->instances.fds[instance], path)) {
6251                 err = -errno;
6252                 cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
6253                 pr_warn("failed to pin program: %s\n", cp);
6254                 return err;
6255         }
6256         pr_debug("pinned program '%s'\n", path);
6257
6258         return 0;
6259 }
6260
6261 int bpf_program__unpin_instance(struct bpf_program *prog, const char *path,
6262                                 int instance)
6263 {
6264         int err;
6265
6266         err = check_path(path);
6267         if (err)
6268                 return err;
6269
6270         if (prog == NULL) {
6271                 pr_warn("invalid program pointer\n");
6272                 return -EINVAL;
6273         }
6274
6275         if (instance < 0 || instance >= prog->instances.nr) {
6276                 pr_warn("invalid prog instance %d of prog %s (max %d)\n",
6277                         instance, prog->section_name, prog->instances.nr);
6278                 return -EINVAL;
6279         }
6280
6281         err = unlink(path);
6282         if (err != 0)
6283                 return -errno;
6284         pr_debug("unpinned program '%s'\n", path);
6285
6286         return 0;
6287 }
6288
6289 int bpf_program__pin(struct bpf_program *prog, const char *path)
6290 {
6291         int i, err;
6292
6293         err = make_parent_dir(path);
6294         if (err)
6295                 return err;
6296
6297         err = check_path(path);
6298         if (err)
6299                 return err;
6300
6301         if (prog == NULL) {
6302                 pr_warn("invalid program pointer\n");
6303                 return -EINVAL;
6304         }
6305
6306         if (prog->instances.nr <= 0) {
6307                 pr_warn("no instances of prog %s to pin\n",
6308                         prog->section_name);
6309                 return -EINVAL;
6310         }
6311
6312         if (prog->instances.nr == 1) {
6313                 /* don't create subdirs when pinning single instance */
6314                 return bpf_program__pin_instance(prog, path, 0);
6315         }
6316
6317         for (i = 0; i < prog->instances.nr; i++) {
6318                 char buf[PATH_MAX];
6319                 int len;
6320
6321                 len = snprintf(buf, PATH_MAX, "%s/%d", path, i);
6322                 if (len < 0) {
6323                         err = -EINVAL;
6324                         goto err_unpin;
6325                 } else if (len >= PATH_MAX) {
6326                         err = -ENAMETOOLONG;
6327                         goto err_unpin;
6328                 }
6329
6330                 err = bpf_program__pin_instance(prog, buf, i);
6331                 if (err)
6332                         goto err_unpin;
6333         }
6334
6335         return 0;
6336
6337 err_unpin:
6338         for (i = i - 1; i >= 0; i--) {
6339                 char buf[PATH_MAX];
6340                 int len;
6341
6342                 len = snprintf(buf, PATH_MAX, "%s/%d", path, i);
6343                 if (len < 0)
6344                         continue;
6345                 else if (len >= PATH_MAX)
6346                         continue;
6347
6348                 bpf_program__unpin_instance(prog, buf, i);
6349         }
6350
6351         rmdir(path);
6352
6353         return err;
6354 }
6355
6356 int bpf_program__unpin(struct bpf_program *prog, const char *path)
6357 {
6358         int i, err;
6359
6360         err = check_path(path);
6361         if (err)
6362                 return err;
6363
6364         if (prog == NULL) {
6365                 pr_warn("invalid program pointer\n");
6366                 return -EINVAL;
6367         }
6368
6369         if (prog->instances.nr <= 0) {
6370                 pr_warn("no instances of prog %s to unpin\n",
6371                         prog->section_name);
6372                 return -EINVAL;
6373         }
6374
6375         if (prog->instances.nr == 1) {
6376                 /* no subdirs were created when pinning a single instance */
6377                 return bpf_program__unpin_instance(prog, path, 0);
6378         }
6379
6380         for (i = 0; i < prog->instances.nr; i++) {
6381                 char buf[PATH_MAX];
6382                 int len;
6383
6384                 len = snprintf(buf, PATH_MAX, "%s/%d", path, i);
6385                 if (len < 0)
6386                         return -EINVAL;
6387                 else if (len >= PATH_MAX)
6388                         return -ENAMETOOLONG;
6389
6390                 err = bpf_program__unpin_instance(prog, buf, i);
6391                 if (err)
6392                         return err;
6393         }
6394
6395         err = rmdir(path);
6396         if (err)
6397                 return -errno;
6398
6399         return 0;
6400 }
6401
6402 int bpf_map__pin(struct bpf_map *map, const char *path)
6403 {
6404         char *cp, errmsg[STRERR_BUFSIZE];
6405         int err;
6406
6407         if (map == NULL) {
6408                 pr_warn("invalid map pointer\n");
6409                 return -EINVAL;
6410         }
6411
6412         if (map->pin_path) {
6413                 if (path && strcmp(path, map->pin_path)) {
6414                         pr_warn("map '%s' already has pin path '%s' different from '%s'\n",
6415                                 bpf_map__name(map), map->pin_path, path);
6416                         return -EINVAL;
6417                 } else if (map->pinned) {
6418                         pr_debug("map '%s' already pinned at '%s'; not re-pinning\n",
6419                                  bpf_map__name(map), map->pin_path);
6420                         return 0;
6421                 }
6422         } else {
6423                 if (!path) {
6424                         pr_warn("missing a path to pin map '%s' at\n",
6425                                 bpf_map__name(map));
6426                         return -EINVAL;
6427                 } else if (map->pinned) {
6428                         pr_warn("map '%s' already pinned\n", bpf_map__name(map));
6429                         return -EEXIST;
6430                 }
6431
6432                 map->pin_path = strdup(path);
6433                 if (!map->pin_path) {
6434                         err = -errno;
6435                         goto out_err;
6436                 }
6437         }
6438
6439         err = make_parent_dir(map->pin_path);
6440         if (err)
6441                 return err;
6442
6443         err = check_path(map->pin_path);
6444         if (err)
6445                 return err;
6446
6447         if (bpf_obj_pin(map->fd, map->pin_path)) {
6448                 err = -errno;
6449                 goto out_err;
6450         }
6451
6452         map->pinned = true;
6453         pr_debug("pinned map '%s'\n", map->pin_path);
6454
6455         return 0;
6456
6457 out_err:
6458         cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg));
6459         pr_warn("failed to pin map: %s\n", cp);
6460         return err;
6461 }
6462
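/* Caller-side sketch (pin path is a placeholder): setting the pin path
 * first allows bpf_map__pin() to be called with a NULL path:
 *
 *	err = bpf_map__set_pin_path(map, "/sys/fs/bpf/demo_map");
 *	if (!err)
 *		err = bpf_map__pin(map, NULL);
 */
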
6463 int bpf_map__unpin(struct bpf_map *map, const char *path)
6464 {
6465         int err;
6466
6467         if (map == NULL) {
6468                 pr_warn("invalid map pointer\n");
6469                 return -EINVAL;
6470         }
6471
6472         if (map->pin_path) {
6473                 if (path && strcmp(path, map->pin_path)) {
6474                         pr_warn("map '%s' already has pin path '%s' different from '%s'\n",
6475                                 bpf_map__name(map), map->pin_path, path);
6476                         return -EINVAL;
6477                 }
6478                 path = map->pin_path;
6479         } else if (!path) {
6480                 pr_warn("no path to unpin map '%s' from\n",
6481                         bpf_map__name(map));
6482                 return -EINVAL;
6483         }
6484
6485         err = check_path(path);
6486         if (err)
6487                 return err;
6488
6489         err = unlink(path);
6490         if (err != 0)
6491                 return -errno;
6492
6493         map->pinned = false;
6494         pr_debug("unpinned map '%s' from '%s'\n", bpf_map__name(map), path);
6495
6496         return 0;
6497 }
6498
6499 int bpf_map__set_pin_path(struct bpf_map *map, const char *path)
6500 {
6501         char *new = NULL;
6502
6503         if (path) {
6504                 new = strdup(path);
6505                 if (!new)
6506                         return -errno;
6507         }
6508
6509         free(map->pin_path);
6510         map->pin_path = new;
6511         return 0;
6512 }
6513
6514 const char *bpf_map__get_pin_path(const struct bpf_map *map)
6515 {
6516         return map->pin_path;
6517 }
6518
6519 bool bpf_map__is_pinned(const struct bpf_map *map)
6520 {
6521         return map->pinned;
6522 }
6523
6524 int bpf_object__pin_maps(struct bpf_object *obj, const char *path)
6525 {
6526         struct bpf_map *map;
6527         int err;
6528
6529         if (!obj)
6530                 return -ENOENT;
6531
6532         if (!obj->loaded) {
6533                 pr_warn("object not yet loaded; load it first\n");
6534                 return -ENOENT;
6535         }
6536
6537         bpf_object__for_each_map(map, obj) {
6538                 char *pin_path = NULL;
6539                 char buf[PATH_MAX];
6540
6541                 if (path) {
6542                         int len;
6543
6544                         len = snprintf(buf, PATH_MAX, "%s/%s", path,
6545                                        bpf_map__name(map));
6546                         if (len < 0) {
6547                                 err = -EINVAL;
6548                                 goto err_unpin_maps;
6549                         } else if (len >= PATH_MAX) {
6550                                 err = -ENAMETOOLONG;
6551                                 goto err_unpin_maps;
6552                         }
6553                         pin_path = buf;
6554                 } else if (!map->pin_path) {
6555                         continue;
6556                 }
6557
6558                 err = bpf_map__pin(map, pin_path);
6559                 if (err)
6560                         goto err_unpin_maps;
6561         }
6562
6563         return 0;
6564
6565 err_unpin_maps:
6566         while ((map = bpf_map__prev(map, obj))) {
6567                 if (!map->pin_path)
6568                         continue;
6569
6570                 bpf_map__unpin(map, NULL);
6571         }
6572
6573         return err;
6574 }
6575
6576 int bpf_object__unpin_maps(struct bpf_object *obj, const char *path)
6577 {
6578         struct bpf_map *map;
6579         int err;
6580
6581         if (!obj)
6582                 return -ENOENT;
6583
6584         bpf_object__for_each_map(map, obj) {
6585                 char *pin_path = NULL;
6586                 char buf[PATH_MAX];
6587
6588                 if (path) {
6589                         int len;
6590
6591                         len = snprintf(buf, PATH_MAX, "%s/%s", path,
6592                                        bpf_map__name(map));
6593                         if (len < 0)
6594                                 return -EINVAL;
6595                         else if (len >= PATH_MAX)
6596                                 return -ENAMETOOLONG;
6597                         pin_path = buf;
6598                 } else if (!map->pin_path) {
6599                         continue;
6600                 }
6601
6602                 err = bpf_map__unpin(map, pin_path);
6603                 if (err)
6604                         return err;
6605         }
6606
6607         return 0;
6608 }
6609
6610 int bpf_object__pin_programs(struct bpf_object *obj, const char *path)
6611 {
6612         struct bpf_program *prog;
6613         int err;
6614
6615         if (!obj)
6616                 return -ENOENT;
6617
6618         if (!obj->loaded) {
6619                 pr_warn("object not yet loaded; load it first\n");
6620                 return -ENOENT;
6621         }
6622
6623         bpf_object__for_each_program(prog, obj) {
6624                 char buf[PATH_MAX];
6625                 int len;
6626
6627                 len = snprintf(buf, PATH_MAX, "%s/%s", path,
6628                                prog->pin_name);
6629                 if (len < 0) {
6630                         err = -EINVAL;
6631                         goto err_unpin_programs;
6632                 } else if (len >= PATH_MAX) {
6633                         err = -ENAMETOOLONG;
6634                         goto err_unpin_programs;
6635                 }
6636
6637                 err = bpf_program__pin(prog, buf);
6638                 if (err)
6639                         goto err_unpin_programs;
6640         }
6641
6642         return 0;
6643
6644 err_unpin_programs:
6645         while ((prog = bpf_program__prev(prog, obj))) {
6646                 char buf[PATH_MAX];
6647                 int len;
6648
6649                 len = snprintf(buf, PATH_MAX, "%s/%s", path,
6650                                prog->pin_name);
6651                 if (len < 0)
6652                         continue;
6653                 else if (len >= PATH_MAX)
6654                         continue;
6655
6656                 bpf_program__unpin(prog, buf);
6657         }
6658
6659         return err;
6660 }
6661
6662 int bpf_object__unpin_programs(struct bpf_object *obj, const char *path)
6663 {
6664         struct bpf_program *prog;
6665         int err;
6666
6667         if (!obj)
6668                 return -ENOENT;
6669
6670         bpf_object__for_each_program(prog, obj) {
6671                 char buf[PATH_MAX];
6672                 int len;
6673
6674                 len = snprintf(buf, PATH_MAX, "%s/%s", path,
6675                                prog->pin_name);
6676                 if (len < 0)
6677                         return -EINVAL;
6678                 else if (len >= PATH_MAX)
6679                         return -ENAMETOOLONG;
6680
6681                 err = bpf_program__unpin(prog, buf);
6682                 if (err)
6683                         return err;
6684         }
6685
6686         return 0;
6687 }
6688
6689 int bpf_object__pin(struct bpf_object *obj, const char *path)
6690 {
6691         int err;
6692
6693         err = bpf_object__pin_maps(obj, path);
6694         if (err)
6695                 return err;
6696
6697         err = bpf_object__pin_programs(obj, path);
6698         if (err) {
6699                 bpf_object__unpin_maps(obj, path);
6700                 return err;
6701         }
6702
6703         return 0;
6704 }
6705
6706 static void bpf_map__destroy(struct bpf_map *map)
6707 {
6708         if (map->clear_priv)
6709                 map->clear_priv(map, map->priv);
6710         map->priv = NULL;
6711         map->clear_priv = NULL;
6712
6713         if (map->inner_map) {
6714                 bpf_map__destroy(map->inner_map);
6715                 zfree(&map->inner_map);
6716         }
6717
6718         zfree(&map->init_slots);
6719         map->init_slots_sz = 0;
6720
6721         if (map->mmaped) {
6722                 munmap(map->mmaped, bpf_map_mmap_sz(map));
6723                 map->mmaped = NULL;
6724         }
6725
6726         if (map->st_ops) {
6727                 zfree(&map->st_ops->data);
6728                 zfree(&map->st_ops->progs);
6729                 zfree(&map->st_ops->kern_func_off);
6730                 zfree(&map->st_ops);
6731         }
6732
6733         zfree(&map->name);
6734         zfree(&map->pin_path);
6735
6736         if (map->fd >= 0)
6737                 zclose(map->fd);
6738 }
6739
6740 void bpf_object__close(struct bpf_object *obj)
6741 {
6742         size_t i;
6743
6744         if (IS_ERR_OR_NULL(obj))
6745                 return;
6746
6747         if (obj->clear_priv)
6748                 obj->clear_priv(obj, obj->priv);
6749
6750         bpf_object__elf_finish(obj);
6751         bpf_object__unload(obj);
6752         btf__free(obj->btf);
6753         btf_ext__free(obj->btf_ext);
6754
6755         for (i = 0; i < obj->nr_maps; i++)
6756                 bpf_map__destroy(&obj->maps[i]);
6757
6758         zfree(&obj->kconfig);
6759         zfree(&obj->externs);
6760         obj->nr_extern = 0;
6761
6762         zfree(&obj->maps);
6763         obj->nr_maps = 0;
6764
6765         if (obj->programs && obj->nr_programs) {
6766                 for (i = 0; i < obj->nr_programs; i++)
6767                         bpf_program__exit(&obj->programs[i]);
6768         }
6769         zfree(&obj->programs);
6770
6771         list_del(&obj->list);
6772         free(obj);
6773 }
6774
6775 struct bpf_object *
6776 bpf_object__next(struct bpf_object *prev)
6777 {
6778         struct bpf_object *next;
6779
6780         if (!prev)
6781                 next = list_first_entry(&bpf_objects_list,
6782                                         struct bpf_object,
6783                                         list);
6784         else
6785                 next = list_next_entry(prev, list);
6786
6787         /* An empty list is detected here, so no check is needed on entry. */
6788         if (&next->list == &bpf_objects_list)
6789                 return NULL;
6790
6791         return next;
6792 }
6793
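/* Callers normally drive this iterator through the bpf_object__for_each_safe()
 * macro in libbpf.h rather than calling bpf_object__next() directly. */
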
6794 const char *bpf_object__name(const struct bpf_object *obj)
6795 {
6796         return obj ? obj->name : ERR_PTR(-EINVAL);
6797 }
6798
6799 unsigned int bpf_object__kversion(const struct bpf_object *obj)
6800 {
6801         return obj ? obj->kern_version : 0;
6802 }
6803
6804 struct btf *bpf_object__btf(const struct bpf_object *obj)
6805 {
6806         return obj ? obj->btf : NULL;
6807 }
6808
6809 int bpf_object__btf_fd(const struct bpf_object *obj)
6810 {
6811         return obj->btf ? btf__fd(obj->btf) : -1;
6812 }
6813
6814 int bpf_object__set_priv(struct bpf_object *obj, void *priv,
6815                          bpf_object_clear_priv_t clear_priv)
6816 {
6817         if (obj->priv && obj->clear_priv)
6818                 obj->clear_priv(obj, obj->priv);
6819
6820         obj->priv = priv;
6821         obj->clear_priv = clear_priv;
6822         return 0;
6823 }
6824
6825 void *bpf_object__priv(const struct bpf_object *obj)
6826 {
6827         return obj ? obj->priv : ERR_PTR(-EINVAL);
6828 }
6829
6830 static struct bpf_program *
6831 __bpf_program__iter(const struct bpf_program *p, const struct bpf_object *obj,
6832                     bool forward)
6833 {
6834         size_t nr_programs = obj->nr_programs;
6835         ssize_t idx;
6836
6837         if (!nr_programs)
6838                 return NULL;
6839
6840         if (!p)
6841                 /* Iter from the beginning */
6842                 return forward ? &obj->programs[0] :
6843                         &obj->programs[nr_programs - 1];
6844
6845         if (p->obj != obj) {
6846                 pr_warn("error: program handle doesn't match object\n");
6847                 return NULL;
6848         }
6849
6850         idx = (p - obj->programs) + (forward ? 1 : -1);
6851         if (idx >= obj->nr_programs || idx < 0)
6852                 return NULL;
6853         return &obj->programs[idx];
6854 }
6855
6856 struct bpf_program *
6857 bpf_program__next(struct bpf_program *prev, const struct bpf_object *obj)
6858 {
6859         struct bpf_program *prog = prev;
6860
6861         do {
6862                 prog = __bpf_program__iter(prog, obj, true);
6863         } while (prog && bpf_program__is_function_storage(prog, obj));
6864
6865         return prog;
6866 }
6867
6868 struct bpf_program *
6869 bpf_program__prev(struct bpf_program *next, const struct bpf_object *obj)
6870 {
6871         struct bpf_program *prog = next;
6872
6873         do {
6874                 prog = __bpf_program__iter(prog, obj, false);
6875         } while (prog && bpf_program__is_function_storage(prog, obj));
6876
6877         return prog;
6878 }
6879
6880 int bpf_program__set_priv(struct bpf_program *prog, void *priv,
6881                           bpf_program_clear_priv_t clear_priv)
6882 {
6883         if (prog->priv && prog->clear_priv)
6884                 prog->clear_priv(prog, prog->priv);
6885
6886         prog->priv = priv;
6887         prog->clear_priv = clear_priv;
6888         return 0;
6889 }
6890
6891 void *bpf_program__priv(const struct bpf_program *prog)
6892 {
6893         return prog ? prog->priv : ERR_PTR(-EINVAL);
6894 }
6895
6896 void bpf_program__set_ifindex(struct bpf_program *prog, __u32 ifindex)
6897 {
6898         prog->prog_ifindex = ifindex;
6899 }
6900
6901 const char *bpf_program__name(const struct bpf_program *prog)
6902 {
6903         return prog->name;
6904 }
6905
6906 const char *bpf_program__title(const struct bpf_program *prog, bool needs_copy)
6907 {
6908         const char *title;
6909
6910         title = prog->section_name;
6911         if (needs_copy) {
6912                 title = strdup(title);
6913                 if (!title) {
6914                         pr_warn("failed to strdup program title\n");
6915                         return ERR_PTR(-ENOMEM);
6916                 }
6917         }
6918
6919         return title;
6920 }
6921
6922 bool bpf_program__autoload(const struct bpf_program *prog)
6923 {
6924         return prog->load;
6925 }
6926
6927 int bpf_program__set_autoload(struct bpf_program *prog, bool autoload)
6928 {
6929         if (prog->obj->loaded)
6930                 return -EINVAL;
6931
6932         prog->load = autoload;
6933         return 0;
6934 }
6935
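/* Caller-side sketch (program name is a placeholder): exclude one program
 * from loading before bpf_object__load() is invoked:
 *
 *	prog = bpf_object__find_program_by_name(obj, "optional_probe");
 *	if (prog)
 *		bpf_program__set_autoload(prog, false);
 */
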
6936 int bpf_program__fd(const struct bpf_program *prog)
6937 {
6938         return bpf_program__nth_fd(prog, 0);
6939 }
6940
6941 size_t bpf_program__size(const struct bpf_program *prog)
6942 {
6943         return prog->insns_cnt * sizeof(struct bpf_insn);
6944 }
6945
6946 int bpf_program__set_prep(struct bpf_program *prog, int nr_instances,
6947                           bpf_program_prep_t prep)
6948 {
6949         int *instances_fds;
6950
6951         if (nr_instances <= 0 || !prep)
6952                 return -EINVAL;
6953
6954         if (prog->instances.nr > 0 || prog->instances.fds) {
6955                 pr_warn("Can't set pre-processor after loading\n");
6956                 return -EINVAL;
6957         }
6958
6959         instances_fds = malloc(sizeof(int) * nr_instances);
6960         if (!instances_fds) {
6961                 pr_warn("failed to allocate memory for instance fds\n");
6962                 return -ENOMEM;
6963         }
6964
6965         /* fill all fds with -1 */
6966         memset(instances_fds, -1, sizeof(int) * nr_instances);
6967
6968         prog->instances.nr = nr_instances;
6969         prog->instances.fds = instances_fds;
6970         prog->preprocessor = prep;
6971         return 0;
6972 }
6973
6974 int bpf_program__nth_fd(const struct bpf_program *prog, int n)
6975 {
6976         int fd;
6977
6978         if (!prog)
6979                 return -EINVAL;
6980
6981         if (n >= prog->instances.nr || n < 0) {
6982                 pr_warn("Can't get the %dth fd from program %s: only %d instances\n",
6983                         n, prog->section_name, prog->instances.nr);
6984                 return -EINVAL;
6985         }
6986
6987         fd = prog->instances.fds[n];
6988         if (fd < 0) {
6989                 pr_warn("%dth instance of program '%s' is invalid\n",
6990                         n, prog->section_name);
6991                 return -ENOENT;
6992         }
6993
6994         return fd;
6995 }
6996
6997 enum bpf_prog_type bpf_program__get_type(struct bpf_program *prog)
6998 {
6999         return prog->type;
7000 }
7001
7002 void bpf_program__set_type(struct bpf_program *prog, enum bpf_prog_type type)
7003 {
7004         prog->type = type;
7005 }
7006
7007 static bool bpf_program__is_type(const struct bpf_program *prog,
7008                                  enum bpf_prog_type type)
7009 {
7010         return prog ? (prog->type == type) : false;
7011 }
7012
7013 #define BPF_PROG_TYPE_FNS(NAME, TYPE)                           \
7014 int bpf_program__set_##NAME(struct bpf_program *prog)           \
7015 {                                                               \
7016         if (!prog)                                              \
7017                 return -EINVAL;                                 \
7018         bpf_program__set_type(prog, TYPE);                      \
7019         return 0;                                               \
7020 }                                                               \
7021                                                                 \
7022 bool bpf_program__is_##NAME(const struct bpf_program *prog)     \
7023 {                                                               \
7024         return bpf_program__is_type(prog, TYPE);                \
7025 }                                                               \
7026
7027 BPF_PROG_TYPE_FNS(socket_filter, BPF_PROG_TYPE_SOCKET_FILTER);
7028 BPF_PROG_TYPE_FNS(lsm, BPF_PROG_TYPE_LSM);
7029 BPF_PROG_TYPE_FNS(kprobe, BPF_PROG_TYPE_KPROBE);
7030 BPF_PROG_TYPE_FNS(sched_cls, BPF_PROG_TYPE_SCHED_CLS);
7031 BPF_PROG_TYPE_FNS(sched_act, BPF_PROG_TYPE_SCHED_ACT);
7032 BPF_PROG_TYPE_FNS(tracepoint, BPF_PROG_TYPE_TRACEPOINT);
7033 BPF_PROG_TYPE_FNS(raw_tracepoint, BPF_PROG_TYPE_RAW_TRACEPOINT);
7034 BPF_PROG_TYPE_FNS(xdp, BPF_PROG_TYPE_XDP);
7035 BPF_PROG_TYPE_FNS(perf_event, BPF_PROG_TYPE_PERF_EVENT);
7036 BPF_PROG_TYPE_FNS(tracing, BPF_PROG_TYPE_TRACING);
7037 BPF_PROG_TYPE_FNS(struct_ops, BPF_PROG_TYPE_STRUCT_OPS);
7038 BPF_PROG_TYPE_FNS(extension, BPF_PROG_TYPE_EXT);
7039 BPF_PROG_TYPE_FNS(sk_lookup, BPF_PROG_TYPE_SK_LOOKUP);
7040
7041 enum bpf_attach_type
7042 bpf_program__get_expected_attach_type(struct bpf_program *prog)
7043 {
7044         return prog->expected_attach_type;
7045 }
7046
7047 void bpf_program__set_expected_attach_type(struct bpf_program *prog,
7048                                            enum bpf_attach_type type)
7049 {
7050         prog->expected_attach_type = type;
7051 }
7052
7053 #define BPF_PROG_SEC_IMPL(string, ptype, eatype, eatype_optional,           \
7054                           attachable, attach_btf)                           \
7055         {                                                                   \
7056                 .sec = string,                                              \
7057                 .len = sizeof(string) - 1,                                  \
7058                 .prog_type = ptype,                                         \
7059                 .expected_attach_type = eatype,                             \
7060                 .is_exp_attach_type_optional = eatype_optional,             \
7061                 .is_attachable = attachable,                                \
7062                 .is_attach_btf = attach_btf,                                \
7063         }
7064
7065 /* Programs that can NOT be attached. */
7066 #define BPF_PROG_SEC(string, ptype) BPF_PROG_SEC_IMPL(string, ptype, 0, 0, 0, 0)
7067
7068 /* Programs that can be attached. */
7069 #define BPF_APROG_SEC(string, ptype, atype) \
7070         BPF_PROG_SEC_IMPL(string, ptype, atype, true, 1, 0)
7071
7072 /* Programs that must specify expected attach type at load time. */
7073 #define BPF_EAPROG_SEC(string, ptype, eatype) \
7074         BPF_PROG_SEC_IMPL(string, ptype, eatype, false, 1, 0)
7075
7076 /* Programs that use BTF to identify attach point */
7077 #define BPF_PROG_BTF(string, ptype, eatype) \
7078         BPF_PROG_SEC_IMPL(string, ptype, eatype, false, 0, 1)
7079
7080 /* Programs that can be attached but attach type can't be identified by section
7081  * name. Kept for backward compatibility.
7082  */
7083 #define BPF_APROG_COMPAT(string, ptype) BPF_PROG_SEC(string, ptype)
7084
7085 #define SEC_DEF(sec_pfx, ptype, ...) {                                      \
7086         .sec = sec_pfx,                                                     \
7087         .len = sizeof(sec_pfx) - 1,                                         \
7088         .prog_type = BPF_PROG_TYPE_##ptype,                                 \
7089         __VA_ARGS__                                                         \
7090 }
7091
7092 static struct bpf_link *attach_kprobe(const struct bpf_sec_def *sec,
7093                                       struct bpf_program *prog);
7094 static struct bpf_link *attach_tp(const struct bpf_sec_def *sec,
7095                                   struct bpf_program *prog);
7096 static struct bpf_link *attach_raw_tp(const struct bpf_sec_def *sec,
7097                                       struct bpf_program *prog);
7098 static struct bpf_link *attach_trace(const struct bpf_sec_def *sec,
7099                                      struct bpf_program *prog);
7100 static struct bpf_link *attach_lsm(const struct bpf_sec_def *sec,
7101                                    struct bpf_program *prog);
7102 static struct bpf_link *attach_iter(const struct bpf_sec_def *sec,
7103                                     struct bpf_program *prog);
7104
7105 static const struct bpf_sec_def section_defs[] = {
7106         BPF_PROG_SEC("socket",                  BPF_PROG_TYPE_SOCKET_FILTER),
7107         BPF_PROG_SEC("sk_reuseport",            BPF_PROG_TYPE_SK_REUSEPORT),
7108         SEC_DEF("kprobe/", KPROBE,
7109                 .attach_fn = attach_kprobe),
7110         BPF_PROG_SEC("uprobe/",                 BPF_PROG_TYPE_KPROBE),
7111         SEC_DEF("kretprobe/", KPROBE,
7112                 .attach_fn = attach_kprobe),
7113         BPF_PROG_SEC("uretprobe/",              BPF_PROG_TYPE_KPROBE),
7114         BPF_PROG_SEC("classifier",              BPF_PROG_TYPE_SCHED_CLS),
7115         BPF_PROG_SEC("action",                  BPF_PROG_TYPE_SCHED_ACT),
7116         SEC_DEF("tracepoint/", TRACEPOINT,
7117                 .attach_fn = attach_tp),
7118         SEC_DEF("tp/", TRACEPOINT,
7119                 .attach_fn = attach_tp),
7120         SEC_DEF("raw_tracepoint/", RAW_TRACEPOINT,
7121                 .attach_fn = attach_raw_tp),
7122         SEC_DEF("raw_tp/", RAW_TRACEPOINT,
7123                 .attach_fn = attach_raw_tp),
7124         SEC_DEF("tp_btf/", TRACING,
7125                 .expected_attach_type = BPF_TRACE_RAW_TP,
7126                 .is_attach_btf = true,
7127                 .attach_fn = attach_trace),
7128         SEC_DEF("fentry/", TRACING,
7129                 .expected_attach_type = BPF_TRACE_FENTRY,
7130                 .is_attach_btf = true,
7131                 .attach_fn = attach_trace),
7132         SEC_DEF("fmod_ret/", TRACING,
7133                 .expected_attach_type = BPF_MODIFY_RETURN,
7134                 .is_attach_btf = true,
7135                 .attach_fn = attach_trace),
7136         SEC_DEF("fexit/", TRACING,
7137                 .expected_attach_type = BPF_TRACE_FEXIT,
7138                 .is_attach_btf = true,
7139                 .attach_fn = attach_trace),
7140         SEC_DEF("freplace/", EXT,
7141                 .is_attach_btf = true,
7142                 .attach_fn = attach_trace),
7143         SEC_DEF("lsm/", LSM,
7144                 .is_attach_btf = true,
7145                 .expected_attach_type = BPF_LSM_MAC,
7146                 .attach_fn = attach_lsm),
7147         SEC_DEF("iter/", TRACING,
7148                 .expected_attach_type = BPF_TRACE_ITER,
7149                 .is_attach_btf = true,
7150                 .attach_fn = attach_iter),
7151         BPF_EAPROG_SEC("xdp_devmap/",           BPF_PROG_TYPE_XDP,
7152                                                 BPF_XDP_DEVMAP),
7153         BPF_EAPROG_SEC("xdp_cpumap/",           BPF_PROG_TYPE_XDP,
7154                                                 BPF_XDP_CPUMAP),
7155         BPF_EAPROG_SEC("xdp",                   BPF_PROG_TYPE_XDP,
7156                                                 BPF_XDP),
7157         BPF_PROG_SEC("perf_event",              BPF_PROG_TYPE_PERF_EVENT),
7158         BPF_PROG_SEC("lwt_in",                  BPF_PROG_TYPE_LWT_IN),
7159         BPF_PROG_SEC("lwt_out",                 BPF_PROG_TYPE_LWT_OUT),
7160         BPF_PROG_SEC("lwt_xmit",                BPF_PROG_TYPE_LWT_XMIT),
7161         BPF_PROG_SEC("lwt_seg6local",           BPF_PROG_TYPE_LWT_SEG6LOCAL),
7162         BPF_APROG_SEC("cgroup_skb/ingress",     BPF_PROG_TYPE_CGROUP_SKB,
7163                                                 BPF_CGROUP_INET_INGRESS),
7164         BPF_APROG_SEC("cgroup_skb/egress",      BPF_PROG_TYPE_CGROUP_SKB,
7165                                                 BPF_CGROUP_INET_EGRESS),
7166         BPF_APROG_COMPAT("cgroup/skb",          BPF_PROG_TYPE_CGROUP_SKB),
7167         BPF_EAPROG_SEC("cgroup/sock_create",    BPF_PROG_TYPE_CGROUP_SOCK,
7168                                                 BPF_CGROUP_INET_SOCK_CREATE),
7169         BPF_EAPROG_SEC("cgroup/sock_release",   BPF_PROG_TYPE_CGROUP_SOCK,
7170                                                 BPF_CGROUP_INET_SOCK_RELEASE),
7171         BPF_APROG_SEC("cgroup/sock",            BPF_PROG_TYPE_CGROUP_SOCK,
7172                                                 BPF_CGROUP_INET_SOCK_CREATE),
7173         BPF_EAPROG_SEC("cgroup/post_bind4",     BPF_PROG_TYPE_CGROUP_SOCK,
7174                                                 BPF_CGROUP_INET4_POST_BIND),
7175         BPF_EAPROG_SEC("cgroup/post_bind6",     BPF_PROG_TYPE_CGROUP_SOCK,
7176                                                 BPF_CGROUP_INET6_POST_BIND),
7177         BPF_APROG_SEC("cgroup/dev",             BPF_PROG_TYPE_CGROUP_DEVICE,
7178                                                 BPF_CGROUP_DEVICE),
7179         BPF_APROG_SEC("sockops",                BPF_PROG_TYPE_SOCK_OPS,
7180                                                 BPF_CGROUP_SOCK_OPS),
7181         BPF_APROG_SEC("sk_skb/stream_parser",   BPF_PROG_TYPE_SK_SKB,
7182                                                 BPF_SK_SKB_STREAM_PARSER),
7183         BPF_APROG_SEC("sk_skb/stream_verdict",  BPF_PROG_TYPE_SK_SKB,
7184                                                 BPF_SK_SKB_STREAM_VERDICT),
7185         BPF_APROG_COMPAT("sk_skb",              BPF_PROG_TYPE_SK_SKB),
7186         BPF_APROG_SEC("sk_msg",                 BPF_PROG_TYPE_SK_MSG,
7187                                                 BPF_SK_MSG_VERDICT),
7188         BPF_APROG_SEC("lirc_mode2",             BPF_PROG_TYPE_LIRC_MODE2,
7189                                                 BPF_LIRC_MODE2),
7190         BPF_APROG_SEC("flow_dissector",         BPF_PROG_TYPE_FLOW_DISSECTOR,
7191                                                 BPF_FLOW_DISSECTOR),
7192         BPF_EAPROG_SEC("cgroup/bind4",          BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
7193                                                 BPF_CGROUP_INET4_BIND),
7194         BPF_EAPROG_SEC("cgroup/bind6",          BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
7195                                                 BPF_CGROUP_INET6_BIND),
7196         BPF_EAPROG_SEC("cgroup/connect4",       BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
7197                                                 BPF_CGROUP_INET4_CONNECT),
7198         BPF_EAPROG_SEC("cgroup/connect6",       BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
7199                                                 BPF_CGROUP_INET6_CONNECT),
7200         BPF_EAPROG_SEC("cgroup/sendmsg4",       BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
7201                                                 BPF_CGROUP_UDP4_SENDMSG),
7202         BPF_EAPROG_SEC("cgroup/sendmsg6",       BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
7203                                                 BPF_CGROUP_UDP6_SENDMSG),
7204         BPF_EAPROG_SEC("cgroup/recvmsg4",       BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
7205                                                 BPF_CGROUP_UDP4_RECVMSG),
7206         BPF_EAPROG_SEC("cgroup/recvmsg6",       BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
7207                                                 BPF_CGROUP_UDP6_RECVMSG),
7208         BPF_EAPROG_SEC("cgroup/getpeername4",   BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
7209                                                 BPF_CGROUP_INET4_GETPEERNAME),
7210         BPF_EAPROG_SEC("cgroup/getpeername6",   BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
7211                                                 BPF_CGROUP_INET6_GETPEERNAME),
7212         BPF_EAPROG_SEC("cgroup/getsockname4",   BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
7213                                                 BPF_CGROUP_INET4_GETSOCKNAME),
7214         BPF_EAPROG_SEC("cgroup/getsockname6",   BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
7215                                                 BPF_CGROUP_INET6_GETSOCKNAME),
7216         BPF_EAPROG_SEC("cgroup/sysctl",         BPF_PROG_TYPE_CGROUP_SYSCTL,
7217                                                 BPF_CGROUP_SYSCTL),
7218         BPF_EAPROG_SEC("cgroup/getsockopt",     BPF_PROG_TYPE_CGROUP_SOCKOPT,
7219                                                 BPF_CGROUP_GETSOCKOPT),
7220         BPF_EAPROG_SEC("cgroup/setsockopt",     BPF_PROG_TYPE_CGROUP_SOCKOPT,
7221                                                 BPF_CGROUP_SETSOCKOPT),
7222         BPF_PROG_SEC("struct_ops",              BPF_PROG_TYPE_STRUCT_OPS),
7223         BPF_EAPROG_SEC("sk_lookup/",            BPF_PROG_TYPE_SK_LOOKUP,
7224                                                 BPF_SK_LOOKUP),
7225 };
7226
7227 #undef BPF_PROG_SEC_IMPL
7228 #undef BPF_PROG_SEC
7229 #undef BPF_APROG_SEC
7230 #undef BPF_EAPROG_SEC
7231 #undef BPF_APROG_COMPAT
7232 #undef SEC_DEF
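
/* Example (sketch): on the BPF-program side, these section names are what
 * SEC() annotations select, e.g. (in BPF program C code, illustrative):
 *
 *	SEC("kprobe/do_sys_openat2")
 *	int trace_open(struct pt_regs *ctx) { return 0; }
 *
 * bpf_object__open() matches "kprobe/do_sys_openat2" against the prefixes
 * in section_defs[] and picks BPF_PROG_TYPE_KPROBE plus attach_kprobe().
 */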
7233
7234 #define MAX_TYPE_NAME_SIZE 32
7235
7236 static const struct bpf_sec_def *find_sec_def(const char *sec_name)
7237 {
7238         int i, n = ARRAY_SIZE(section_defs);
7239
7240         for (i = 0; i < n; i++) {
7241                 if (strncmp(sec_name,
7242                             section_defs[i].sec, section_defs[i].len))
7243                         continue;
7244                 return &section_defs[i];
7245         }
7246         return NULL;
7247 }
7248
7249 static char *libbpf_get_type_names(bool attach_type)
7250 {
7251         int i, len = ARRAY_SIZE(section_defs) * MAX_TYPE_NAME_SIZE;
7252         char *buf;
7253
7254         buf = malloc(len);
7255         if (!buf)
7256                 return NULL;
7257
7258         buf[0] = '\0';
7259         /* Build up a string buf listing all available names */
7260         for (i = 0; i < ARRAY_SIZE(section_defs); i++) {
7261                 if (attach_type && !section_defs[i].is_attachable)
7262                         continue;
7263
7264                 if (strlen(buf) + strlen(section_defs[i].sec) + 2 > len) {
7265                         free(buf);
7266                         return NULL;
7267                 }
7268                 strcat(buf, " ");
7269                 strcat(buf, section_defs[i].sec);
7270         }
7271
7272         return buf;
7273 }
7274
7275 int libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type,
7276                              enum bpf_attach_type *expected_attach_type)
7277 {
7278         const struct bpf_sec_def *sec_def;
7279         char *type_names;
7280
7281         if (!name)
7282                 return -EINVAL;
7283
7284         sec_def = find_sec_def(name);
7285         if (sec_def) {
7286                 *prog_type = sec_def->prog_type;
7287                 *expected_attach_type = sec_def->expected_attach_type;
7288                 return 0;
7289         }
7290
7291         pr_debug("failed to guess program type from ELF section '%s'\n", name);
7292         type_names = libbpf_get_type_names(false);
7293         if (type_names != NULL) {
7294                 pr_debug("supported section(type) names are:%s\n", type_names);
7295                 free(type_names);
7296         }
7297
7298         return -ESRCH;
7299 }
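
/* Example (sketch): guessing program and attach type from an ELF section
 * name, the same way bpf_object__open() does internally. The section name
 * below is illustrative.
 */
static __attribute__((unused)) void example_guess_types(void)
{
	enum bpf_prog_type prog_type;
	enum bpf_attach_type attach_type;

	if (!libbpf_prog_type_by_name("tracepoint/syscalls/sys_enter_openat",
				      &prog_type, &attach_type))
		/* prog_type is BPF_PROG_TYPE_TRACEPOINT here */
		printf("prog type %d, attach type %d\n", prog_type, attach_type);
}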
7300
7301 static struct bpf_map *find_struct_ops_map_by_offset(struct bpf_object *obj,
7302                                                      size_t offset)
7303 {
7304         struct bpf_map *map;
7305         size_t i;
7306
7307         for (i = 0; i < obj->nr_maps; i++) {
7308                 map = &obj->maps[i];
7309                 if (!bpf_map__is_struct_ops(map))
7310                         continue;
7311                 if (map->sec_offset <= offset &&
7312                     offset - map->sec_offset < map->def.value_size)
7313                         return map;
7314         }
7315
7316         return NULL;
7317 }
7318
7319 /* Collect relocations from the ELF file and populate st_ops->progs[] */
7320 static int bpf_object__collect_st_ops_relos(struct bpf_object *obj,
7321                                             GElf_Shdr *shdr, Elf_Data *data)
7322 {
7323         const struct btf_member *member;
7324         struct bpf_struct_ops *st_ops;
7325         struct bpf_program *prog;
7326         unsigned int shdr_idx;
7327         const struct btf *btf;
7328         struct bpf_map *map;
7329         Elf_Data *symbols;
7330         unsigned int moff;
7331         const char *name;
7332         __u32 member_idx;
7333         GElf_Sym sym;
7334         GElf_Rel rel;
7335         int i, nrels;
7336
7337         symbols = obj->efile.symbols;
7338         btf = obj->btf;
7339         nrels = shdr->sh_size / shdr->sh_entsize;
7340         for (i = 0; i < nrels; i++) {
7341                 if (!gelf_getrel(data, i, &rel)) {
7342                         pr_warn("struct_ops reloc: failed to get %d reloc\n", i);
7343                         return -LIBBPF_ERRNO__FORMAT;
7344                 }
7345
7346                 if (!gelf_getsym(symbols, GELF_R_SYM(rel.r_info), &sym)) {
7347                         pr_warn("struct_ops reloc: symbol %zx not found\n",
7348                                 (size_t)GELF_R_SYM(rel.r_info));
7349                         return -LIBBPF_ERRNO__FORMAT;
7350                 }
7351
7352                 name = elf_strptr(obj->efile.elf, obj->efile.strtabidx,
7353                                   sym.st_name) ? : "<?>";
7354                 map = find_struct_ops_map_by_offset(obj, rel.r_offset);
7355                 if (!map) {
7356                         pr_warn("struct_ops reloc: cannot find map at rel.r_offset %zu\n",
7357                                 (size_t)rel.r_offset);
7358                         return -EINVAL;
7359                 }
7360
7361                 moff = rel.r_offset - map->sec_offset;
7362                 shdr_idx = sym.st_shndx;
7363                 st_ops = map->st_ops;
7364                 pr_debug("struct_ops reloc %s: for %lld value %lld shdr_idx %u rel.r_offset %zu map->sec_offset %zu name %d (\'%s\')\n",
7365                          map->name,
7366                          (long long)(rel.r_info >> 32),
7367                          (long long)sym.st_value,
7368                          shdr_idx, (size_t)rel.r_offset,
7369                          map->sec_offset, sym.st_name, name);
7370
7371                 if (shdr_idx >= SHN_LORESERVE) {
7372                         pr_warn("struct_ops reloc %s: rel.r_offset %zu shdr_idx %u unsupported non-static function\n",
7373                                 map->name, (size_t)rel.r_offset, shdr_idx);
7374                         return -LIBBPF_ERRNO__RELOC;
7375                 }
7376
7377                 member = find_member_by_offset(st_ops->type, moff * 8);
7378                 if (!member) {
7379                         pr_warn("struct_ops reloc %s: cannot find member at moff %u\n",
7380                                 map->name, moff);
7381                         return -EINVAL;
7382                 }
7383                 member_idx = member - btf_members(st_ops->type);
7384                 name = btf__name_by_offset(btf, member->name_off);
7385
7386                 if (!resolve_func_ptr(btf, member->type, NULL)) {
7387                         pr_warn("struct_ops reloc %s: cannot relocate non func ptr %s\n",
7388                                 map->name, name);
7389                         return -EINVAL;
7390                 }
7391
7392                 prog = bpf_object__find_prog_by_idx(obj, shdr_idx);
7393                 if (!prog) {
7394                         pr_warn("struct_ops reloc %s: cannot find prog at shdr_idx %u to relocate func ptr %s\n",
7395                                 map->name, shdr_idx, name);
7396                         return -EINVAL;
7397                 }
7398
7399                 if (prog->type == BPF_PROG_TYPE_UNSPEC) {
7400                         const struct bpf_sec_def *sec_def;
7401
7402                         sec_def = find_sec_def(prog->section_name);
7403                         if (sec_def &&
7404                             sec_def->prog_type != BPF_PROG_TYPE_STRUCT_OPS) {
7405                                 /* for pr_warn */
7406                                 prog->type = sec_def->prog_type;
7407                                 goto invalid_prog;
7408                         }
7409
7410                         prog->type = BPF_PROG_TYPE_STRUCT_OPS;
7411                         prog->attach_btf_id = st_ops->type_id;
7412                         prog->expected_attach_type = member_idx;
7413                 } else if (prog->type != BPF_PROG_TYPE_STRUCT_OPS ||
7414                            prog->attach_btf_id != st_ops->type_id ||
7415                            prog->expected_attach_type != member_idx) {
7416                         goto invalid_prog;
7417                 }
7418                 st_ops->progs[member_idx] = prog;
7419         }
7420
7421         return 0;
7422
7423 invalid_prog:
7424         pr_warn("struct_ops reloc %s: cannot use prog %s in sec %s with type %u attach_btf_id %u expected_attach_type %u for func ptr %s\n",
7425                 map->name, prog->name, prog->section_name, prog->type,
7426                 prog->attach_btf_id, prog->expected_attach_type, name);
7427         return -EINVAL;
7428 }
7429
7430 #define BTF_TRACE_PREFIX "btf_trace_"
7431 #define BTF_LSM_PREFIX "bpf_lsm_"
7432 #define BTF_ITER_PREFIX "bpf_iter_"
7433 #define BTF_MAX_NAME_SIZE 128
7434
7435 static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix,
7436                                    const char *name, __u32 kind)
7437 {
7438         char btf_type_name[BTF_MAX_NAME_SIZE];
7439         int ret;
7440
7441         ret = snprintf(btf_type_name, sizeof(btf_type_name),
7442                        "%s%s", prefix, name);
7443         /* snprintf returns the number of characters that would have been
7444          * written, excluding the terminating null, so a return value >=
7445          * BTF_MAX_NAME_SIZE indicates truncation.
7446          */
7447         if (ret < 0 || ret >= sizeof(btf_type_name))
7448                 return -ENAMETOOLONG;
7449         return btf__find_by_name_kind(btf, btf_type_name, kind);
7450 }
7451
7452 static inline int __find_vmlinux_btf_id(struct btf *btf, const char *name,
7453                                         enum bpf_attach_type attach_type)
7454 {
7455         int err;
7456
7457         if (attach_type == BPF_TRACE_RAW_TP)
7458                 err = find_btf_by_prefix_kind(btf, BTF_TRACE_PREFIX, name,
7459                                               BTF_KIND_TYPEDEF);
7460         else if (attach_type == BPF_LSM_MAC)
7461                 err = find_btf_by_prefix_kind(btf, BTF_LSM_PREFIX, name,
7462                                               BTF_KIND_FUNC);
7463         else if (attach_type == BPF_TRACE_ITER)
7464                 err = find_btf_by_prefix_kind(btf, BTF_ITER_PREFIX, name,
7465                                               BTF_KIND_FUNC);
7466         else
7467                 err = btf__find_by_name_kind(btf, name, BTF_KIND_FUNC);
7468
7469         if (err <= 0)
7470                 pr_warn("%s is not found in vmlinux BTF\n", name);
7471
7472         return err;
7473 }
7474
7475 int libbpf_find_vmlinux_btf_id(const char *name,
7476                                enum bpf_attach_type attach_type)
7477 {
7478         struct btf *btf;
7479         int err;
7480
7481         btf = libbpf_find_kernel_btf();
7482         if (IS_ERR(btf)) {
7483                 pr_warn("vmlinux BTF is not found\n");
7484                 return -EINVAL;
7485         }
7486
7487         err = __find_vmlinux_btf_id(btf, name, attach_type);
7488         btf__free(btf);
7489         return err;
7490 }
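
/* Example (sketch): resolving the vmlinux BTF id an fentry program would
 * attach to. Per the prefix mapping above, BPF_TRACE_RAW_TP would instead
 * look up the "btf_trace_<name>" typedef. The function name is illustrative.
 */
static __attribute__((unused)) void example_find_btf_id(void)
{
	int btf_id = libbpf_find_vmlinux_btf_id("tcp_v4_connect",
						BPF_TRACE_FENTRY);

	if (btf_id > 0)
		printf("attach btf_id: %d\n", btf_id);
}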
7491
7492 static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd)
7493 {
7494         struct bpf_prog_info_linear *info_linear;
7495         struct bpf_prog_info *info;
7496         struct btf *btf = NULL;
7497         int err = -EINVAL;
7498
7499         info_linear = bpf_program__get_prog_info_linear(attach_prog_fd, 0);
7500         if (IS_ERR_OR_NULL(info_linear)) {
7501                 pr_warn("failed get_prog_info_linear for FD %d\n",
7502                         attach_prog_fd);
7503                 return -EINVAL;
7504         }
7505         info = &info_linear->info;
7506         if (!info->btf_id) {
7507                 pr_warn("The target program doesn't have BTF\n");
7508                 goto out;
7509         }
7510         if (btf__get_from_id(info->btf_id, &btf)) {
7511                 pr_warn("Failed to get BTF of the program\n");
7512                 goto out;
7513         }
7514         err = btf__find_by_name_kind(btf, name, BTF_KIND_FUNC);
7515         btf__free(btf);
7516         if (err <= 0) {
7517                 pr_warn("%s is not found in prog's BTF\n", name);
7518                 goto out;
7519         }
7520 out:
7521         free(info_linear);
7522         return err;
7523 }
7524
7525 static int libbpf_find_attach_btf_id(struct bpf_program *prog)
7526 {
7527         enum bpf_attach_type attach_type = prog->expected_attach_type;
7528         __u32 attach_prog_fd = prog->attach_prog_fd;
7529         const char *name = prog->section_name;
7530         int i, err;
7531
7532         if (!name)
7533                 return -EINVAL;
7534
7535         for (i = 0; i < ARRAY_SIZE(section_defs); i++) {
7536                 if (!section_defs[i].is_attach_btf)
7537                         continue;
7538                 if (strncmp(name, section_defs[i].sec, section_defs[i].len))
7539                         continue;
7540                 if (attach_prog_fd)
7541                         err = libbpf_find_prog_btf_id(name + section_defs[i].len,
7542                                                       attach_prog_fd);
7543                 else
7544                         err = __find_vmlinux_btf_id(prog->obj->btf_vmlinux,
7545                                                     name + section_defs[i].len,
7546                                                     attach_type);
7547                 return err;
7548         }
7549         pr_warn("failed to identify btf_id based on ELF section name '%s'\n", name);
7550         return -ESRCH;
7551 }
7552
7553 int libbpf_attach_type_by_name(const char *name,
7554                                enum bpf_attach_type *attach_type)
7555 {
7556         char *type_names;
7557         int i;
7558
7559         if (!name)
7560                 return -EINVAL;
7561
7562         for (i = 0; i < ARRAY_SIZE(section_defs); i++) {
7563                 if (strncmp(name, section_defs[i].sec, section_defs[i].len))
7564                         continue;
7565                 if (!section_defs[i].is_attachable)
7566                         return -EINVAL;
7567                 *attach_type = section_defs[i].expected_attach_type;
7568                 return 0;
7569         }
7570         pr_debug("failed to guess attach type based on ELF section name '%s'\n", name);
7571         type_names = libbpf_get_type_names(true);
7572         if (type_names != NULL) {
7573                 pr_debug("attachable section(type) names are:%s\n", type_names);
7574                 free(type_names);
7575         }
7576
7577         return -EINVAL;
7578 }
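
/* Example (sketch): mapping an attachable section name to its attach type;
 * here "cgroup/connect4" yields BPF_CGROUP_INET4_CONNECT.
 */
static __attribute__((unused)) void example_attach_type(void)
{
	enum bpf_attach_type type;

	if (!libbpf_attach_type_by_name("cgroup/connect4", &type))
		printf("attach type: %d\n", type);
}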
7579
7580 int bpf_map__fd(const struct bpf_map *map)
7581 {
7582         return map ? map->fd : -EINVAL;
7583 }
7584
7585 const struct bpf_map_def *bpf_map__def(const struct bpf_map *map)
7586 {
7587         return map ? &map->def : ERR_PTR(-EINVAL);
7588 }
7589
7590 const char *bpf_map__name(const struct bpf_map *map)
7591 {
7592         return map ? map->name : NULL;
7593 }
7594
7595 enum bpf_map_type bpf_map__type(const struct bpf_map *map)
7596 {
7597         return map->def.type;
7598 }
7599
7600 int bpf_map__set_type(struct bpf_map *map, enum bpf_map_type type)
7601 {
7602         if (map->fd >= 0)
7603                 return -EBUSY;
7604         map->def.type = type;
7605         return 0;
7606 }
7607
7608 __u32 bpf_map__map_flags(const struct bpf_map *map)
7609 {
7610         return map->def.map_flags;
7611 }
7612
7613 int bpf_map__set_map_flags(struct bpf_map *map, __u32 flags)
7614 {
7615         if (map->fd >= 0)
7616                 return -EBUSY;
7617         map->def.map_flags = flags;
7618         return 0;
7619 }
7620
7621 __u32 bpf_map__numa_node(const struct bpf_map *map)
7622 {
7623         return map->numa_node;
7624 }
7625
7626 int bpf_map__set_numa_node(struct bpf_map *map, __u32 numa_node)
7627 {
7628         if (map->fd >= 0)
7629                 return -EBUSY;
7630         map->numa_node = numa_node;
7631         return 0;
7632 }
7633
7634 __u32 bpf_map__key_size(const struct bpf_map *map)
7635 {
7636         return map->def.key_size;
7637 }
7638
7639 int bpf_map__set_key_size(struct bpf_map *map, __u32 size)
7640 {
7641         if (map->fd >= 0)
7642                 return -EBUSY;
7643         map->def.key_size = size;
7644         return 0;
7645 }
7646
7647 __u32 bpf_map__value_size(const struct bpf_map *map)
7648 {
7649         return map->def.value_size;
7650 }
7651
7652 int bpf_map__set_value_size(struct bpf_map *map, __u32 size)
7653 {
7654         if (map->fd >= 0)
7655                 return -EBUSY;
7656         map->def.value_size = size;
7657         return 0;
7658 }
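
/* Example (sketch): the setters above succeed only before the map is
 * created (-EBUSY once map->fd is valid), so the typical pattern is to
 * tweak a map between bpf_object__open() and bpf_object__load(). The map
 * name is illustrative.
 */
static __attribute__((unused)) int example_resize_map(struct bpf_object *obj)
{
	struct bpf_map *map = bpf_object__find_map_by_name(obj, "events");

	if (!map)
		return -ENOENT;
	return bpf_map__set_value_size(map, 512); /* must precede load */
}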
7659
7660 __u32 bpf_map__btf_key_type_id(const struct bpf_map *map)
7661 {
7662         return map ? map->btf_key_type_id : 0;
7663 }
7664
7665 __u32 bpf_map__btf_value_type_id(const struct bpf_map *map)
7666 {
7667         return map ? map->btf_value_type_id : 0;
7668 }
7669
7670 int bpf_map__set_priv(struct bpf_map *map, void *priv,
7671                      bpf_map_clear_priv_t clear_priv)
7672 {
7673         if (!map)
7674                 return -EINVAL;
7675
7676         if (map->priv) {
7677                 if (map->clear_priv)
7678                         map->clear_priv(map, map->priv);
7679         }
7680
7681         map->priv = priv;
7682         map->clear_priv = clear_priv;
7683         return 0;
7684 }
7685
7686 void *bpf_map__priv(const struct bpf_map *map)
7687 {
7688         return map ? map->priv : ERR_PTR(-EINVAL);
7689 }
7690
7691 int bpf_map__set_initial_value(struct bpf_map *map,
7692                                const void *data, size_t size)
7693 {
7694         if (!map->mmaped || map->libbpf_type == LIBBPF_MAP_KCONFIG ||
7695             size != map->def.value_size || map->fd >= 0)
7696                 return -EINVAL;
7697
7698         memcpy(map->mmaped, data, size);
7699         return 0;
7700 }
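
/* Example (sketch): pre-seeding an internal map (e.g., .rodata) between
 * open and load. The struct stands in for the object's read-only globals;
 * its size must equal the map's value_size exactly or -EINVAL is returned.
 */
static __attribute__((unused)) int example_seed_rodata(struct bpf_map *rodata)
{
	struct { __u32 debug_level; __u32 sample_rate; } vals = {
		.debug_level = 1,
		.sample_rate = 100,
	};

	return bpf_map__set_initial_value(rodata, &vals, sizeof(vals));
}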
7701
7702 bool bpf_map__is_offload_neutral(const struct bpf_map *map)
7703 {
7704         return map->def.type == BPF_MAP_TYPE_PERF_EVENT_ARRAY;
7705 }
7706
7707 bool bpf_map__is_internal(const struct bpf_map *map)
7708 {
7709         return map->libbpf_type != LIBBPF_MAP_UNSPEC;
7710 }
7711
7712 __u32 bpf_map__ifindex(const struct bpf_map *map)
7713 {
7714         return map->map_ifindex;
7715 }
7716
7717 int bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex)
7718 {
7719         if (map->fd >= 0)
7720                 return -EBUSY;
7721         map->map_ifindex = ifindex;
7722         return 0;
7723 }
7724
7725 int bpf_map__set_inner_map_fd(struct bpf_map *map, int fd)
7726 {
7727         if (!bpf_map_type__is_map_in_map(map->def.type)) {
7728                 pr_warn("error: unsupported map type\n");
7729                 return -EINVAL;
7730         }
7731         if (map->inner_map_fd != -1) {
7732                 pr_warn("error: inner_map_fd already specified\n");
7733                 return -EINVAL;
7734         }
7735         map->inner_map_fd = fd;
7736         return 0;
7737 }
7738
7739 static struct bpf_map *
7740 __bpf_map__iter(const struct bpf_map *m, const struct bpf_object *obj, int i)
7741 {
7742         ssize_t idx;
7743         struct bpf_map *s, *e;
7744
7745         if (!obj || !obj->maps)
7746                 return NULL;
7747
7748         s = obj->maps;
7749         e = obj->maps + obj->nr_maps;
7750
7751         if ((m < s) || (m >= e)) {
7752                 pr_warn("error in %s: map handle doesn't belong to object\n",
7753                          __func__);
7754                 return NULL;
7755         }
7756
7757         idx = (m - obj->maps) + i;
7758         if (idx >= obj->nr_maps || idx < 0)
7759                 return NULL;
7760         return &obj->maps[idx];
7761 }
7762
7763 struct bpf_map *
7764 bpf_map__next(const struct bpf_map *prev, const struct bpf_object *obj)
7765 {
7766         if (prev == NULL)
7767                 return obj->maps;
7768
7769         return __bpf_map__iter(prev, obj, 1);
7770 }
7771
7772 struct bpf_map *
7773 bpf_map__prev(const struct bpf_map *next, const struct bpf_object *obj)
7774 {
7775         if (next == NULL) {
7776                 if (!obj->nr_maps)
7777                         return NULL;
7778                 return obj->maps + obj->nr_maps - 1;
7779         }
7780
7781         return __bpf_map__iter(next, obj, -1);
7782 }
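
/* Example (sketch): bpf_object__for_each_map(), built on bpf_map__next(),
 * visits every map in the object in definition order.
 */
static __attribute__((unused)) void example_list_maps(const struct bpf_object *obj)
{
	struct bpf_map *map;

	bpf_object__for_each_map(map, obj)
		printf("map '%s': fd=%d\n", bpf_map__name(map), bpf_map__fd(map));
}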
7783
7784 struct bpf_map *
7785 bpf_object__find_map_by_name(const struct bpf_object *obj, const char *name)
7786 {
7787         struct bpf_map *pos;
7788
7789         bpf_object__for_each_map(pos, obj) {
7790                 if (pos->name && !strcmp(pos->name, name))
7791                         return pos;
7792         }
7793         return NULL;
7794 }
7795
7796 int
7797 bpf_object__find_map_fd_by_name(const struct bpf_object *obj, const char *name)
7798 {
7799         return bpf_map__fd(bpf_object__find_map_by_name(obj, name));
7800 }
7801
7802 struct bpf_map *
7803 bpf_object__find_map_by_offset(struct bpf_object *obj, size_t offset)
7804 {
7805         return ERR_PTR(-ENOTSUP);
7806 }
7807
7808 long libbpf_get_error(const void *ptr)
7809 {
7810         return PTR_ERR_OR_ZERO(ptr);
7811 }
7812
7813 int bpf_prog_load(const char *file, enum bpf_prog_type type,
7814                   struct bpf_object **pobj, int *prog_fd)
7815 {
7816         struct bpf_prog_load_attr attr;
7817
7818         memset(&attr, 0, sizeof(struct bpf_prog_load_attr));
7819         attr.file = file;
7820         attr.prog_type = type;
7821         attr.expected_attach_type = 0;
7822
7823         return bpf_prog_load_xattr(&attr, pobj, prog_fd);
7824 }
7825
7826 int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
7827                         struct bpf_object **pobj, int *prog_fd)
7828 {
7829         struct bpf_object_open_attr open_attr = {};
7830         struct bpf_program *prog, *first_prog = NULL;
7831         struct bpf_object *obj;
7832         struct bpf_map *map;
7833         int err;
7834
7835         if (!attr)
7836                 return -EINVAL;
7837         if (!attr->file)
7838                 return -EINVAL;
7839
7840         open_attr.file = attr->file;
7841         open_attr.prog_type = attr->prog_type;
7842
7843         obj = bpf_object__open_xattr(&open_attr);
7844         if (IS_ERR_OR_NULL(obj))
7845                 return -ENOENT;
7846
7847         bpf_object__for_each_program(prog, obj) {
7848                 enum bpf_attach_type attach_type = attr->expected_attach_type;
7849                 /*
7850                  * to preserve backwards compatibility, bpf_prog_load treats
7851                  * attr->prog_type, if specified, as an override to whatever
7852                  * bpf_object__open guessed
7853                  */
7854                 if (attr->prog_type != BPF_PROG_TYPE_UNSPEC) {
7855                         bpf_program__set_type(prog, attr->prog_type);
7856                         bpf_program__set_expected_attach_type(prog,
7857                                                               attach_type);
7858                 }
7859                 if (bpf_program__get_type(prog) == BPF_PROG_TYPE_UNSPEC) {
7860                         /*
7861                          * we haven't guessed from section name and user
7862                          * didn't provide a fallback type, too bad...
7863                          */
7864                         bpf_object__close(obj);
7865                         return -EINVAL;
7866                 }
7867
7868                 prog->prog_ifindex = attr->ifindex;
7869                 prog->log_level = attr->log_level;
7870                 prog->prog_flags = attr->prog_flags;
7871                 if (!first_prog)
7872                         first_prog = prog;
7873         }
7874
7875         bpf_object__for_each_map(map, obj) {
7876                 if (!bpf_map__is_offload_neutral(map))
7877                         map->map_ifindex = attr->ifindex;
7878         }
7879
7880         if (!first_prog) {
7881                 pr_warn("object file doesn't contain a BPF program\n");
7882                 bpf_object__close(obj);
7883                 return -ENOENT;
7884         }
7885
7886         err = bpf_object__load(obj);
7887         if (err) {
7888                 bpf_object__close(obj);
7889                 return err;
7890         }
7891
7892         *pobj = obj;
7893         *prog_fd = bpf_program__fd(first_prog);
7894         return 0;
7895 }
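
/* Example (sketch): the one-shot open+load path. The object file name is
 * illustrative; BPF_PROG_TYPE_UNSPEC defers the type guess to each
 * program's section name.
 */
static __attribute__((unused)) int example_prog_load(void)
{
	struct bpf_object *obj;
	int prog_fd, err;

	err = bpf_prog_load("myprog.bpf.o", BPF_PROG_TYPE_UNSPEC,
			    &obj, &prog_fd);
	if (err)
		return err;
	/* ... attach programs / use prog_fd ... */
	bpf_object__close(obj);
	return 0;
}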
7896
7897 struct bpf_link {
7898         int (*detach)(struct bpf_link *link);
7899         int (*destroy)(struct bpf_link *link);
7900         char *pin_path;         /* NULL, if not pinned */
7901         int fd;                 /* hook FD, -1 if not applicable */
7902         bool disconnected;
7903 };
7904
7905 /* Replace link's underlying BPF program with the new one */
7906 int bpf_link__update_program(struct bpf_link *link, struct bpf_program *prog)
7907 {
7908         return bpf_link_update(bpf_link__fd(link), bpf_program__fd(prog), NULL);
7909 }
7910
7911 /* Release "ownership" of the underlying BPF resource (typically, a BPF
7912  * program attached to some BPF hook, e.g., tracepoint, kprobe, etc). A
7913  * disconnected link, when destroyed through a bpf_link__destroy() call,
7914  * won't attempt to detach/unregister that BPF resource. This is useful in
7915  * situations where, say, an attached BPF program has to outlive the
7916  * userspace program that attached it. Depending on the type of BPF
7917  * program, though, additional steps (like pinning the BPF program in
7918  * BPF FS) might be necessary to ensure that exit of the userspace program
7919  * doesn't trigger automatic detachment and cleanup inside the kernel.
7920  */
7921 void bpf_link__disconnect(struct bpf_link *link)
7922 {
7923         link->disconnected = true;
7924 }
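
/* Example (sketch): keeping a BPF hook attached after this process exits
 * by pinning the link and then disconnecting it. The pin path is
 * illustrative.
 */
static __attribute__((unused)) int example_persist_link(struct bpf_link *link)
{
	int err = bpf_link__pin(link, "/sys/fs/bpf/mylink");

	if (err)
		return err;
	bpf_link__disconnect(link);
	/* frees the struct but leaves the pinned link attached */
	return bpf_link__destroy(link);
}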
7925
7926 int bpf_link__destroy(struct bpf_link *link)
7927 {
7928         int err = 0;
7929
7930         if (IS_ERR_OR_NULL(link))
7931                 return 0;
7932
7933         if (!link->disconnected && link->detach)
7934                 err = link->detach(link);
7935         if (link->destroy)
7936                 link->destroy(link);
7937         if (link->pin_path)
7938                 free(link->pin_path);
7939         free(link);
7940
7941         return err;
7942 }
7943
7944 int bpf_link__fd(const struct bpf_link *link)
7945 {
7946         return link->fd;
7947 }
7948
7949 const char *bpf_link__pin_path(const struct bpf_link *link)
7950 {
7951         return link->pin_path;
7952 }
7953
7954 static int bpf_link__detach_fd(struct bpf_link *link)
7955 {
7956         return close(link->fd);
7957 }
7958
7959 struct bpf_link *bpf_link__open(const char *path)
7960 {
7961         struct bpf_link *link;
7962         int fd;
7963
7964         fd = bpf_obj_get(path);
7965         if (fd < 0) {
7966                 fd = -errno;
7967                 pr_warn("failed to open link at %s: %d\n", path, fd);
7968                 return ERR_PTR(fd);
7969         }
7970
7971         link = calloc(1, sizeof(*link));
7972         if (!link) {
7973                 close(fd);
7974                 return ERR_PTR(-ENOMEM);
7975         }
7976         link->detach = &bpf_link__detach_fd;
7977         link->fd = fd;
7978
7979         link->pin_path = strdup(path);
7980         if (!link->pin_path) {
7981                 bpf_link__destroy(link);
7982                 return ERR_PTR(-ENOMEM);
7983         }
7984
7985         return link;
7986 }
7987
7988 int bpf_link__detach(struct bpf_link *link)
7989 {
7990         return bpf_link_detach(link->fd) ? -errno : 0;
7991 }
7992
7993 int bpf_link__pin(struct bpf_link *link, const char *path)
7994 {
7995         int err;
7996
7997         if (link->pin_path)
7998                 return -EBUSY;
7999         err = make_parent_dir(path);
8000         if (err)
8001                 return err;
8002         err = check_path(path);
8003         if (err)
8004                 return err;
8005
8006         link->pin_path = strdup(path);
8007         if (!link->pin_path)
8008                 return -ENOMEM;
8009
8010         if (bpf_obj_pin(link->fd, link->pin_path)) {
8011                 err = -errno;
8012                 zfree(&link->pin_path);
8013                 return err;
8014         }
8015
8016         pr_debug("link fd=%d: pinned at %s\n", link->fd, link->pin_path);
8017         return 0;
8018 }
8019
8020 int bpf_link__unpin(struct bpf_link *link)
8021 {
8022         int err;
8023
8024         if (!link->pin_path)
8025                 return -EINVAL;
8026
8027         err = unlink(link->pin_path);
8028         if (err != 0)
8029                 return -errno;
8030
8031         pr_debug("link fd=%d: unpinned from %s\n", link->fd, link->pin_path);
8032         zfree(&link->pin_path);
8033         return 0;
8034 }
8035
8036 static int bpf_link__detach_perf_event(struct bpf_link *link)
8037 {
8038         int err;
8039
8040         err = ioctl(link->fd, PERF_EVENT_IOC_DISABLE, 0);
8041         if (err)
8042                 err = -errno;
8043
8044         close(link->fd);
8045         return err;
8046 }
8047
8048 struct bpf_link *bpf_program__attach_perf_event(struct bpf_program *prog,
8049                                                 int pfd)
8050 {
8051         char errmsg[STRERR_BUFSIZE];
8052         struct bpf_link *link;
8053         int prog_fd, err;
8054
8055         if (pfd < 0) {
8056                 pr_warn("program '%s': invalid perf event FD %d\n",
8057                         bpf_program__title(prog, false), pfd);
8058                 return ERR_PTR(-EINVAL);
8059         }
8060         prog_fd = bpf_program__fd(prog);
8061         if (prog_fd < 0) {
8062                 pr_warn("program '%s': can't attach BPF program w/o FD (did you load it?)\n",
8063                         bpf_program__title(prog, false));
8064                 return ERR_PTR(-EINVAL);
8065         }
8066
8067         link = calloc(1, sizeof(*link));
8068         if (!link)
8069                 return ERR_PTR(-ENOMEM);
8070         link->detach = &bpf_link__detach_perf_event;
8071         link->fd = pfd;
8072
8073         if (ioctl(pfd, PERF_EVENT_IOC_SET_BPF, prog_fd) < 0) {
8074                 err = -errno;
8075                 free(link);
8076                 pr_warn("program '%s': failed to attach to pfd %d: %s\n",
8077                         bpf_program__title(prog, false), pfd,
8078                            libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
8079                 if (err == -EPROTO)
8080                         pr_warn("program '%s': try adding PERF_SAMPLE_CALLCHAIN to, or removing exclude_callchain_[kernel|user] from, pfd %d\n",
8081                                 bpf_program__title(prog, false), pfd);
8082                 return ERR_PTR(err);
8083         }
8084         if (ioctl(pfd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
8085                 err = -errno;
8086                 free(link);
8087                 pr_warn("program '%s': failed to enable pfd %d: %s\n",
8088                         bpf_program__title(prog, false), pfd,
8089                            libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
8090                 return ERR_PTR(err);
8091         }
8092         return link;
8093 }
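
/* Example (sketch): attaching a perf_event BPF program to a software
 * CPU-clock event sampling at 99 Hz on one CPU. Error handling is trimmed
 * for brevity.
 */
static __attribute__((unused)) struct bpf_link *
example_attach_cpu_clock(struct bpf_program *prog, int cpu)
{
	struct perf_event_attr attr = {
		.type = PERF_TYPE_SOFTWARE,
		.config = PERF_COUNT_SW_CPU_CLOCK,
		.size = sizeof(attr),
		.freq = 1,
		.sample_freq = 99,
	};
	int pfd;

	pfd = syscall(__NR_perf_event_open, &attr, -1 /* pid */, cpu,
		      -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC);
	if (pfd < 0)
		return ERR_PTR(-errno);
	return bpf_program__attach_perf_event(prog, pfd);
}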
8094
8095 /*
8096  * This function is expected to parse an integer in the range [0, 2^31-1]
8097  * from the given file using scanf format string fmt. If the actual parsed
8098  * value is negative, the result might be indistinguishable from an error.
8099  */
8100 static int parse_uint_from_file(const char *file, const char *fmt)
8101 {
8102         char buf[STRERR_BUFSIZE];
8103         int err, ret;
8104         FILE *f;
8105
8106         f = fopen(file, "r");
8107         if (!f) {
8108                 err = -errno;
8109                 pr_debug("failed to open '%s': %s\n", file,
8110                          libbpf_strerror_r(err, buf, sizeof(buf)));
8111                 return err;
8112         }
8113         err = fscanf(f, fmt, &ret);
8114         if (err != 1) {
8115                 err = err == EOF ? -EIO : -errno;
8116                 pr_debug("failed to parse '%s': %s\n", file,
8117                         libbpf_strerror_r(err, buf, sizeof(buf)));
8118                 fclose(f);
8119                 return err;
8120         }
8121         fclose(f);
8122         return ret;
8123 }
8124
8125 static int determine_kprobe_perf_type(void)
8126 {
8127         const char *file = "/sys/bus/event_source/devices/kprobe/type";
8128
8129         return parse_uint_from_file(file, "%d\n");
8130 }
8131
8132 static int determine_uprobe_perf_type(void)
8133 {
8134         const char *file = "/sys/bus/event_source/devices/uprobe/type";
8135
8136         return parse_uint_from_file(file, "%d\n");
8137 }
8138
8139 static int determine_kprobe_retprobe_bit(void)
8140 {
8141         const char *file = "/sys/bus/event_source/devices/kprobe/format/retprobe";
8142
8143         return parse_uint_from_file(file, "config:%d\n");
8144 }
8145
8146 static int determine_uprobe_retprobe_bit(void)
8147 {
8148         const char *file = "/sys/bus/event_source/devices/uprobe/format/retprobe";
8149
8150         return parse_uint_from_file(file, "config:%d\n");
8151 }
8152
8153 static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name,
8154                                  uint64_t offset, int pid)
8155 {
8156         struct perf_event_attr attr = {};
8157         char errmsg[STRERR_BUFSIZE];
8158         int type, pfd, err;
8159
8160         type = uprobe ? determine_uprobe_perf_type()
8161                       : determine_kprobe_perf_type();
8162         if (type < 0) {
8163                 pr_warn("failed to determine %s perf type: %s\n",
8164                         uprobe ? "uprobe" : "kprobe",
8165                         libbpf_strerror_r(type, errmsg, sizeof(errmsg)));
8166                 return type;
8167         }
8168         if (retprobe) {
8169                 int bit = uprobe ? determine_uprobe_retprobe_bit()
8170                                  : determine_kprobe_retprobe_bit();
8171
8172                 if (bit < 0) {
8173                         pr_warn("failed to determine %s retprobe bit: %s\n",
8174                                 uprobe ? "uprobe" : "kprobe",
8175                                 libbpf_strerror_r(bit, errmsg, sizeof(errmsg)));
8176                         return bit;
8177                 }
8178                 attr.config |= 1 << bit;
8179         }
8180         attr.size = sizeof(attr);
8181         attr.type = type;
8182         attr.config1 = ptr_to_u64(name); /* kprobe_func or uprobe_path */
8183         attr.config2 = offset;           /* kprobe_addr or probe_offset */
8184
8185         /* pid filter is meaningful only for uprobes */
8186         pfd = syscall(__NR_perf_event_open, &attr,
8187                       pid < 0 ? -1 : pid /* pid */,
8188                       pid == -1 ? 0 : -1 /* cpu */,
8189                       -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC);
8190         if (pfd < 0) {
8191                 err = -errno;
8192                 pr_warn("%s perf_event_open() failed: %s\n",
8193                         uprobe ? "uprobe" : "kprobe",
8194                         libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
8195                 return err;
8196         }
8197         return pfd;
8198 }
8199
8200 struct bpf_link *bpf_program__attach_kprobe(struct bpf_program *prog,
8201                                             bool retprobe,
8202                                             const char *func_name)
8203 {
8204         char errmsg[STRERR_BUFSIZE];
8205         struct bpf_link *link;
8206         int pfd, err;
8207
8208         pfd = perf_event_open_probe(false /* uprobe */, retprobe, func_name,
8209                                     0 /* offset */, -1 /* pid */);
8210         if (pfd < 0) {
8211                 pr_warn("program '%s': failed to create %s '%s' perf event: %s\n",
8212                         bpf_program__title(prog, false),
8213                         retprobe ? "kretprobe" : "kprobe", func_name,
8214                         libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
8215                 return ERR_PTR(pfd);
8216         }
8217         link = bpf_program__attach_perf_event(prog, pfd);
8218         if (IS_ERR(link)) {
8219                 close(pfd);
8220                 err = PTR_ERR(link);
8221                 pr_warn("program '%s': failed to attach to %s '%s': %s\n",
8222                         bpf_program__title(prog, false),
8223                         retprobe ? "kretprobe" : "kprobe", func_name,
8224                         libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
8225                 return link;
8226         }
8227         return link;
8228 }
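
/* Example (sketch): attaching a loaded program to a kprobe. The program
 * and kernel function names are illustrative.
 */
static __attribute__((unused)) struct bpf_link *
example_attach_kprobe(struct bpf_object *obj)
{
	struct bpf_program *prog;

	prog = bpf_object__find_program_by_name(obj, "handle_open");
	if (!prog)
		return ERR_PTR(-ENOENT);
	return bpf_program__attach_kprobe(prog, false /* !retprobe */,
					  "do_sys_openat2");
}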
8229
8230 static struct bpf_link *attach_kprobe(const struct bpf_sec_def *sec,
8231                                       struct bpf_program *prog)
8232 {
8233         const char *func_name;
8234         bool retprobe;
8235
8236         func_name = bpf_program__title(prog, false) + sec->len;
8237         retprobe = strcmp(sec->sec, "kretprobe/") == 0;
8238
8239         return bpf_program__attach_kprobe(prog, retprobe, func_name);
8240 }
8241
8242 struct bpf_link *bpf_program__attach_uprobe(struct bpf_program *prog,
8243                                             bool retprobe, pid_t pid,
8244                                             const char *binary_path,
8245                                             size_t func_offset)
8246 {
8247         char errmsg[STRERR_BUFSIZE];
8248         struct bpf_link *link;
8249         int pfd, err;
8250
8251         pfd = perf_event_open_probe(true /* uprobe */, retprobe,
8252                                     binary_path, func_offset, pid);
8253         if (pfd < 0) {
8254                 pr_warn("program '%s': failed to create %s '%s:0x%zx' perf event: %s\n",
8255                         bpf_program__title(prog, false),
8256                         retprobe ? "uretprobe" : "uprobe",
8257                         binary_path, func_offset,
8258                         libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
8259                 return ERR_PTR(pfd);
8260         }
8261         link = bpf_program__attach_perf_event(prog, pfd);
8262         if (IS_ERR(link)) {
8263                 close(pfd);
8264                 err = PTR_ERR(link);
8265                 pr_warn("program '%s': failed to attach to %s '%s:0x%zx': %s\n",
8266                         bpf_program__title(prog, false),
8267                         retprobe ? "uretprobe" : "uprobe",
8268                         binary_path, func_offset,
8269                         libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
8270                 return link;
8271         }
8272         return link;
8273 }
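
/* Example (sketch): attaching a uprobe. The caller must resolve the
 * target function's offset within the binary (e.g., from its symbol
 * table); the binary path here is illustrative.
 */
static __attribute__((unused)) struct bpf_link *
example_attach_uprobe(struct bpf_program *prog, size_t func_offset)
{
	/* pid -1 probes all processes; a concrete pid narrows the scope */
	return bpf_program__attach_uprobe(prog, false /* !retprobe */,
					  -1 /* pid */, "/usr/lib/libc.so.6",
					  func_offset);
}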
8274
8275 static int determine_tracepoint_id(const char *tp_category,
8276                                    const char *tp_name)
8277 {
8278         char file[PATH_MAX];
8279         int ret;
8280
8281         ret = snprintf(file, sizeof(file),
8282                        "/sys/kernel/debug/tracing/events/%s/%s/id",
8283                        tp_category, tp_name);
8284         if (ret < 0)
8285                 return -errno;
8286         if (ret >= sizeof(file)) {
8287                 pr_debug("tracepoint %s/%s path is too long\n",
8288                          tp_category, tp_name);
8289                 return -E2BIG;
8290         }
8291         return parse_uint_from_file(file, "%d\n");
8292 }
8293
8294 static int perf_event_open_tracepoint(const char *tp_category,
8295                                       const char *tp_name)
8296 {
8297         struct perf_event_attr attr = {};
8298         char errmsg[STRERR_BUFSIZE];
8299         int tp_id, pfd, err;
8300
8301         tp_id = determine_tracepoint_id(tp_category, tp_name);
8302         if (tp_id < 0) {
8303                 pr_warn("failed to determine tracepoint '%s/%s' perf event ID: %s\n",
8304                         tp_category, tp_name,
8305                         libbpf_strerror_r(tp_id, errmsg, sizeof(errmsg)));
8306                 return tp_id;
8307         }
8308
8309         attr.type = PERF_TYPE_TRACEPOINT;
8310         attr.size = sizeof(attr);
8311         attr.config = tp_id;
8312
8313         pfd = syscall(__NR_perf_event_open, &attr, -1 /* pid */, 0 /* cpu */,
8314                       -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC);
8315         if (pfd < 0) {
8316                 err = -errno;
8317                 pr_warn("tracepoint '%s/%s' perf_event_open() failed: %s\n",
8318                         tp_category, tp_name,
8319                         libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
8320                 return err;
8321         }
8322         return pfd;
8323 }
8324
8325 struct bpf_link *bpf_program__attach_tracepoint(struct bpf_program *prog,
8326                                                 const char *tp_category,
8327                                                 const char *tp_name)
8328 {
8329         char errmsg[STRERR_BUFSIZE];
8330         struct bpf_link *link;
8331         int pfd, err;
8332
8333         pfd = perf_event_open_tracepoint(tp_category, tp_name);
8334         if (pfd < 0) {
8335                 pr_warn("program '%s': failed to create tracepoint '%s/%s' perf event: %s\n",
8336                         bpf_program__title(prog, false),
8337                         tp_category, tp_name,
8338                         libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
8339                 return ERR_PTR(pfd);
8340         }
8341         link = bpf_program__attach_perf_event(prog, pfd);
8342         if (IS_ERR(link)) {
8343                 close(pfd);
8344                 err = PTR_ERR(link);
8345                 pr_warn("program '%s': failed to attach to tracepoint '%s/%s': %s\n",
8346                         bpf_program__title(prog, false),
8347                         tp_category, tp_name,
8348                         libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
8349                 return link;
8350         }
8351         return link;
8352 }
8353
8354 static struct bpf_link *attach_tp(const struct bpf_sec_def *sec,
8355                                   struct bpf_program *prog)
8356 {
8357         char *sec_name, *tp_cat, *tp_name;
8358         struct bpf_link *link;
8359
8360         sec_name = strdup(bpf_program__title(prog, false));
8361         if (!sec_name)
8362                 return ERR_PTR(-ENOMEM);
8363
8364         /* extract "tp/<category>/<name>" */
8365         tp_cat = sec_name + sec->len;
8366         tp_name = strchr(tp_cat, '/');
8367         if (!tp_name) {
8368                 link = ERR_PTR(-EINVAL);
8369                 goto out;
8370         }
8371         *tp_name = '\0';
8372         tp_name++;
8373
8374         link = bpf_program__attach_tracepoint(prog, tp_cat, tp_name);
8375 out:
8376         free(sec_name);
8377         return link;
8378 }
8379
8380 struct bpf_link *bpf_program__attach_raw_tracepoint(struct bpf_program *prog,
8381                                                     const char *tp_name)
8382 {
8383         char errmsg[STRERR_BUFSIZE];
8384         struct bpf_link *link;
8385         int prog_fd, pfd;
8386
8387         prog_fd = bpf_program__fd(prog);
8388         if (prog_fd < 0) {
8389                 pr_warn("program '%s': can't attach before loaded\n",
8390                         bpf_program__title(prog, false));
8391                 return ERR_PTR(-EINVAL);
8392         }
8393
8394         link = calloc(1, sizeof(*link));
8395         if (!link)
8396                 return ERR_PTR(-ENOMEM);
8397         link->detach = &bpf_link__detach_fd;
8398
8399         pfd = bpf_raw_tracepoint_open(tp_name, prog_fd);
8400         if (pfd < 0) {
8401                 pfd = -errno;
8402                 free(link);
8403                 pr_warn("program '%s': failed to attach to raw tracepoint '%s': %s\n",
8404                         bpf_program__title(prog, false), tp_name,
8405                         libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
8406                 return ERR_PTR(pfd);
8407         }
8408         link->fd = pfd;
8409         return link;
8410 }
8411
8412 static struct bpf_link *attach_raw_tp(const struct bpf_sec_def *sec,
8413                                       struct bpf_program *prog)
8414 {
8415         const char *tp_name = bpf_program__title(prog, false) + sec->len;
8416
8417         return bpf_program__attach_raw_tracepoint(prog, tp_name);
8418 }
8419
8420 /* Common logic for all BPF program types that attach to a btf_id */
8421 static struct bpf_link *bpf_program__attach_btf_id(struct bpf_program *prog)
8422 {
8423         char errmsg[STRERR_BUFSIZE];
8424         struct bpf_link *link;
8425         int prog_fd, pfd;
8426
8427         prog_fd = bpf_program__fd(prog);
8428         if (prog_fd < 0) {
8429                 pr_warn("program '%s': can't attach before loaded\n",
8430                         bpf_program__title(prog, false));
8431                 return ERR_PTR(-EINVAL);
8432         }
8433
8434         link = calloc(1, sizeof(*link));
8435         if (!link)
8436                 return ERR_PTR(-ENOMEM);
8437         link->detach = &bpf_link__detach_fd;
8438
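        /* a NULL name tells the kernel to use the BTF type ID that was
         * recorded in the program at load time (attach_btf_id); this is
         * how fentry/fexit/tp_btf/LSM programs locate their target
         */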
8439         pfd = bpf_raw_tracepoint_open(NULL, prog_fd);
8440         if (pfd < 0) {
8441                 pfd = -errno;
8442                 free(link);
8443                 pr_warn("program '%s': failed to attach: %s\n",
8444                         bpf_program__title(prog, false),
8445                         libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
8446                 return ERR_PTR(pfd);
8447         }
8448         link->fd = pfd;
8449         return link;
8450 }
8451
8452 struct bpf_link *bpf_program__attach_trace(struct bpf_program *prog)
8453 {
8454         return bpf_program__attach_btf_id(prog);
8455 }
8456
8457 struct bpf_link *bpf_program__attach_lsm(struct bpf_program *prog)
8458 {
8459         return bpf_program__attach_btf_id(prog);
8460 }
8461
8462 static struct bpf_link *attach_trace(const struct bpf_sec_def *sec,
8463                                      struct bpf_program *prog)
8464 {
8465         return bpf_program__attach_trace(prog);
8466 }
8467
8468 static struct bpf_link *attach_lsm(const struct bpf_sec_def *sec,
8469                                    struct bpf_program *prog)
8470 {
8471         return bpf_program__attach_lsm(prog);
8472 }
8473
8474 static struct bpf_link *attach_iter(const struct bpf_sec_def *sec,
8475                                     struct bpf_program *prog)
8476 {
8477         return bpf_program__attach_iter(prog, NULL);
8478 }
8479
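/* Common LINK_CREATE-based attachment logic for the cgroup, netns and
 * XDP wrappers below: the program's expected_attach_type selects the
 * hook, while target_fd carries the cgroup FD, netns FD or ifindex,
 * respectively.
 */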
8480 static struct bpf_link *
8481 bpf_program__attach_fd(struct bpf_program *prog, int target_fd,
8482                        const char *target_name)
8483 {
8484         enum bpf_attach_type attach_type;
8485         char errmsg[STRERR_BUFSIZE];
8486         struct bpf_link *link;
8487         int prog_fd, link_fd;
8488
8489         prog_fd = bpf_program__fd(prog);
8490         if (prog_fd < 0) {
8491                 pr_warn("program '%s': can't attach before loaded\n",
8492                         bpf_program__title(prog, false));
8493                 return ERR_PTR(-EINVAL);
8494         }
8495
8496         link = calloc(1, sizeof(*link));
8497         if (!link)
8498                 return ERR_PTR(-ENOMEM);
8499         link->detach = &bpf_link__detach_fd;
8500
8501         attach_type = bpf_program__get_expected_attach_type(prog);
8502         link_fd = bpf_link_create(prog_fd, target_fd, attach_type, NULL);
8503         if (link_fd < 0) {
8504                 link_fd = -errno;
8505                 free(link);
8506                 pr_warn("program '%s': failed to attach to %s: %s\n",
8507                         bpf_program__title(prog, false), target_name,
8508                         libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg)));
8509                 return ERR_PTR(link_fd);
8510         }
8511         link->fd = link_fd;
8512         return link;
8513 }
8514
8515 struct bpf_link *
8516 bpf_program__attach_cgroup(struct bpf_program *prog, int cgroup_fd)
8517 {
8518         return bpf_program__attach_fd(prog, cgroup_fd, "cgroup");
8519 }
8520
8521 struct bpf_link *
8522 bpf_program__attach_netns(struct bpf_program *prog, int netns_fd)
8523 {
8524         return bpf_program__attach_fd(prog, netns_fd, "netns");
8525 }
8526
8527 struct bpf_link *bpf_program__attach_xdp(struct bpf_program *prog, int ifindex)
8528 {
8529         /* target_fd/target_ifindex use the same field in LINK_CREATE */
8530         return bpf_program__attach_fd(prog, ifindex, "xdp");
8531 }
8532
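/* BPF iterator links also go through LINK_CREATE, but with the
 * BPF_TRACE_ITER attach type and optional iterator-specific parameters
 * (link_info/link_info_len) passed through bpf_iter_attach_opts.
 */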
8533 struct bpf_link *
8534 bpf_program__attach_iter(struct bpf_program *prog,
8535                          const struct bpf_iter_attach_opts *opts)
8536 {
8537         DECLARE_LIBBPF_OPTS(bpf_link_create_opts, link_create_opts);
8538         char errmsg[STRERR_BUFSIZE];
8539         struct bpf_link *link;
8540         int prog_fd, link_fd;
8541         __u32 target_fd = 0;
8542
8543         if (!OPTS_VALID(opts, bpf_iter_attach_opts))
8544                 return ERR_PTR(-EINVAL);
8545
8546         link_create_opts.iter_info = OPTS_GET(opts, link_info, (void *)0);
8547         link_create_opts.iter_info_len = OPTS_GET(opts, link_info_len, 0);
8548
8549         prog_fd = bpf_program__fd(prog);
8550         if (prog_fd < 0) {
8551                 pr_warn("program '%s': can't attach before loaded\n",
8552                         bpf_program__title(prog, false));
8553                 return ERR_PTR(-EINVAL);
8554         }
8555
8556         link = calloc(1, sizeof(*link));
8557         if (!link)
8558                 return ERR_PTR(-ENOMEM);
8559         link->detach = &bpf_link__detach_fd;
8560
8561         link_fd = bpf_link_create(prog_fd, target_fd, BPF_TRACE_ITER,
8562                                   &link_create_opts);
8563         if (link_fd < 0) {
8564                 link_fd = -errno;
8565                 free(link);
8566                 pr_warn("program '%s': failed to attach to iterator: %s\n",
8567                         bpf_program__title(prog, false),
8568                         libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg)));
8569                 return ERR_PTR(link_fd);
8570         }
8571         link->fd = link_fd;
8572         return link;
8573 }
8574
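/* Generic auto-attach entry point: the program's section name is matched
 * against the sec_def table, and the matching attach_fn is invoked.
 * -ESRCH means libbpf has no auto-attach recipe for this section name,
 * in which case one of the specific attach APIs has to be used instead.
 */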
8575 struct bpf_link *bpf_program__attach(struct bpf_program *prog)
8576 {
8577         const struct bpf_sec_def *sec_def;
8578
8579         sec_def = find_sec_def(bpf_program__title(prog, false));
8580         if (!sec_def || !sec_def->attach_fn)
8581                 return ERR_PTR(-ESRCH);
8582
8583         return sec_def->attach_fn(sec_def, prog);
8584 }
8585
8586 static int bpf_link__detach_struct_ops(struct bpf_link *link)
8587 {
8588         __u32 zero = 0;
8589
8590         if (bpf_map_delete_elem(link->fd, &zero))
8591                 return -errno;
8592
8593         return 0;
8594 }
8595
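/* Attaching a struct_ops map means patching each implemented function
 * slot of the shadow kernel image (kern_vdata) with the corresponding
 * program FD and then writing the whole image into element 0 of the map;
 * deleting that element (see bpf_link__detach_struct_ops) detaches it.
 */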
8596 struct bpf_link *bpf_map__attach_struct_ops(struct bpf_map *map)
8597 {
8598         struct bpf_struct_ops *st_ops;
8599         struct bpf_link *link;
8600         __u32 i, zero = 0;
8601         int err;
8602
8603         if (!bpf_map__is_struct_ops(map) || map->fd == -1)
8604                 return ERR_PTR(-EINVAL);
8605
8606         link = calloc(1, sizeof(*link));
8607         if (!link)
8608                 return ERR_PTR(-ENOMEM);
8609
8610         st_ops = map->st_ops;
8611         for (i = 0; i < btf_vlen(st_ops->type); i++) {
8612                 struct bpf_program *prog = st_ops->progs[i];
8613                 void *kern_data;
8614                 int prog_fd;
8615
8616                 if (!prog)
8617                         continue;
8618
8619                 prog_fd = bpf_program__fd(prog);
8620                 kern_data = st_ops->kern_vdata + st_ops->kern_func_off[i];
8621                 *(unsigned long *)kern_data = prog_fd;
8622         }
8623
8624         err = bpf_map_update_elem(map->fd, &zero, st_ops->kern_vdata, 0);
8625         if (err) {
8626                 err = -errno;
8627                 free(link);
8628                 return ERR_PTR(err);
8629         }
8630
8631         link->detach = bpf_link__detach_struct_ops;
8632         link->fd = map->fd;
8633
8634         return link;
8635 }
8636
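/* Drain one perf ring buffer: every record between data_tail and
 * data_head is handed to fn(). A record that wraps around the end of the
 * ring is first reassembled into the caller-provided scratch buffer
 * (*copy_mem, grown on demand), since callbacks expect contiguous
 * memory. The updated data_tail is published back to the kernel once,
 * after the loop.
 */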
8637 enum bpf_perf_event_ret
8638 bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size,
8639                            void **copy_mem, size_t *copy_size,
8640                            bpf_perf_event_print_t fn, void *private_data)
8641 {
8642         struct perf_event_mmap_page *header = mmap_mem;
8643         __u64 data_head = ring_buffer_read_head(header);
8644         __u64 data_tail = header->data_tail;
8645         void *base = ((__u8 *)header) + page_size;
8646         int ret = LIBBPF_PERF_EVENT_CONT;
8647         struct perf_event_header *ehdr;
8648         size_t ehdr_size;
8649
8650         while (data_head != data_tail) {
8651                 ehdr = base + (data_tail & (mmap_size - 1));
8652                 ehdr_size = ehdr->size;
8653
8654                 if (((void *)ehdr) + ehdr_size > base + mmap_size) {
8655                         void *copy_start = ehdr;
8656                         size_t len_first = base + mmap_size - copy_start;
8657                         size_t len_secnd = ehdr_size - len_first;
8658
8659                         if (*copy_size < ehdr_size) {
8660                                 free(*copy_mem);
8661                                 *copy_mem = malloc(ehdr_size);
8662                                 if (!*copy_mem) {
8663                                         *copy_size = 0;
8664                                         ret = LIBBPF_PERF_EVENT_ERROR;
8665                                         break;
8666                                 }
8667                                 *copy_size = ehdr_size;
8668                         }
8669
8670                         memcpy(*copy_mem, copy_start, len_first);
8671                         memcpy(*copy_mem + len_first, base, len_secnd);
8672                         ehdr = *copy_mem;
8673                 }
8674
8675                 ret = fn(ehdr, private_data);
8676                 data_tail += ehdr_size;
8677                 if (ret != LIBBPF_PERF_EVENT_CONT)
8678                         break;
8679         }
8680
8681         ring_buffer_write_tail(header, data_tail);
8682         return ret;
8683 }
8684
8685 struct perf_buffer;
8686
8687 struct perf_buffer_params {
8688         struct perf_event_attr *attr;
8689         /* if event_cb is specified, it takes precedence */
8690         perf_buffer_event_fn event_cb;
8691         /* sample_cb and lost_cb are higher-level common-case callbacks */
8692         perf_buffer_sample_fn sample_cb;
8693         perf_buffer_lost_fn lost_cb;
8694         void *ctx;
8695         int cpu_cnt;
8696         int *cpus;
8697         int *map_keys;
8698 };
8699
8700 struct perf_cpu_buf {
8701         struct perf_buffer *pb;
8702         void *base; /* mmap()'ed memory */
8703         void *buf; /* for reconstructing segmented data */
8704         size_t buf_size;
8705         int fd;
8706         int cpu;
8707         int map_key;
8708 };
8709
8710 struct perf_buffer {
8711         perf_buffer_event_fn event_cb;
8712         perf_buffer_sample_fn sample_cb;
8713         perf_buffer_lost_fn lost_cb;
8714         void *ctx; /* passed into callbacks */
8715
8716         size_t page_size;
8717         size_t mmap_size;
8718         struct perf_cpu_buf **cpu_bufs;
8719         struct epoll_event *events;
8720         int cpu_cnt; /* number of allocated CPU buffers */
8721         int epoll_fd; /* epoll instance FD */
8722         int map_fd; /* BPF_MAP_TYPE_PERF_EVENT_ARRAY BPF map FD */
8723 };
8724
8725 static void perf_buffer__free_cpu_buf(struct perf_buffer *pb,
8726                                       struct perf_cpu_buf *cpu_buf)
8727 {
8728         if (!cpu_buf)
8729                 return;
8730         if (cpu_buf->base &&
8731             munmap(cpu_buf->base, pb->mmap_size + pb->page_size))
8732                 pr_warn("failed to munmap cpu_buf #%d\n", cpu_buf->cpu);
8733         if (cpu_buf->fd >= 0) {
8734                 ioctl(cpu_buf->fd, PERF_EVENT_IOC_DISABLE, 0);
8735                 close(cpu_buf->fd);
8736         }
8737         free(cpu_buf->buf);
8738         free(cpu_buf);
8739 }
8740
8741 void perf_buffer__free(struct perf_buffer *pb)
8742 {
8743         int i;
8744
8745         if (IS_ERR_OR_NULL(pb))
8746                 return;
8747         if (pb->cpu_bufs) {
8748                 for (i = 0; i < pb->cpu_cnt; i++) {
8749                         struct perf_cpu_buf *cpu_buf = pb->cpu_bufs[i];
8750
8751                         if (!cpu_buf)
8752                                 continue;
8753
8754                         bpf_map_delete_elem(pb->map_fd, &cpu_buf->map_key);
8755                         perf_buffer__free_cpu_buf(pb, cpu_buf);
8756                 }
8757                 free(pb->cpu_bufs);
8758         }
8759         if (pb->epoll_fd >= 0)
8760                 close(pb->epoll_fd);
8761         free(pb->events);
8762         free(pb);
8763 }
8764
8765 static struct perf_cpu_buf *
8766 perf_buffer__open_cpu_buf(struct perf_buffer *pb, struct perf_event_attr *attr,
8767                           int cpu, int map_key)
8768 {
8769         struct perf_cpu_buf *cpu_buf;
8770         char msg[STRERR_BUFSIZE];
8771         int err;
8772
8773         cpu_buf = calloc(1, sizeof(*cpu_buf));
8774         if (!cpu_buf)
8775                 return ERR_PTR(-ENOMEM);
8776
8777         cpu_buf->pb = pb;
8778         cpu_buf->cpu = cpu;
8779         cpu_buf->map_key = map_key;
8780
8781         cpu_buf->fd = syscall(__NR_perf_event_open, attr, -1 /* pid */, cpu,
8782                               -1, PERF_FLAG_FD_CLOEXEC);
8783         if (cpu_buf->fd < 0) {
8784                 err = -errno;
8785                 pr_warn("failed to open perf buffer event on cpu #%d: %s\n",
8786                         cpu, libbpf_strerror_r(err, msg, sizeof(msg)));
8787                 goto error;
8788         }
8789
8790         cpu_buf->base = mmap(NULL, pb->mmap_size + pb->page_size,
8791                              PROT_READ | PROT_WRITE, MAP_SHARED,
8792                              cpu_buf->fd, 0);
8793         if (cpu_buf->base == MAP_FAILED) {
8794                 cpu_buf->base = NULL;
8795                 err = -errno;
8796                 pr_warn("failed to mmap perf buffer on cpu #%d: %s\n",
8797                         cpu, libbpf_strerror_r(err, msg, sizeof(msg)));
8798                 goto error;
8799         }
8800
8801         if (ioctl(cpu_buf->fd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
8802                 err = -errno;
8803                 pr_warn("failed to enable perf buffer event on cpu #%d: %s\n",
8804                         cpu, libbpf_strerror_r(err, msg, sizeof(msg)));
8805                 goto error;
8806         }
8807
8808         return cpu_buf;
8809
8810 error:
8811         perf_buffer__free_cpu_buf(pb, cpu_buf);
8812         return (struct perf_cpu_buf *)ERR_PTR(err);
8813 }
8814
8815 static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt,
8816                                               struct perf_buffer_params *p);
8817
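/* perf_buffer__new() covers the common case: a software BPF_OUTPUT event
 * that wakes the consumer up on every sample. A minimal usage sketch,
 * assuming a hypothetical on_sample() callback and perf event array map
 * FD, where 8 is the per-CPU ring size in pages (must be a power of two):
 *
 *   struct perf_buffer_opts pb_opts = { .sample_cb = on_sample };
 *   struct perf_buffer *pb;
 *
 *   pb = perf_buffer__new(map_fd, 8, &pb_opts);
 *   if (IS_ERR(pb))
 *           return PTR_ERR(pb);
 */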
8818 struct perf_buffer *perf_buffer__new(int map_fd, size_t page_cnt,
8819                                      const struct perf_buffer_opts *opts)
8820 {
8821         struct perf_buffer_params p = {};
8822         struct perf_event_attr attr = { 0, };
8823
8824         attr.config = PERF_COUNT_SW_BPF_OUTPUT;
8825         attr.type = PERF_TYPE_SOFTWARE;
8826         attr.sample_type = PERF_SAMPLE_RAW;
8827         attr.sample_period = 1;
8828         attr.wakeup_events = 1;
8829
8830         p.attr = &attr;
8831         p.sample_cb = opts ? opts->sample_cb : NULL;
8832         p.lost_cb = opts ? opts->lost_cb : NULL;
8833         p.ctx = opts ? opts->ctx : NULL;
8834
8835         return __perf_buffer__new(map_fd, page_cnt, &p);
8836 }
8837
8838 struct perf_buffer *
8839 perf_buffer__new_raw(int map_fd, size_t page_cnt,
8840                      const struct perf_buffer_raw_opts *opts)
8841 {
8842         struct perf_buffer_params p = {};
8843
8844         p.attr = opts->attr;
8845         p.event_cb = opts->event_cb;
8846         p.ctx = opts->ctx;
8847         p.cpu_cnt = opts->cpu_cnt;
8848         p.cpus = opts->cpus;
8849         p.map_keys = opts->map_keys;
8850
8851         return __perf_buffer__new(map_fd, page_cnt, &p);
8852 }
8853
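/* Shared constructor: validates that page_cnt is a power of two, sanity-
 * checks (best effort) that map_fd refers to a
 * BPF_MAP_TYPE_PERF_EVENT_ARRAY map, then creates one mmap()'ed perf
 * ring per CPU, skipping offline CPUs unless an explicit CPU list was
 * given, and registers each ring FD both in the map and in one epoll
 * instance.
 */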
8854 static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt,
8855                                               struct perf_buffer_params *p)
8856 {
8857         const char *online_cpus_file = "/sys/devices/system/cpu/online";
8858         struct bpf_map_info map;
8859         char msg[STRERR_BUFSIZE];
8860         struct perf_buffer *pb;
8861         bool *online = NULL;
8862         __u32 map_info_len;
8863         int err, i, j, n;
8864
8865         if (page_cnt & (page_cnt - 1)) {
8866                 pr_warn("page count should be a power of two, but is %zu\n",
8867                         page_cnt);
8868                 return ERR_PTR(-EINVAL);
8869         }
8870
8871         /* best-effort sanity checks */
8872         memset(&map, 0, sizeof(map));
8873         map_info_len = sizeof(map);
8874         err = bpf_obj_get_info_by_fd(map_fd, &map, &map_info_len);
8875         if (err) {
8876                 err = -errno;
8877                 /* if BPF_OBJ_GET_INFO_BY_FD is supported, it will return
8878                  * -EBADFD, -EFAULT, or -E2BIG on a real error
8879                  */
8880                 if (err != -EINVAL) {
8881                         pr_warn("failed to get map info for map FD %d: %s\n",
8882                                 map_fd, libbpf_strerror_r(err, msg, sizeof(msg)));
8883                         return ERR_PTR(err);
8884                 }
8885                 pr_debug("failed to get map info for FD %d; API not supported? Ignoring...\n",
8886                          map_fd);
8887         } else {
8888                 if (map.type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) {
8889                         pr_warn("map '%s' should be BPF_MAP_TYPE_PERF_EVENT_ARRAY\n",
8890                                 map.name);
8891                         return ERR_PTR(-EINVAL);
8892                 }
8893         }
8894
8895         pb = calloc(1, sizeof(*pb));
8896         if (!pb)
8897                 return ERR_PTR(-ENOMEM);
8898
8899         pb->event_cb = p->event_cb;
8900         pb->sample_cb = p->sample_cb;
8901         pb->lost_cb = p->lost_cb;
8902         pb->ctx = p->ctx;
8903
8904         pb->page_size = getpagesize();
8905         pb->mmap_size = pb->page_size * page_cnt;
8906         pb->map_fd = map_fd;
8907
8908         pb->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
8909         if (pb->epoll_fd < 0) {
8910                 err = -errno;
8911                 pr_warn("failed to create epoll instance: %s\n",
8912                         libbpf_strerror_r(err, msg, sizeof(msg)));
8913                 goto error;
8914         }
8915
8916         if (p->cpu_cnt > 0) {
8917                 pb->cpu_cnt = p->cpu_cnt;
8918         } else {
8919                 pb->cpu_cnt = libbpf_num_possible_cpus();
8920                 if (pb->cpu_cnt < 0) {
8921                         err = pb->cpu_cnt;
8922                         goto error;
8923                 }
8924                 if (map.max_entries && map.max_entries < pb->cpu_cnt)
8925                         pb->cpu_cnt = map.max_entries;
8926         }
8927
8928         pb->events = calloc(pb->cpu_cnt, sizeof(*pb->events));
8929         if (!pb->events) {
8930                 err = -ENOMEM;
8931                 pr_warn("failed to allocate events: out of memory\n");
8932                 goto error;
8933         }
8934         pb->cpu_bufs = calloc(pb->cpu_cnt, sizeof(*pb->cpu_bufs));
8935         if (!pb->cpu_bufs) {
8936                 err = -ENOMEM;
8937                 pr_warn("failed to allocate buffers: out of memory\n");
8938                 goto error;
8939         }
8940
8941         err = parse_cpu_mask_file(online_cpus_file, &online, &n);
8942         if (err) {
8943                 pr_warn("failed to get online CPU mask: %d\n", err);
8944                 goto error;
8945         }
8946
8947         for (i = 0, j = 0; i < pb->cpu_cnt; i++) {
8948                 struct perf_cpu_buf *cpu_buf;
8949                 int cpu, map_key;
8950
8951                 cpu = p->cpu_cnt > 0 ? p->cpus[i] : i;
8952                 map_key = p->cpu_cnt > 0 ? p->map_keys[i] : i;
8953
8954                 /* in case user didn't explicitly request particular CPUs to
8955                  * be attached to, skip offline/not-present CPUs
8956                  */
8957                 if (p->cpu_cnt <= 0 && (cpu >= n || !online[cpu]))
8958                         continue;
8959
8960                 cpu_buf = perf_buffer__open_cpu_buf(pb, p->attr, cpu, map_key);
8961                 if (IS_ERR(cpu_buf)) {
8962                         err = PTR_ERR(cpu_buf);
8963                         goto error;
8964                 }
8965
8966                 pb->cpu_bufs[j] = cpu_buf;
8967
8968                 err = bpf_map_update_elem(pb->map_fd, &map_key,
8969                                           &cpu_buf->fd, 0);
8970                 if (err) {
8971                         err = -errno;
8972                         pr_warn("failed to set cpu #%d, key %d -> perf FD %d: %s\n",
8973                                 cpu, map_key, cpu_buf->fd,
8974                                 libbpf_strerror_r(err, msg, sizeof(msg)));
8975                         goto error;
8976                 }
8977
8978                 pb->events[j].events = EPOLLIN;
8979                 pb->events[j].data.ptr = cpu_buf;
8980                 if (epoll_ctl(pb->epoll_fd, EPOLL_CTL_ADD, cpu_buf->fd,
8981                               &pb->events[j]) < 0) {
8982                         err = -errno;
8983                         pr_warn("failed to epoll_ctl cpu #%d perf FD %d: %s\n",
8984                                 cpu, cpu_buf->fd,
8985                                 libbpf_strerror_r(err, msg, sizeof(msg)));
8986                         goto error;
8987                 }
8988                 j++;
8989         }
8990         pb->cpu_cnt = j;
8991         free(online);
8992
8993         return pb;
8994
8995 error:
8996         free(online);
8997         if (pb)
8998                 perf_buffer__free(pb);
8999         return ERR_PTR(err);
9000 }
9001
9002 struct perf_sample_raw {
9003         struct perf_event_header header;
9004         uint32_t size;
9005         char data[];
9006 };
9007
9008 struct perf_sample_lost {
9009         struct perf_event_header header;
9010         uint64_t id;
9011         uint64_t lost;
9012         uint64_t sample_id;
9013 };
9014
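/* Default per-record dispatcher: raw samples emitted through
 * bpf_perf_event_output() arrive as PERF_RECORD_SAMPLE and go to
 * sample_cb; ring overruns arrive as PERF_RECORD_LOST and go to lost_cb
 * together with the count of lost records.
 */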
9015 static enum bpf_perf_event_ret
9016 perf_buffer__process_record(struct perf_event_header *e, void *ctx)
9017 {
9018         struct perf_cpu_buf *cpu_buf = ctx;
9019         struct perf_buffer *pb = cpu_buf->pb;
9020         void *data = e;
9021
9022         /* user wants full control over parsing perf event */
9023         if (pb->event_cb)
9024                 return pb->event_cb(pb->ctx, cpu_buf->cpu, e);
9025
9026         switch (e->type) {
9027         case PERF_RECORD_SAMPLE: {
9028                 struct perf_sample_raw *s = data;
9029
9030                 if (pb->sample_cb)
9031                         pb->sample_cb(pb->ctx, cpu_buf->cpu, s->data, s->size);
9032                 break;
9033         }
9034         case PERF_RECORD_LOST: {
9035                 struct perf_sample_lost *s = data;
9036
9037                 if (pb->lost_cb)
9038                         pb->lost_cb(pb->ctx, cpu_buf->cpu, s->lost);
9039                 break;
9040         }
9041         default:
9042                 pr_warn("unknown perf sample type %d\n", e->type);
9043                 return LIBBPF_PERF_EVENT_ERROR;
9044         }
9045         return LIBBPF_PERF_EVENT_CONT;
9046 }
9047
9048 static int perf_buffer__process_records(struct perf_buffer *pb,
9049                                         struct perf_cpu_buf *cpu_buf)
9050 {
9051         enum bpf_perf_event_ret ret;
9052
9053         ret = bpf_perf_event_read_simple(cpu_buf->base, pb->mmap_size,
9054                                          pb->page_size, &cpu_buf->buf,
9055                                          &cpu_buf->buf_size,
9056                                          perf_buffer__process_record, cpu_buf);
9057         if (ret != LIBBPF_PERF_EVENT_CONT)
9058                 return ret;
9059         return 0;
9060 }
9061
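/* Wait for data on any of the per-CPU rings and process it. Returns the
 * number of rings that had pending data (0 on timeout) or a negative
 * error. A typical consumer loop, assuming pb from perf_buffer__new():
 *
 *   while ((err = perf_buffer__poll(pb, 100)) >= 0)
 *           ;
 *
 * with all actual work happening in the callbacks registered at creation.
 */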
9062 int perf_buffer__poll(struct perf_buffer *pb, int timeout_ms)
9063 {
9064         int i, cnt, err;
9065
9066         cnt = epoll_wait(pb->epoll_fd, pb->events, pb->cpu_cnt, timeout_ms);
9067         for (i = 0; i < cnt; i++) {
9068                 struct perf_cpu_buf *cpu_buf = pb->events[i].data.ptr;
9069
9070                 err = perf_buffer__process_records(pb, cpu_buf);
9071                 if (err) {
9072                         pr_warn("error while processing records: %d\n", err);
9073                         return err;
9074                 }
9075         }
9076         return cnt < 0 ? -errno : cnt;
9077 }
9078
9079 int perf_buffer__consume(struct perf_buffer *pb)
9080 {
9081         int i, err;
9082
9083         for (i = 0; i < pb->cpu_cnt; i++) {
9084                 struct perf_cpu_buf *cpu_buf = pb->cpu_bufs[i];
9085
9086                 if (!cpu_buf)
9087                         continue;
9088
9089                 err = perf_buffer__process_records(pb, cpu_buf);
9090                 if (err) {
9091                         pr_warn("error while processing records: %d\n", err);
9092                         return err;
9093                 }
9094         }
9095         return 0;
9096 }
9097
9098 struct bpf_prog_info_array_desc {
9099         int     array_offset;   /* e.g. offset of jited_prog_insns */
9100         int     count_offset;   /* e.g. offset of jited_prog_len */
9101         int     size_offset;    /* > 0: offset of rec size,
9102                                  * < 0: fixed size of -size_offset
9103                                  */
9104 };
9105
9106 static struct bpf_prog_info_array_desc bpf_prog_info_array_desc[] = {
9107         [BPF_PROG_INFO_JITED_INSNS] = {
9108                 offsetof(struct bpf_prog_info, jited_prog_insns),
9109                 offsetof(struct bpf_prog_info, jited_prog_len),
9110                 -1,
9111         },
9112         [BPF_PROG_INFO_XLATED_INSNS] = {
9113                 offsetof(struct bpf_prog_info, xlated_prog_insns),
9114                 offsetof(struct bpf_prog_info, xlated_prog_len),
9115                 -1,
9116         },
9117         [BPF_PROG_INFO_MAP_IDS] = {
9118                 offsetof(struct bpf_prog_info, map_ids),
9119                 offsetof(struct bpf_prog_info, nr_map_ids),
9120                 -(int)sizeof(__u32),
9121         },
9122         [BPF_PROG_INFO_JITED_KSYMS] = {
9123                 offsetof(struct bpf_prog_info, jited_ksyms),
9124                 offsetof(struct bpf_prog_info, nr_jited_ksyms),
9125                 -(int)sizeof(__u64),
9126         },
9127         [BPF_PROG_INFO_JITED_FUNC_LENS] = {
9128                 offsetof(struct bpf_prog_info, jited_func_lens),
9129                 offsetof(struct bpf_prog_info, nr_jited_func_lens),
9130                 -(int)sizeof(__u32),
9131         },
9132         [BPF_PROG_INFO_FUNC_INFO] = {
9133                 offsetof(struct bpf_prog_info, func_info),
9134                 offsetof(struct bpf_prog_info, nr_func_info),
9135                 offsetof(struct bpf_prog_info, func_info_rec_size),
9136         },
9137         [BPF_PROG_INFO_LINE_INFO] = {
9138                 offsetof(struct bpf_prog_info, line_info),
9139                 offsetof(struct bpf_prog_info, nr_line_info),
9140                 offsetof(struct bpf_prog_info, line_info_rec_size),
9141         },
9142         [BPF_PROG_INFO_JITED_LINE_INFO] = {
9143                 offsetof(struct bpf_prog_info, jited_line_info),
9144                 offsetof(struct bpf_prog_info, nr_jited_line_info),
9145                 offsetof(struct bpf_prog_info, jited_line_info_rec_size),
9146         },
9147         [BPF_PROG_INFO_PROG_TAGS] = {
9148                 offsetof(struct bpf_prog_info, prog_tags),
9149                 offsetof(struct bpf_prog_info, nr_prog_tags),
9150                 -(int)sizeof(__u8) * BPF_TAG_SIZE,
9151         },
9152
9153 };
9154
9155 static __u32 bpf_prog_info_read_offset_u32(struct bpf_prog_info *info,
9156                                            int offset)
9157 {
9158         __u32 *array = (__u32 *)info;
9159
9160         if (offset >= 0)
9161                 return array[offset / sizeof(__u32)];
9162         return -(int)offset;
9163 }
9164
9165 static __u64 bpf_prog_info_read_offset_u64(struct bpf_prog_info *info,
9166                                            int offset)
9167 {
9168         __u64 *array = (__u64 *)info;
9169
9170         if (offset >= 0)
9171                 return array[offset / sizeof(__u64)];
9172         return -(int)offset;
9173 }
9174
9175 static void bpf_prog_info_set_offset_u32(struct bpf_prog_info *info, int offset,
9176                                          __u32 val)
9177 {
9178         __u32 *array = (__u32 *)info;
9179
9180         if (offset >= 0)
9181                 array[offset / sizeof(__u32)] = val;
9182 }
9183
9184 static void bpf_prog_info_set_offset_u64(struct bpf_prog_info *info, int offset,
9185                                          __u64 val)
9186 {
9187         __u64 *array = (__u64 *)info;
9188
9189         if (offset >= 0)
9190                 array[offset / sizeof(__u64)] = val;
9191 }
9192
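/* Fetch bpf_prog_info plus any requested variable-length arrays in one
 * contiguous allocation. The two-pass scheme below first queries array
 * dimensions, then sizes a single buffer, points each info field at a
 * slice of it, and repeats the syscall to fill everything in. arrays is
 * a bitmask of BPF_PROG_INFO_* indices, e.g. (1UL << BPF_PROG_INFO_MAP_IDS).
 */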
9193 struct bpf_prog_info_linear *
9194 bpf_program__get_prog_info_linear(int fd, __u64 arrays)
9195 {
9196         struct bpf_prog_info_linear *info_linear;
9197         struct bpf_prog_info info = {};
9198         __u32 info_len = sizeof(info);
9199         __u32 data_len = 0;
9200         int i, err;
9201         void *ptr;
9202
9203         if (arrays >> BPF_PROG_INFO_LAST_ARRAY)
9204                 return ERR_PTR(-EINVAL);
9205
9206         /* step 1: get array dimensions */
9207         err = bpf_obj_get_info_by_fd(fd, &info, &info_len);
9208         if (err) {
9209                 pr_debug("can't get prog info: %s\n", strerror(errno));
9210                 return ERR_PTR(-EFAULT);
9211         }
9212
9213         /* step 2: calculate total size of all arrays */
9214         for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) {
9215                 bool include_array = (arrays & (1UL << i)) > 0;
9216                 struct bpf_prog_info_array_desc *desc;
9217                 __u32 count, size;
9218
9219                 desc = bpf_prog_info_array_desc + i;
9220
9221                 /* kernel is too old to support this field */
9222                 if (info_len < desc->array_offset + sizeof(__u32) ||
9223                     info_len < desc->count_offset + sizeof(__u32) ||
9224                     (desc->size_offset > 0 && info_len < desc->size_offset))
9225                         include_array = false;
9226
9227                 if (!include_array) {
9228                         arrays &= ~(1UL << i);  /* clear the bit */
9229                         continue;
9230                 }
9231
9232                 count = bpf_prog_info_read_offset_u32(&info, desc->count_offset);
9233                 size  = bpf_prog_info_read_offset_u32(&info, desc->size_offset);
9234
9235                 data_len += count * size;
9236         }
9237
9238         /* step 3: allocate continuous memory */
9239         data_len = roundup(data_len, sizeof(__u64));
9240         info_linear = malloc(sizeof(struct bpf_prog_info_linear) + data_len);
9241         if (!info_linear)
9242                 return ERR_PTR(-ENOMEM);
9243
9244         /* step 4: fill data to info_linear->info */
9245         info_linear->arrays = arrays;
9246         memset(&info_linear->info, 0, sizeof(info));
9247         ptr = info_linear->data;
9248
9249         for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) {
9250                 struct bpf_prog_info_array_desc *desc;
9251                 __u32 count, size;
9252
9253                 if ((arrays & (1UL << i)) == 0)
9254                         continue;
9255
9256                 desc  = bpf_prog_info_array_desc + i;
9257                 count = bpf_prog_info_read_offset_u32(&info, desc->count_offset);
9258                 size  = bpf_prog_info_read_offset_u32(&info, desc->size_offset);
9259                 bpf_prog_info_set_offset_u32(&info_linear->info,
9260                                              desc->count_offset, count);
9261                 bpf_prog_info_set_offset_u32(&info_linear->info,
9262                                              desc->size_offset, size);
9263                 bpf_prog_info_set_offset_u64(&info_linear->info,
9264                                              desc->array_offset,
9265                                              ptr_to_u64(ptr));
9266                 ptr += count * size;
9267         }
9268
9269         /* step 5: call syscall again to get required arrays */
9270         err = bpf_obj_get_info_by_fd(fd, &info_linear->info, &info_len);
9271         if (err) {
9272                 pr_debug("can't get prog info: %s\n", strerror(errno));
9273                 free(info_linear);
9274                 return ERR_PTR(-EFAULT);
9275         }
9276
9277         /* step 6: verify the data */
9278         for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) {
9279                 struct bpf_prog_info_array_desc *desc;
9280                 __u32 v1, v2;
9281
9282                 if ((arrays & (1UL << i)) == 0)
9283                         continue;
9284
9285                 desc = bpf_prog_info_array_desc + i;
9286                 v1 = bpf_prog_info_read_offset_u32(&info, desc->count_offset);
9287                 v2 = bpf_prog_info_read_offset_u32(&info_linear->info,
9288                                                    desc->count_offset);
9289                 if (v1 != v2)
9290                         pr_warn("%s: mismatch in element count\n", __func__);
9291
9292                 v1 = bpf_prog_info_read_offset_u32(&info, desc->size_offset);
9293                 v2 = bpf_prog_info_read_offset_u32(&info_linear->info,
9294                                                    desc->size_offset);
9295                 if (v1 != v2)
9296                         pr_warn("%s: mismatch in rec size\n", __func__);
9297         }
9298
9299         /* step 7: update info_len and data_len */
9300         info_linear->info_len = sizeof(struct bpf_prog_info);
9301         info_linear->data_len = data_len;
9302
9303         return info_linear;
9304 }
9305
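/* The two helpers below convert the embedded array pointers of an
 * info_linear between absolute addresses and offsets relative to
 * info_linear->data, so the blob can be serialized or copied and then
 * made usable again in a different address space.
 */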
9306 void bpf_program__bpil_addr_to_offs(struct bpf_prog_info_linear *info_linear)
9307 {
9308         int i;
9309
9310         for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) {
9311                 struct bpf_prog_info_array_desc *desc;
9312                 __u64 addr, offs;
9313
9314                 if ((info_linear->arrays & (1UL << i)) == 0)
9315                         continue;
9316
9317                 desc = bpf_prog_info_array_desc + i;
9318                 addr = bpf_prog_info_read_offset_u64(&info_linear->info,
9319                                                      desc->array_offset);
9320                 offs = addr - ptr_to_u64(info_linear->data);
9321                 bpf_prog_info_set_offset_u64(&info_linear->info,
9322                                              desc->array_offset, offs);
9323         }
9324 }
9325
9326 void bpf_program__bpil_offs_to_addr(struct bpf_prog_info_linear *info_linear)
9327 {
9328         int i;
9329
9330         for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) {
9331                 struct bpf_prog_info_array_desc *desc;
9332                 __u64 addr, offs;
9333
9334                 if ((info_linear->arrays & (1UL << i)) == 0)
9335                         continue;
9336
9337                 desc = bpf_prog_info_array_desc + i;
9338                 offs = bpf_prog_info_read_offset_u64(&info_linear->info,
9339                                                      desc->array_offset);
9340                 addr = offs + ptr_to_u64(info_linear->data);
9341                 bpf_prog_info_set_offset_u64(&info_linear->info,
9342                                              desc->array_offset, addr);
9343         }
9344 }
9345
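/* Override the BTF attach target of a not-yet-loaded program: with a
 * non-zero attach_prog_fd the function is resolved in that program's BTF
 * (the freplace case), otherwise it is looked up in vmlinux BTF using
 * the program's expected attach type.
 */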
9346 int bpf_program__set_attach_target(struct bpf_program *prog,
9347                                    int attach_prog_fd,
9348                                    const char *attach_func_name)
9349 {
9350         int btf_id;
9351
9352         if (!prog || attach_prog_fd < 0 || !attach_func_name)
9353                 return -EINVAL;
9354
9355         if (attach_prog_fd)
9356                 btf_id = libbpf_find_prog_btf_id(attach_func_name,
9357                                                  attach_prog_fd);
9358         else
9359                 btf_id = __find_vmlinux_btf_id(prog->obj->btf_vmlinux,
9360                                                attach_func_name,
9361                                                prog->expected_attach_type);
9362
9363         if (btf_id < 0)
9364                 return btf_id;
9365
9366         prog->attach_btf_id = btf_id;
9367         prog->attach_prog_fd = attach_prog_fd;
9368         return 0;
9369 }
9370
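/* Parse a kernel-style CPU list (e.g. "0-3,5,7-8\n") into a bool array
 * indexed by CPU number; *mask_sz is set to one past the highest CPU
 * mentioned. The caller owns and must free *mask.
 */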
9371 int parse_cpu_mask_str(const char *s, bool **mask, int *mask_sz)
9372 {
9373         int err = 0, n, len, start, end = -1;
9374         bool *tmp;
9375
9376         *mask = NULL;
9377         *mask_sz = 0;
9378
9379         /* Each substring separated by ',' has format \d+-\d+ or \d+ */
9380         while (*s) {
9381                 if (*s == ',' || *s == '\n') {
9382                         s++;
9383                         continue;
9384                 }
9385                 n = sscanf(s, "%d%n-%d%n", &start, &len, &end, &len);
9386                 if (n <= 0 || n > 2) {
9387                         pr_warn("Failed to get CPU range %s: %d\n", s, n);
9388                         err = -EINVAL;
9389                         goto cleanup;
9390                 } else if (n == 1) {
9391                         end = start;
9392                 }
9393                 if (start < 0 || start > end) {
9394                         pr_warn("Invalid CPU range [%d,%d] in %s\n",
9395                                 start, end, s);
9396                         err = -EINVAL;
9397                         goto cleanup;
9398                 }
9399                 tmp = realloc(*mask, end + 1);
9400                 if (!tmp) {
9401                         err = -ENOMEM;
9402                         goto cleanup;
9403                 }
9404                 *mask = tmp;
9405                 memset(tmp + *mask_sz, 0, start - *mask_sz);
9406                 memset(tmp + start, 1, end - start + 1);
9407                 *mask_sz = end + 1;
9408                 s += len;
9409         }
9410         if (!*mask_sz) {
9411                 pr_warn("Empty CPU range\n");
9412                 return -EINVAL;
9413         }
9414         return 0;
9415 cleanup:
9416         free(*mask);
9417         *mask = NULL;
9418         return err;
9419 }
9420
9421 int parse_cpu_mask_file(const char *fcpu, bool **mask, int *mask_sz)
9422 {
9423         int fd, err = 0, len;
9424         char buf[128];
9425
9426         fd = open(fcpu, O_RDONLY);
9427         if (fd < 0) {
9428                 err = -errno;
9429                 pr_warn("Failed to open cpu mask file %s: %d\n", fcpu, err);
9430                 return err;
9431         }
9432         len = read(fd, buf, sizeof(buf));
9433         close(fd);
9434         if (len <= 0) {
9435                 err = len ? -errno : -EINVAL;
9436                 pr_warn("Failed to read cpu mask from %s: %d\n", fcpu, err);
9437                 return err;
9438         }
9439         if (len >= sizeof(buf)) {
9440                 pr_warn("CPU mask is too big in file %s\n", fcpu);
9441                 return -E2BIG;
9442         }
9443         buf[len] = '\0';
9444
9445         return parse_cpu_mask_str(buf, mask, mask_sz);
9446 }
9447
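/* Number of possible (not necessarily online) CPUs, parsed from
 * /sys/devices/system/cpu/possible and cached after the first call.
 * This is the correct array size for reading per-CPU map values, e.g.
 * for BPF_MAP_TYPE_PERCPU_ARRAY lookups.
 */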
9448 int libbpf_num_possible_cpus(void)
9449 {
9450         static const char *fcpu = "/sys/devices/system/cpu/possible";
9451         static int cpus;
9452         int err, n, i, tmp_cpus;
9453         bool *mask;
9454
9455         tmp_cpus = READ_ONCE(cpus);
9456         if (tmp_cpus > 0)
9457                 return tmp_cpus;
9458
9459         err = parse_cpu_mask_file(fcpu, &mask, &n);
9460         if (err)
9461                 return err;
9462
9463         tmp_cpus = 0;
9464         for (i = 0; i < n; i++) {
9465                 if (mask[i])
9466                         tmp_cpus++;
9467         }
9468         free(mask);
9469
9470         WRITE_ONCE(cpus, tmp_cpus);
9471         return tmp_cpus;
9472 }
9473
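/* Skeleton support: bpf_object__open_skeleton() opens the embedded
 * object image and wires up the caller's map and program pointers by
 * name, so that code generated by "bpftool gen skeleton" can access them
 * as plain struct fields afterwards.
 */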
9474 int bpf_object__open_skeleton(struct bpf_object_skeleton *s,
9475                               const struct bpf_object_open_opts *opts)
9476 {
9477         DECLARE_LIBBPF_OPTS(bpf_object_open_opts, skel_opts,
9478                 .object_name = s->name,
9479         );
9480         struct bpf_object *obj;
9481         int i;
9482
9483         /* Attempt to preserve opts->object_name, unless overridden by user
9484          * explicitly. Overwriting object name for skeletons is discouraged,
9485          * as it breaks global data maps, because they contain object name
9486          * prefix as their own map name prefix. When skeleton is generated,
9487          * bpftool is making an assumption that this name will stay the same.
9488          */
9489         if (opts) {
9490                 memcpy(&skel_opts, opts, sizeof(*opts));
9491                 if (!opts->object_name)
9492                         skel_opts.object_name = s->name;
9493         }
9494
9495         obj = bpf_object__open_mem(s->data, s->data_sz, &skel_opts);
9496         if (IS_ERR(obj)) {
9497                 pr_warn("failed to initialize skeleton BPF object '%s': %ld\n",
9498                         s->name, PTR_ERR(obj));
9499                 return PTR_ERR(obj);
9500         }
9501
9502         *s->obj = obj;
9503
9504         for (i = 0; i < s->map_cnt; i++) {
9505                 struct bpf_map **map = s->maps[i].map;
9506                 const char *name = s->maps[i].name;
9507                 void **mmaped = s->maps[i].mmaped;
9508
9509                 *map = bpf_object__find_map_by_name(obj, name);
9510                 if (!*map) {
9511                         pr_warn("failed to find skeleton map '%s'\n", name);
9512                         return -ESRCH;
9513                 }
9514
9515                 /* externs shouldn't be pre-setup from user code */
9516                 if (mmaped && (*map)->libbpf_type != LIBBPF_MAP_KCONFIG)
9517                         *mmaped = (*map)->mmaped;
9518         }
9519
9520         for (i = 0; i < s->prog_cnt; i++) {
9521                 struct bpf_program **prog = s->progs[i].prog;
9522                 const char *name = s->progs[i].name;
9523
9524                 *prog = bpf_object__find_program_by_name(obj, name);
9525                 if (!*prog) {
9526                         pr_warn("failed to find skeleton program '%s'\n", name);
9527                         return -ESRCH;
9528                 }
9529         }
9530
9531         return 0;
9532 }
9533
9534 int bpf_object__load_skeleton(struct bpf_object_skeleton *s)
9535 {
9536         int i, err;
9537
9538         err = bpf_object__load(*s->obj);
9539         if (err) {
9540                 pr_warn("failed to load BPF skeleton '%s': %d\n", s->name, err);
9541                 return err;
9542         }
9543
9544         for (i = 0; i < s->map_cnt; i++) {
9545                 struct bpf_map *map = *s->maps[i].map;
9546                 size_t mmap_sz = bpf_map_mmap_sz(map);
9547                 int prot, map_fd = bpf_map__fd(map);
9548                 void **mmaped = s->maps[i].mmaped;
9549
9550                 if (!mmaped)
9551                         continue;
9552
9553                 if (!(map->def.map_flags & BPF_F_MMAPABLE)) {
9554                         *mmaped = NULL;
9555                         continue;
9556                 }
9557
9558                 if (map->def.map_flags & BPF_F_RDONLY_PROG)
9559                         prot = PROT_READ;
9560                 else
9561                         prot = PROT_READ | PROT_WRITE;
9562
9563                 /* Remap anonymous mmap()-ed "map initialization image" as
9564                  * a BPF map-backed mmap()-ed memory, but preserving the same
9565                  * BPF map-backed mmap()-ed memory, while preserving the same
9566                  * memory address. This will cause the kernel to change the process'
9567                  * but from userspace point of view memory address (and its
9568                  * contents, being identical at this point) will stay the
9569                  * same. This mapping will be released by bpf_object__close()
9570                  * as per normal clean up procedure, so we don't need to worry
9571                  * about it from skeleton's clean up perspective.
9572                  */
9573                 *mmaped = mmap(map->mmaped, mmap_sz, prot,
9574                                 MAP_SHARED | MAP_FIXED, map_fd, 0);
9575                 if (*mmaped == MAP_FAILED) {
9576                         err = -errno;
9577                         *mmaped = NULL;
9578                         pr_warn("failed to re-mmap() map '%s': %d\n",
9579                                  bpf_map__name(map), err);
9580                         return err;
9581                 }
9582         }
9583
9584         return 0;
9585 }
9586
9587 int bpf_object__attach_skeleton(struct bpf_object_skeleton *s)
9588 {
9589         int i;
9590
9591         for (i = 0; i < s->prog_cnt; i++) {
9592                 struct bpf_program *prog = *s->progs[i].prog;
9593                 struct bpf_link **link = s->progs[i].link;
9594                 const struct bpf_sec_def *sec_def;
9595                 const char *sec_name = bpf_program__title(prog, false);
9596
9597                 if (!prog->load)
9598                         continue;
9599
9600                 sec_def = find_sec_def(sec_name);
9601                 if (!sec_def || !sec_def->attach_fn)
9602                         continue;
9603
9604                 *link = sec_def->attach_fn(sec_def, prog);
9605                 if (IS_ERR(*link)) {
9606                         pr_warn("failed to auto-attach program '%s': %ld\n",
9607                                 bpf_program__name(prog), PTR_ERR(*link));
9608                         return PTR_ERR(*link);
9609                 }
9610         }
9611
9612         return 0;
9613 }
9614
9615 void bpf_object__detach_skeleton(struct bpf_object_skeleton *s)
9616 {
9617         int i;
9618
9619         for (i = 0; i < s->prog_cnt; i++) {
9620                 struct bpf_link **link = s->progs[i].link;
9621
9622                 bpf_link__destroy(*link);
9623                 *link = NULL;
9624         }
9625 }
9626
9627 void bpf_object__destroy_skeleton(struct bpf_object_skeleton *s)
9628 {
9629         if (s->progs)
9630                 bpf_object__detach_skeleton(s);
9631         if (s->obj)
9632                 bpf_object__close(*s->obj);
9633         free(s->maps);
9634         free(s->progs);
9635         free(s);
9636 }