libbpf: Allow to augment system Kconfig through extra optional config
[linux-2.6-microblaze.git] tools/lib/bpf/libbpf.c
// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)

/*
 * Common eBPF ELF object loading operations.
 *
 * Copyright (C) 2013-2015 Alexei Starovoitov <ast@kernel.org>
 * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com>
 * Copyright (C) 2015 Huawei Inc.
 * Copyright (C) 2017 Nicira, Inc.
 * Copyright (C) 2019 Isovalent, Inc.
 */

#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif
#include <stdlib.h>
#include <stdio.h>
#include <stdarg.h>
#include <libgen.h>
#include <inttypes.h>
#include <string.h>
#include <unistd.h>
#include <endian.h>
#include <fcntl.h>
#include <errno.h>
#include <asm/unistd.h>
#include <linux/err.h>
#include <linux/kernel.h>
#include <linux/bpf.h>
#include <linux/btf.h>
#include <linux/filter.h>
#include <linux/list.h>
#include <linux/limits.h>
#include <linux/perf_event.h>
#include <linux/ring_buffer.h>
#include <linux/version.h>
#include <sys/epoll.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/vfs.h>
#include <sys/utsname.h>
#include <sys/resource.h>
#include <tools/libc_compat.h>
#include <libelf.h>
#include <gelf.h>
#include <zlib.h>

#include "libbpf.h"
#include "bpf.h"
#include "btf.h"
#include "str_error.h"
#include "libbpf_internal.h"
#include "hashmap.h"

#ifndef EM_BPF
#define EM_BPF 247
#endif

#ifndef BPF_FS_MAGIC
#define BPF_FS_MAGIC            0xcafe4a11
#endif
/* vsprintf() in __base_pr() uses a nonliteral format string, which may
 * break compilation if the user enables the corresponding warning.
 * Disable it explicitly.
 */
#pragma GCC diagnostic ignored "-Wformat-nonliteral"

#define __printf(a, b)  __attribute__((format(printf, a, b)))

static int __base_pr(enum libbpf_print_level level, const char *format,
                     va_list args)
{
        if (level == LIBBPF_DEBUG)
                return 0;

        return vfprintf(stderr, format, args);
}

static libbpf_print_fn_t __libbpf_pr = __base_pr;

libbpf_print_fn_t libbpf_set_print(libbpf_print_fn_t fn)
{
        libbpf_print_fn_t old_print_fn = __libbpf_pr;

        __libbpf_pr = fn;
        return old_print_fn;
}

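/* Callers can override the default printer to route libbpf's output into
 * their own logging. A minimal sketch (my_print is a hypothetical
 * caller-side function, not part of libbpf):
 *
 *	static int my_print(enum libbpf_print_level level,
 *			    const char *format, va_list args)
 *	{
 *		return vfprintf(stdout, format, args);
 *	}
 *
 *	libbpf_set_print(my_print);
 *
 * Passing NULL to libbpf_set_print() silences all libbpf output.
 */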
__printf(2, 3)
void libbpf_print(enum libbpf_print_level level, const char *format, ...)
{
        va_list args;

        if (!__libbpf_pr)
                return;

        va_start(args, format);
        __libbpf_pr(level, format, args);
        va_end(args);
}

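/* BPF map and program memory is charged against RLIMIT_MEMLOCK on kernels
 * of this era, so a low locked-memory limit commonly surfaces as -EPERM
 * even when running as root. pr_perm_msg() below detects that case and
 * suggests raising the limit (e.g., via 'ulimit -l').
 */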
static void pr_perm_msg(int err)
{
        struct rlimit limit;
        char buf[100];

        if (err != -EPERM || geteuid() != 0)
                return;

        err = getrlimit(RLIMIT_MEMLOCK, &limit);
        if (err)
                return;

        if (limit.rlim_cur == RLIM_INFINITY)
                return;

        if (limit.rlim_cur < 1024)
                snprintf(buf, sizeof(buf), "%lu bytes", limit.rlim_cur);
        else if (limit.rlim_cur < 1024*1024)
                snprintf(buf, sizeof(buf), "%.1f KiB", (double)limit.rlim_cur / 1024);
        else
                snprintf(buf, sizeof(buf), "%.1f MiB", (double)limit.rlim_cur / (1024*1024));

        pr_warn("permission error while running as root; try raising 'ulimit -l'? current value: %s\n",
                buf);
}

#define STRERR_BUFSIZE  128

/* Copied from tools/perf/util/util.h */
#ifndef zfree
# define zfree(ptr) ({ free(*ptr); *ptr = NULL; })
#endif

#ifndef zclose
# define zclose(fd) ({                  \
        int ___err = 0;                 \
        if ((fd) >= 0)                  \
                ___err = close((fd));   \
        fd = -1;                        \
        ___err; })
#endif

#ifdef HAVE_LIBELF_MMAP_SUPPORT
# define LIBBPF_ELF_C_READ_MMAP ELF_C_READ_MMAP
#else
# define LIBBPF_ELF_C_READ_MMAP ELF_C_READ
#endif

static inline __u64 ptr_to_u64(const void *ptr)
{
        return (__u64) (unsigned long) ptr;
}

struct bpf_capabilities {
        /* v4.14: kernel support for program & map names. */
        __u32 name:1;
        /* v5.2: kernel support for global data sections. */
        __u32 global_data:1;
        /* BTF_KIND_FUNC and BTF_KIND_FUNC_PROTO support */
        __u32 btf_func:1;
        /* BTF_KIND_VAR and BTF_KIND_DATASEC support */
        __u32 btf_datasec:1;
        /* BPF_F_MMAPABLE is supported for arrays */
        __u32 array_mmap:1;
};

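/* Relocation kinds recognized by the loader:
 *   RELO_LD64   - ldimm64 instruction referencing a map
 *   RELO_CALL   - bpf-to-bpf sub-program call
 *   RELO_DATA   - access to global data (.data/.rodata/.bss) via a map
 *   RELO_EXTERN - access to an extern resolved from the .kconfig map
 */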
enum reloc_type {
        RELO_LD64,
        RELO_CALL,
        RELO_DATA,
        RELO_EXTERN,
};

struct reloc_desc {
        enum reloc_type type;
        int insn_idx;
        int map_idx;
        int sym_off;
};

/*
 * bpf_prog would be a better name, but it is already used in
 * linux/filter.h.
 */
struct bpf_program {
        /* Index in elf obj file, for relocation use. */
        int idx;
        char *name;
        int prog_ifindex;
        char *section_name;
        /* section_name with / replaced by _; makes recursive pinning
         * in bpf_object__pin_programs easier
         */
        char *pin_name;
        struct bpf_insn *insns;
        size_t insns_cnt, main_prog_cnt;
        enum bpf_prog_type type;

        struct reloc_desc *reloc_desc;
        int nr_reloc;
        int log_level;

        struct {
                int nr;
                int *fds;
        } instances;
        bpf_program_prep_t preprocessor;

        struct bpf_object *obj;
        void *priv;
        bpf_program_clear_priv_t clear_priv;

        enum bpf_attach_type expected_attach_type;
        __u32 attach_btf_id;
        __u32 attach_prog_fd;
        void *func_info;
        __u32 func_info_rec_size;
        __u32 func_info_cnt;

        struct bpf_capabilities *caps;

        void *line_info;
        __u32 line_info_rec_size;
        __u32 line_info_cnt;
        __u32 prog_flags;
};

#define DATA_SEC ".data"
#define BSS_SEC ".bss"
#define RODATA_SEC ".rodata"
#define KCONFIG_SEC ".kconfig"

enum libbpf_map_type {
        LIBBPF_MAP_UNSPEC,
        LIBBPF_MAP_DATA,
        LIBBPF_MAP_BSS,
        LIBBPF_MAP_RODATA,
        LIBBPF_MAP_KCONFIG,
};

static const char * const libbpf_type_to_btf_name[] = {
        [LIBBPF_MAP_DATA]       = DATA_SEC,
        [LIBBPF_MAP_BSS]        = BSS_SEC,
        [LIBBPF_MAP_RODATA]     = RODATA_SEC,
        [LIBBPF_MAP_KCONFIG]    = KCONFIG_SEC,
};

struct bpf_map {
        char *name;
        int fd;
        int sec_idx;
        size_t sec_offset;
        int map_ifindex;
        int inner_map_fd;
        struct bpf_map_def def;
        __u32 btf_key_type_id;
        __u32 btf_value_type_id;
        void *priv;
        bpf_map_clear_priv_t clear_priv;
        enum libbpf_map_type libbpf_type;
        void *mmaped;
        char *pin_path;
        bool pinned;
        bool reused;
};

enum extern_type {
        EXT_UNKNOWN,
        EXT_CHAR,
        EXT_BOOL,
        EXT_INT,
        EXT_TRISTATE,
        EXT_CHAR_ARR,
};

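/* Describes one extern variable resolved from Kconfig data. In BPF program
 * source such variables are declared along these lines (illustrative):
 *
 *	extern int CONFIG_HZ __kconfig;
 *	extern char CONFIG_DEFAULT_HOSTNAME[8] __kconfig __weak;
 *
 * data_off is the variable's offset within the .kconfig map's value.
 */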
struct extern_desc {
        const char *name;
        int sym_idx;
        int btf_id;
        enum extern_type type;
        int sz;
        int align;
        int data_off;
        bool is_signed;
        bool is_weak;
        bool is_set;
};

static LIST_HEAD(bpf_objects_list);

struct bpf_object {
        char name[BPF_OBJ_NAME_LEN];
        char license[64];
        __u32 kern_version;

        struct bpf_program *programs;
        size_t nr_programs;
        struct bpf_map *maps;
        size_t nr_maps;
        size_t maps_cap;

        char *kconfig;
        struct extern_desc *externs;
        int nr_extern;
        int kconfig_map_idx;

        bool loaded;
        bool has_pseudo_calls;
        bool relaxed_core_relocs;

        /*
         * Information when doing elf related work. Only valid if fd
         * is valid.
         */
        struct {
                int fd;
                const void *obj_buf;
                size_t obj_buf_sz;
                Elf *elf;
                GElf_Ehdr ehdr;
                Elf_Data *symbols;
                Elf_Data *data;
                Elf_Data *rodata;
                Elf_Data *bss;
                size_t strtabidx;
                struct {
                        GElf_Shdr shdr;
                        Elf_Data *data;
                } *reloc_sects;
                int nr_reloc_sects;
                int maps_shndx;
                int btf_maps_shndx;
                int text_shndx;
                int symbols_shndx;
                int data_shndx;
                int rodata_shndx;
                int bss_shndx;
        } efile;
        /*
         * All loaded bpf_objects are linked in a list, which is
         * hidden from the caller. bpf_objects__<func> handlers deal
         * with all objects.
         */
        struct list_head list;

        struct btf *btf;
        struct btf_ext *btf_ext;

        void *priv;
        bpf_object_clear_priv_t clear_priv;

        struct bpf_capabilities caps;

        char path[];
};
#define obj_elf_valid(o)        ((o)->efile.elf)

void bpf_program__unload(struct bpf_program *prog)
{
        int i;

        if (!prog)
                return;

        /*
         * If the object is opened but the program was never loaded,
         * it is possible that prog->instances.nr == -1.
         */
        if (prog->instances.nr > 0) {
                for (i = 0; i < prog->instances.nr; i++)
                        zclose(prog->instances.fds[i]);
        } else if (prog->instances.nr != -1) {
                pr_warn("Internal error: instances.nr is %d\n",
                        prog->instances.nr);
        }

        prog->instances.nr = -1;
        zfree(&prog->instances.fds);

        zfree(&prog->func_info);
        zfree(&prog->line_info);
}

static void bpf_program__exit(struct bpf_program *prog)
{
        if (!prog)
                return;

        if (prog->clear_priv)
                prog->clear_priv(prog, prog->priv);

        prog->priv = NULL;
        prog->clear_priv = NULL;

        bpf_program__unload(prog);
        zfree(&prog->name);
        zfree(&prog->section_name);
        zfree(&prog->pin_name);
        zfree(&prog->insns);
        zfree(&prog->reloc_desc);

        prog->nr_reloc = 0;
        prog->insns_cnt = 0;
        prog->idx = -1;
}

static char *__bpf_program__pin_name(struct bpf_program *prog)
{
        char *name, *p;

        name = p = strdup(prog->section_name);
        while ((p = strchr(p, '/')))
                *p = '_';

        return name;
}

static int
bpf_program__init(void *data, size_t size, char *section_name, int idx,
                  struct bpf_program *prog)
{
        const size_t bpf_insn_sz = sizeof(struct bpf_insn);

        if (size == 0 || size % bpf_insn_sz) {
                pr_warn("corrupted section '%s', size: %zu\n",
                        section_name, size);
                return -EINVAL;
        }

        memset(prog, 0, sizeof(*prog));

        prog->section_name = strdup(section_name);
        if (!prog->section_name) {
                pr_warn("failed to alloc name for prog under section(%d) %s\n",
                        idx, section_name);
                goto errout;
        }

        prog->pin_name = __bpf_program__pin_name(prog);
        if (!prog->pin_name) {
                pr_warn("failed to alloc pin name for prog under section(%d) %s\n",
                        idx, section_name);
                goto errout;
        }

        prog->insns = malloc(size);
        if (!prog->insns) {
                pr_warn("failed to alloc insns for prog under section %s\n",
                        section_name);
                goto errout;
        }
        prog->insns_cnt = size / bpf_insn_sz;
        memcpy(prog->insns, data, size);
        prog->idx = idx;
        prog->instances.fds = NULL;
        prog->instances.nr = -1;
        prog->type = BPF_PROG_TYPE_UNSPEC;

        return 0;
errout:
        bpf_program__exit(prog);
        return -ENOMEM;
}

static int
bpf_object__add_program(struct bpf_object *obj, void *data, size_t size,
                        char *section_name, int idx)
{
        struct bpf_program prog, *progs;
        int nr_progs, err;

        err = bpf_program__init(data, size, section_name, idx, &prog);
        if (err)
                return err;

        prog.caps = &obj->caps;
        progs = obj->programs;
        nr_progs = obj->nr_programs;

        progs = reallocarray(progs, nr_progs + 1, sizeof(progs[0]));
        if (!progs) {
                /*
                 * In this case the original obj->programs is still
                 * valid, so it doesn't need special treatment in
                 * bpf_close_object().
                 */
                pr_warn("failed to alloc a new program under section '%s'\n",
                        section_name);
                bpf_program__exit(&prog);
                return -ENOMEM;
        }

        pr_debug("found program %s\n", prog.section_name);
        obj->programs = progs;
        obj->nr_programs = nr_progs + 1;
        prog.obj = obj;
        progs[nr_progs] = prog;
        return 0;
}

static int
bpf_object__init_prog_names(struct bpf_object *obj)
{
        Elf_Data *symbols = obj->efile.symbols;
        struct bpf_program *prog;
        size_t pi, si;

        for (pi = 0; pi < obj->nr_programs; pi++) {
                const char *name = NULL;

                prog = &obj->programs[pi];

                for (si = 0; si < symbols->d_size / sizeof(GElf_Sym) && !name;
                     si++) {
                        GElf_Sym sym;

                        if (!gelf_getsym(symbols, si, &sym))
                                continue;
                        if (sym.st_shndx != prog->idx)
                                continue;
                        if (GELF_ST_BIND(sym.st_info) != STB_GLOBAL)
                                continue;

                        name = elf_strptr(obj->efile.elf,
                                          obj->efile.strtabidx,
                                          sym.st_name);
                        if (!name) {
                                pr_warn("failed to get sym name string for prog %s\n",
                                        prog->section_name);
                                return -LIBBPF_ERRNO__LIBELF;
                        }
                }

                if (!name && prog->idx == obj->efile.text_shndx)
                        name = ".text";

                if (!name) {
                        pr_warn("failed to find sym for prog %s\n",
                                prog->section_name);
                        return -EINVAL;
                }

                prog->name = strdup(name);
                if (!prog->name) {
                        pr_warn("failed to allocate memory for prog sym %s\n",
                                name);
                        return -ENOMEM;
                }
        }

        return 0;
}

static __u32 get_kernel_version(void)
{
        __u32 major, minor, patch;
        struct utsname info;

        uname(&info);
        if (sscanf(info.release, "%u.%u.%u", &major, &minor, &patch) != 3)
                return 0;
        return KERNEL_VERSION(major, minor, patch);
}

static struct bpf_object *bpf_object__new(const char *path,
                                          const void *obj_buf,
                                          size_t obj_buf_sz,
                                          const char *obj_name)
{
        struct bpf_object *obj;
        char *end;

        obj = calloc(1, sizeof(struct bpf_object) + strlen(path) + 1);
        if (!obj) {
                pr_warn("alloc memory failed for %s\n", path);
                return ERR_PTR(-ENOMEM);
        }

        strcpy(obj->path, path);
        if (obj_name) {
                strncpy(obj->name, obj_name, sizeof(obj->name) - 1);
                obj->name[sizeof(obj->name) - 1] = 0;
        } else {
                /* Using the GNU version of basename(), which doesn't
                 * modify its argument.
                 */
                strncpy(obj->name, basename((void *)path),
                        sizeof(obj->name) - 1);
                end = strchr(obj->name, '.');
                if (end)
                        *end = 0;
        }

        obj->efile.fd = -1;
        /*
         * The caller of this function should also call
         * bpf_object__elf_finish() after data collection to return
         * obj_buf to the user. If not, we would have to duplicate the
         * buffer to keep the user from freeing it before ELF
         * processing finishes.
         */
        obj->efile.obj_buf = obj_buf;
        obj->efile.obj_buf_sz = obj_buf_sz;
        obj->efile.maps_shndx = -1;
        obj->efile.btf_maps_shndx = -1;
        obj->efile.data_shndx = -1;
        obj->efile.rodata_shndx = -1;
        obj->efile.bss_shndx = -1;
        obj->kconfig_map_idx = -1;

        obj->kern_version = get_kernel_version();
        obj->loaded = false;

        INIT_LIST_HEAD(&obj->list);
        list_add(&obj->list, &bpf_objects_list);
        return obj;
}

static void bpf_object__elf_finish(struct bpf_object *obj)
{
        if (!obj_elf_valid(obj))
                return;

        if (obj->efile.elf) {
                elf_end(obj->efile.elf);
                obj->efile.elf = NULL;
        }
        obj->efile.symbols = NULL;
        obj->efile.data = NULL;
        obj->efile.rodata = NULL;
        obj->efile.bss = NULL;

        zfree(&obj->efile.reloc_sects);
        obj->efile.nr_reloc_sects = 0;
        zclose(obj->efile.fd);
        obj->efile.obj_buf = NULL;
        obj->efile.obj_buf_sz = 0;
}

static int bpf_object__elf_init(struct bpf_object *obj)
{
        int err = 0;
        GElf_Ehdr *ep;

        if (obj_elf_valid(obj)) {
                pr_warn("elf init: internal error\n");
                return -LIBBPF_ERRNO__LIBELF;
        }

        if (obj->efile.obj_buf_sz > 0) {
                /*
                 * obj_buf should have been validated by
                 * bpf_object__open_buffer().
                 */
                obj->efile.elf = elf_memory((char *)obj->efile.obj_buf,
                                            obj->efile.obj_buf_sz);
        } else {
                obj->efile.fd = open(obj->path, O_RDONLY);
                if (obj->efile.fd < 0) {
                        char errmsg[STRERR_BUFSIZE], *cp;

                        err = -errno;
                        cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
                        pr_warn("failed to open %s: %s\n", obj->path, cp);
                        return err;
                }

                obj->efile.elf = elf_begin(obj->efile.fd,
                                           LIBBPF_ELF_C_READ_MMAP, NULL);
        }

        if (!obj->efile.elf) {
                pr_warn("failed to open %s as ELF file\n", obj->path);
                err = -LIBBPF_ERRNO__LIBELF;
                goto errout;
        }

        if (!gelf_getehdr(obj->efile.elf, &obj->efile.ehdr)) {
                pr_warn("failed to get EHDR from %s\n", obj->path);
                err = -LIBBPF_ERRNO__FORMAT;
                goto errout;
        }
        ep = &obj->efile.ehdr;

        /* Old LLVM versions set e_machine to EM_NONE */
        if (ep->e_type != ET_REL ||
            (ep->e_machine && ep->e_machine != EM_BPF)) {
                pr_warn("%s is not an eBPF object file\n", obj->path);
                err = -LIBBPF_ERRNO__FORMAT;
                goto errout;
        }

        return 0;
errout:
        bpf_object__elf_finish(obj);
        return err;
}

static int bpf_object__check_endianness(struct bpf_object *obj)
{
#if __BYTE_ORDER == __LITTLE_ENDIAN
        if (obj->efile.ehdr.e_ident[EI_DATA] == ELFDATA2LSB)
                return 0;
#elif __BYTE_ORDER == __BIG_ENDIAN
        if (obj->efile.ehdr.e_ident[EI_DATA] == ELFDATA2MSB)
                return 0;
#else
# error "Unrecognized __BYTE_ORDER"
#endif
        pr_warn("endianness mismatch.\n");
        return -LIBBPF_ERRNO__ENDIAN;
}

static int
bpf_object__init_license(struct bpf_object *obj, void *data, size_t size)
{
        memcpy(obj->license, data, min(size, sizeof(obj->license) - 1));
        pr_debug("license of %s is %s\n", obj->path, obj->license);
        return 0;
}

static int
bpf_object__init_kversion(struct bpf_object *obj, void *data, size_t size)
{
        __u32 kver;

        if (size != sizeof(kver)) {
                pr_warn("invalid kver section in %s\n", obj->path);
                return -LIBBPF_ERRNO__FORMAT;
        }
        memcpy(&kver, data, sizeof(kver));
        obj->kern_version = kver;
        pr_debug("kernel version of %s is %x\n", obj->path, obj->kern_version);
        return 0;
}

static int compare_bpf_map(const void *_a, const void *_b)
{
        const struct bpf_map *a = _a;
        const struct bpf_map *b = _b;

        if (a->sec_idx != b->sec_idx)
                return a->sec_idx - b->sec_idx;
        return a->sec_offset - b->sec_offset;
}

static bool bpf_map_type__is_map_in_map(enum bpf_map_type type)
{
        if (type == BPF_MAP_TYPE_ARRAY_OF_MAPS ||
            type == BPF_MAP_TYPE_HASH_OF_MAPS)
                return true;
        return false;
}

static int bpf_object_search_section_size(const struct bpf_object *obj,
                                          const char *name, size_t *d_size)
{
        const GElf_Ehdr *ep = &obj->efile.ehdr;
        Elf *elf = obj->efile.elf;
        Elf_Scn *scn = NULL;
        int idx = 0;

        while ((scn = elf_nextscn(elf, scn)) != NULL) {
                const char *sec_name;
                Elf_Data *data;
                GElf_Shdr sh;

                idx++;
                if (gelf_getshdr(scn, &sh) != &sh) {
                        pr_warn("failed to get section(%d) header from %s\n",
                                idx, obj->path);
                        return -EIO;
                }

                sec_name = elf_strptr(elf, ep->e_shstrndx, sh.sh_name);
                if (!sec_name) {
                        pr_warn("failed to get section(%d) name from %s\n",
                                idx, obj->path);
                        return -EIO;
                }

                if (strcmp(name, sec_name))
                        continue;

                data = elf_getdata(scn, 0);
                if (!data) {
                        pr_warn("failed to get section(%d) data from %s(%s)\n",
                                idx, name, obj->path);
                        return -EIO;
                }

                *d_size = data->d_size;
                return 0;
        }

        return -ENOENT;
}

int bpf_object__section_size(const struct bpf_object *obj, const char *name,
                             __u32 *size)
{
        int ret = -ENOENT;
        size_t d_size;

        *size = 0;
        if (!name) {
                return -EINVAL;
        } else if (!strcmp(name, DATA_SEC)) {
                if (obj->efile.data)
                        *size = obj->efile.data->d_size;
        } else if (!strcmp(name, BSS_SEC)) {
                if (obj->efile.bss)
                        *size = obj->efile.bss->d_size;
        } else if (!strcmp(name, RODATA_SEC)) {
                if (obj->efile.rodata)
                        *size = obj->efile.rodata->d_size;
        } else {
                ret = bpf_object_search_section_size(obj, name, &d_size);
                if (!ret)
                        *size = d_size;
        }

        return *size ? 0 : ret;
}

int bpf_object__variable_offset(const struct bpf_object *obj, const char *name,
                                __u32 *off)
{
        Elf_Data *symbols = obj->efile.symbols;
        const char *sname;
        size_t si;

        if (!name || !off)
                return -EINVAL;

        for (si = 0; si < symbols->d_size / sizeof(GElf_Sym); si++) {
                GElf_Sym sym;

                if (!gelf_getsym(symbols, si, &sym))
                        continue;
                if (GELF_ST_BIND(sym.st_info) != STB_GLOBAL ||
                    GELF_ST_TYPE(sym.st_info) != STT_OBJECT)
                        continue;

                sname = elf_strptr(obj->efile.elf, obj->efile.strtabidx,
                                   sym.st_name);
                if (!sname) {
                        pr_warn("failed to get sym name string for var %s\n",
                                name);
                        return -EIO;
                }
                if (strcmp(name, sname) == 0) {
                        *off = sym.st_value;
                        return 0;
                }
        }

        return -ENOENT;
}

static struct bpf_map *bpf_object__add_map(struct bpf_object *obj)
{
        struct bpf_map *new_maps;
        size_t new_cap;
        int i;

        if (obj->nr_maps < obj->maps_cap)
                return &obj->maps[obj->nr_maps++];

        new_cap = max((size_t)4, obj->maps_cap * 3 / 2);
        new_maps = realloc(obj->maps, new_cap * sizeof(*obj->maps));
        if (!new_maps) {
                pr_warn("alloc maps for object failed\n");
                return ERR_PTR(-ENOMEM);
        }

        obj->maps_cap = new_cap;
        obj->maps = new_maps;

        /* zero out new maps */
        memset(obj->maps + obj->nr_maps, 0,
               (obj->maps_cap - obj->nr_maps) * sizeof(*obj->maps));
        /*
         * Fill all fds with -1 so we won't close an incorrect fd
         * (fd=0 is stdin) on failure (zclose won't close negative fds).
         */
        for (i = obj->nr_maps; i < obj->maps_cap; i++) {
                obj->maps[i].fd = -1;
                obj->maps[i].inner_map_fd = -1;
        }

        return &obj->maps[obj->nr_maps++];
}

static size_t bpf_map_mmap_sz(const struct bpf_map *map)
{
        long page_sz = sysconf(_SC_PAGE_SIZE);
        size_t map_sz;

        map_sz = roundup(map->def.value_size, 8) * map->def.max_entries;
        map_sz = roundup(map_sz, page_sz);
        return map_sz;
}

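/* Internal maps are named "<object>.<section>": an object "my_obj"
 * (hypothetical) gets maps "my_obj.data", "my_obj.kconfig", etc. Kernel
 * map names are limited to BPF_OBJ_NAME_LEN, so the object-name prefix
 * is truncated first, reserving at least 7 characters for the suffix.
 */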
static char *internal_map_name(struct bpf_object *obj,
                               enum libbpf_map_type type)
{
        char map_name[BPF_OBJ_NAME_LEN];
        const char *sfx = libbpf_type_to_btf_name[type];
        int sfx_len = max((size_t)7, strlen(sfx));
        int pfx_len = min((size_t)BPF_OBJ_NAME_LEN - sfx_len - 1,
                          strlen(obj->name));

        snprintf(map_name, sizeof(map_name), "%.*s%.*s", pfx_len, obj->name,
                 sfx_len, libbpf_type_to_btf_name[type]);

        return strdup(map_name);
}

static int
bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type,
                              int sec_idx, void *data, size_t data_sz)
{
        struct bpf_map_def *def;
        struct bpf_map *map;
        int err;

        map = bpf_object__add_map(obj);
        if (IS_ERR(map))
                return PTR_ERR(map);

        map->libbpf_type = type;
        map->sec_idx = sec_idx;
        map->sec_offset = 0;
        map->name = internal_map_name(obj, type);
        if (!map->name) {
                pr_warn("failed to alloc map name\n");
                return -ENOMEM;
        }

        def = &map->def;
        def->type = BPF_MAP_TYPE_ARRAY;
        def->key_size = sizeof(int);
        def->value_size = data_sz;
        def->max_entries = 1;
        def->map_flags = type == LIBBPF_MAP_RODATA || type == LIBBPF_MAP_KCONFIG
                         ? BPF_F_RDONLY_PROG : 0;
        def->map_flags |= BPF_F_MMAPABLE;

        pr_debug("map '%s' (global data): at sec_idx %d, offset %zu, flags %x.\n",
                 map->name, map->sec_idx, map->sec_offset, def->map_flags);

        map->mmaped = mmap(NULL, bpf_map_mmap_sz(map), PROT_READ | PROT_WRITE,
                           MAP_SHARED | MAP_ANONYMOUS, -1, 0);
        if (map->mmaped == MAP_FAILED) {
                err = -errno;
                map->mmaped = NULL;
                pr_warn("failed to alloc map '%s' content buffer: %d\n",
                        map->name, err);
                zfree(&map->name);
                return err;
        }

        if (data)
                memcpy(map->mmaped, data, data_sz);

        pr_debug("map %td is \"%s\"\n", map - obj->maps, map->name);
        return 0;
}

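/* Global variables in BPF C are backed by these internal single-entry
 * array maps: e.g. (illustrative) 'static int counter;' lands in .bss
 * and is accessed through the per-object bss map, while const data goes
 * into the read-only .rodata map.
 */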
static int bpf_object__init_global_data_maps(struct bpf_object *obj)
{
        int err;

        /*
         * Populate obj->maps with libbpf internal maps.
         */
        if (obj->efile.data_shndx >= 0) {
                err = bpf_object__init_internal_map(obj, LIBBPF_MAP_DATA,
                                                    obj->efile.data_shndx,
                                                    obj->efile.data->d_buf,
                                                    obj->efile.data->d_size);
                if (err)
                        return err;
        }
        if (obj->efile.rodata_shndx >= 0) {
                err = bpf_object__init_internal_map(obj, LIBBPF_MAP_RODATA,
                                                    obj->efile.rodata_shndx,
                                                    obj->efile.rodata->d_buf,
                                                    obj->efile.rodata->d_size);
                if (err)
                        return err;
        }
        if (obj->efile.bss_shndx >= 0) {
                err = bpf_object__init_internal_map(obj, LIBBPF_MAP_BSS,
                                                    obj->efile.bss_shndx,
                                                    NULL,
                                                    obj->efile.bss->d_size);
                if (err)
                        return err;
        }
        return 0;
}


static struct extern_desc *find_extern_by_name(const struct bpf_object *obj,
                                               const void *name)
{
        int i;

        for (i = 0; i < obj->nr_extern; i++) {
                if (strcmp(obj->externs[i].name, name) == 0)
                        return &obj->externs[i];
        }
        return NULL;
}

static int set_ext_value_tri(struct extern_desc *ext, void *ext_val,
                             char value)
{
        switch (ext->type) {
        case EXT_BOOL:
                if (value == 'm') {
                        pr_warn("extern %s=%c should be tristate or char\n",
                                ext->name, value);
                        return -EINVAL;
                }
                *(bool *)ext_val = value == 'y' ? true : false;
                break;
        case EXT_TRISTATE:
                if (value == 'y')
                        *(enum libbpf_tristate *)ext_val = TRI_YES;
                else if (value == 'm')
                        *(enum libbpf_tristate *)ext_val = TRI_MODULE;
                else /* value == 'n' */
                        *(enum libbpf_tristate *)ext_val = TRI_NO;
                break;
        case EXT_CHAR:
                *(char *)ext_val = value;
                break;
        case EXT_UNKNOWN:
        case EXT_INT:
        case EXT_CHAR_ARR:
        default:
                pr_warn("extern %s=%c should be bool, tristate, or char\n",
                        ext->name, value);
                return -EINVAL;
        }
        ext->is_set = true;
        return 0;
}

static int set_ext_value_str(struct extern_desc *ext, char *ext_val,
                             const char *value)
{
        size_t len;

        if (ext->type != EXT_CHAR_ARR) {
                pr_warn("extern %s=%s should be char array\n", ext->name, value);
                return -EINVAL;
        }

        len = strlen(value);
        if (len < 2 || value[len - 1] != '"') {
                pr_warn("extern '%s': invalid string config '%s'\n",
                        ext->name, value);
                return -EINVAL;
        }

        /* strip quotes */
        len -= 2;
        if (len >= ext->sz) {
                pr_warn("extern '%s': long string config %s (%zu bytes) truncated to %d bytes\n",
                        ext->name, value, len, ext->sz - 1);
                len = ext->sz - 1;
        }
        memcpy(ext_val, value + 1, len);
        ext_val[len] = '\0';
        ext->is_set = true;
        return 0;
}

static int parse_u64(const char *value, __u64 *res)
{
        char *value_end;
        int err;

        errno = 0;
        *res = strtoull(value, &value_end, 0);
        if (errno) {
                err = -errno;
                pr_warn("failed to parse '%s' as integer: %d\n", value, err);
                return err;
        }
        if (*value_end) {
                pr_warn("failed to parse '%s' as integer completely\n", value);
                return -EINVAL;
        }
        return 0;
}

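/* Worked example for the range check below: for a signed 1-byte extern
 * (bit_sz = 8), valid values are -128..127. Interpreted as u64, v = -1 is
 * 0xffffffffffffffff; v + 128 wraps to 127, which is < 256, so it is
 * accepted, whereas v = 128 gives 128 + 128 = 256 and is rejected.
 */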
static bool is_ext_value_in_range(const struct extern_desc *ext, __u64 v)
{
        int bit_sz = ext->sz * 8;

        if (ext->sz == 8)
                return true;

        /* Validate that value stored in u64 fits in integer of `ext->sz`
         * bytes size without any loss of information. If the target integer
         * is signed, we rely on the following limits of integer type of
         * Y bits and subsequent transformation:
         *
         *     -2^(Y-1) <= X           <= 2^(Y-1) - 1
         *            0 <= X + 2^(Y-1) <= 2^Y - 1
         *            0 <= X + 2^(Y-1) <  2^Y
         *
         *  For unsigned target integer, check that all the (64 - Y) bits are
         *  zero.
         */
        if (ext->is_signed)
                return v + (1ULL << (bit_sz - 1)) < (1ULL << bit_sz);
        else
                return (v >> bit_sz) == 0;
}

static int set_ext_value_num(struct extern_desc *ext, void *ext_val,
                             __u64 value)
{
        if (ext->type != EXT_INT && ext->type != EXT_CHAR) {
                pr_warn("extern %s=%llu should be integer\n",
                        ext->name, value);
                return -EINVAL;
        }
        if (!is_ext_value_in_range(ext, value)) {
                pr_warn("extern %s=%llu value doesn't fit in %d bytes\n",
                        ext->name, value, ext->sz);
                return -ERANGE;
        }
        switch (ext->sz) {
        case 1: *(__u8 *)ext_val = value; break;
        case 2: *(__u16 *)ext_val = value; break;
        case 4: *(__u32 *)ext_val = value; break;
        case 8: *(__u64 *)ext_val = value; break;
        default:
                return -EINVAL;
        }
        ext->is_set = true;
        return 0;
}

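/* Parses one line of Kconfig output. Representative inputs and the
 * setters they route to (values illustrative):
 *
 *	CONFIG_BPF=y                      -> set_ext_value_tri()
 *	CONFIG_DEFAULT_HOSTNAME="(none)"  -> set_ext_value_str()
 *	CONFIG_HZ=250                     -> set_ext_value_num()
 *
 * Lines not starting with "CONFIG_" (comments, blanks) are skipped.
 */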
static int bpf_object__process_kconfig_line(struct bpf_object *obj,
                                            char *buf, void *data)
{
        struct extern_desc *ext;
        char *sep, *value;
        int len, err = 0;
        void *ext_val;
        __u64 num;

        if (strncmp(buf, "CONFIG_", 7))
                return 0;

        sep = strchr(buf, '=');
        if (!sep) {
                pr_warn("failed to parse '%s': no separator\n", buf);
                return -EINVAL;
        }

        /* Trim ending '\n' */
        len = strlen(buf);
        if (buf[len - 1] == '\n')
                buf[len - 1] = '\0';
        /* Split on '=' and ensure that a value is present. */
        *sep = '\0';
        if (!sep[1]) {
                *sep = '=';
                pr_warn("failed to parse '%s': no value\n", buf);
                return -EINVAL;
        }

        ext = find_extern_by_name(obj, buf);
        if (!ext || ext->is_set)
                return 0;

        ext_val = data + ext->data_off;
        value = sep + 1;

        switch (*value) {
        case 'y': case 'n': case 'm':
                err = set_ext_value_tri(ext, ext_val, *value);
                break;
        case '"':
                err = set_ext_value_str(ext, ext_val, value);
                break;
        default:
                /* assume integer */
                err = parse_u64(value, &num);
                if (err) {
                        pr_warn("extern %s=%s should be integer\n",
                                ext->name, value);
                        return err;
                }
                err = set_ext_value_num(ext, ext_val, num);
                break;
        }
        if (err)
                return err;
        pr_debug("extern %s=%s\n", ext->name, value);
        return 0;
}

static int bpf_object__read_kconfig_file(struct bpf_object *obj, void *data)
{
        char buf[PATH_MAX];
        struct utsname uts;
        int len, err = 0;
        gzFile file;

        uname(&uts);
        len = snprintf(buf, PATH_MAX, "/boot/config-%s", uts.release);
        if (len < 0)
                return -EINVAL;
        else if (len >= PATH_MAX)
                return -ENAMETOOLONG;

        /* gzopen also accepts uncompressed files. */
        file = gzopen(buf, "r");
        if (!file)
                file = gzopen("/proc/config.gz", "r");

        if (!file) {
                pr_warn("failed to open system Kconfig\n");
                return -ENOENT;
        }

        while (gzgets(file, buf, sizeof(buf))) {
                err = bpf_object__process_kconfig_line(obj, buf, data);
                if (err) {
                        pr_warn("error parsing system Kconfig line '%s': %d\n",
                                buf, err);
                        goto out;
                }
        }

out:
        gzclose(file);
        return err;
}

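/* Reads Kconfig data from an in-memory, NUL-terminated buffer. This backs
 * the extra optional config that this change lets callers supply when
 * opening an object; it is applied before the system Kconfig and takes
 * precedence, since bpf_object__process_kconfig_line() skips externs
 * already marked is_set.
 */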
static int bpf_object__read_kconfig_mem(struct bpf_object *obj,
                                        const char *config, void *data)
{
        char buf[PATH_MAX];
        int err = 0;
        FILE *file;

        file = fmemopen((void *)config, strlen(config), "r");
        if (!file) {
                err = -errno;
                pr_warn("failed to open in-memory Kconfig: %d\n", err);
                return err;
        }

        while (fgets(buf, sizeof(buf), file)) {
                err = bpf_object__process_kconfig_line(obj, buf, data);
                if (err) {
                        pr_warn("error parsing in-memory Kconfig line '%s': %d\n",
                                buf, err);
                        break;
                }
        }

        fclose(file);
        return err;
}

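/* Externs are laid out back to back at their assigned data_off values, so
 * the value size of the single-entry .kconfig array map is simply the last
 * extern's offset plus its size.
 */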
static int bpf_object__init_kconfig_map(struct bpf_object *obj)
{
        struct extern_desc *last_ext;
        size_t map_sz;
        int err;

        if (obj->nr_extern == 0)
                return 0;

        last_ext = &obj->externs[obj->nr_extern - 1];
        map_sz = last_ext->data_off + last_ext->sz;

        err = bpf_object__init_internal_map(obj, LIBBPF_MAP_KCONFIG,
                                            obj->efile.symbols_shndx,
                                            NULL, map_sz);
        if (err)
                return err;

        obj->kconfig_map_idx = obj->nr_maps - 1;

        return 0;
}

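/* Legacy (non-BTF) map definitions come from the "maps" ELF section, one
 * struct bpf_map_def per map symbol, e.g. (illustrative):
 *
 *	struct bpf_map_def SEC("maps") my_map = {
 *		.type        = BPF_MAP_TYPE_ARRAY,
 *		.key_size    = sizeof(__u32),
 *		.value_size  = sizeof(__u64),
 *		.max_entries = 64,
 *	};
 */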
static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict)
{
        Elf_Data *symbols = obj->efile.symbols;
        int i, map_def_sz = 0, nr_maps = 0, nr_syms;
        Elf_Data *data = NULL;
        Elf_Scn *scn;

        if (obj->efile.maps_shndx < 0)
                return 0;

        if (!symbols)
                return -EINVAL;

        scn = elf_getscn(obj->efile.elf, obj->efile.maps_shndx);
        if (scn)
                data = elf_getdata(scn, NULL);
        if (!scn || !data) {
                pr_warn("failed to get Elf_Data from map section %d\n",
                        obj->efile.maps_shndx);
                return -EINVAL;
        }

        /*
         * Count the number of maps. Each map has a name.
         * Arrays of maps are not supported: only the first element is
         * considered.
         *
         * TODO: Detect arrays of maps and report an error.
         */
        nr_syms = symbols->d_size / sizeof(GElf_Sym);
        for (i = 0; i < nr_syms; i++) {
                GElf_Sym sym;

                if (!gelf_getsym(symbols, i, &sym))
                        continue;
                if (sym.st_shndx != obj->efile.maps_shndx)
                        continue;
                nr_maps++;
        }
        /* Assume equally sized map definitions */
        pr_debug("maps in %s: %d maps in %zd bytes\n",
                 obj->path, nr_maps, data->d_size);

        if (!data->d_size || nr_maps == 0 || (data->d_size % nr_maps) != 0) {
                pr_warn("unable to determine map definition size section %s, %d maps in %zd bytes\n",
                        obj->path, nr_maps, data->d_size);
                return -EINVAL;
        }
        map_def_sz = data->d_size / nr_maps;

        /* Fill obj->maps using data in "maps" section.  */
        for (i = 0; i < nr_syms; i++) {
                GElf_Sym sym;
                const char *map_name;
                struct bpf_map_def *def;
                struct bpf_map *map;

                if (!gelf_getsym(symbols, i, &sym))
                        continue;
                if (sym.st_shndx != obj->efile.maps_shndx)
                        continue;

                map = bpf_object__add_map(obj);
                if (IS_ERR(map))
                        return PTR_ERR(map);

                map_name = elf_strptr(obj->efile.elf, obj->efile.strtabidx,
                                      sym.st_name);
                if (!map_name) {
                        pr_warn("failed to get map #%d name sym string for obj %s\n",
                                i, obj->path);
                        return -LIBBPF_ERRNO__FORMAT;
                }

                map->libbpf_type = LIBBPF_MAP_UNSPEC;
                map->sec_idx = sym.st_shndx;
                map->sec_offset = sym.st_value;
                pr_debug("map '%s' (legacy): at sec_idx %d, offset %zu.\n",
                         map_name, map->sec_idx, map->sec_offset);
                if (sym.st_value + map_def_sz > data->d_size) {
                        pr_warn("corrupted maps section in %s: last map \"%s\" too small\n",
                                obj->path, map_name);
                        return -EINVAL;
                }

                map->name = strdup(map_name);
                if (!map->name) {
                        pr_warn("failed to alloc map name\n");
                        return -ENOMEM;
                }
                pr_debug("map %d is \"%s\"\n", i, map->name);
                def = (struct bpf_map_def *)(data->d_buf + sym.st_value);
                /*
                 * If the definition of the map in the object file fits in
                 * bpf_map_def, copy it. Any extra fields in our version
                 * of bpf_map_def will default to zero, since new map slots
                 * are zero-initialized in bpf_object__add_map().
                 */
                if (map_def_sz <= sizeof(struct bpf_map_def)) {
                        memcpy(&map->def, def, map_def_sz);
                } else {
                        /*
                         * Here the map structure being read is bigger than what
                         * we expect, truncate if the excess bits are all zero.
                         * If they are not zero, reject this map as
                         * incompatible.
                         */
                        char *b;

                        for (b = ((char *)def) + sizeof(struct bpf_map_def);
                             b < ((char *)def) + map_def_sz; b++) {
                                if (*b != 0) {
                                        pr_warn("maps section in %s: \"%s\" has unrecognized, non-zero options\n",
                                                obj->path, map_name);
                                        if (strict)
                                                return -EINVAL;
                                }
                        }
                        memcpy(&map->def, def, sizeof(struct bpf_map_def));
                }
        }
        return 0;
}

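/* Resolves a BTF type id through any chain of typedefs and type modifiers
 * (const/volatile/restrict). E.g., for 'typedef const int cint_t', looking
 * up cint_t yields the underlying INT type, with *res_id set to that final
 * type's id.
 */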
static const struct btf_type *
skip_mods_and_typedefs(const struct btf *btf, __u32 id, __u32 *res_id)
{
        const struct btf_type *t = btf__type_by_id(btf, id);

        if (res_id)
                *res_id = id;

        while (btf_is_mod(t) || btf_is_typedef(t)) {
                if (res_id)
                        *res_id = t->type;
                t = btf__type_by_id(btf, t->type);
        }

        return t;
}

/*
 * Fetch integer attribute of BTF map definition. Such attributes are
 * represented using a pointer to an array, in which dimensionality of array
 * encodes specified integer value. E.g., int (*type)[BPF_MAP_TYPE_ARRAY];
 * encodes `type => BPF_MAP_TYPE_ARRAY` key/value pair completely using BTF
 * type definition, while using only sizeof(void *) space in ELF data section.
 */
static bool get_map_field_int(const char *map_name, const struct btf *btf,
                              const struct btf_type *def,
                              const struct btf_member *m, __u32 *res)
{
        const struct btf_type *t = skip_mods_and_typedefs(btf, m->type, NULL);
        const char *name = btf__name_by_offset(btf, m->name_off);
        const struct btf_array *arr_info;
        const struct btf_type *arr_t;

        if (!btf_is_ptr(t)) {
                pr_warn("map '%s': attr '%s': expected PTR, got %u.\n",
                        map_name, name, btf_kind(t));
                return false;
        }

        arr_t = btf__type_by_id(btf, t->type);
        if (!arr_t) {
                pr_warn("map '%s': attr '%s': type [%u] not found.\n",
                        map_name, name, t->type);
                return false;
        }
        if (!btf_is_array(arr_t)) {
                pr_warn("map '%s': attr '%s': expected ARRAY, got %u.\n",
                        map_name, name, btf_kind(arr_t));
                return false;
        }
        arr_info = btf_array(arr_t);
        *res = arr_info->nelems;
        return true;
}

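/* Builds the default pin path for a map as "<path>/<map name>", with path
 * falling back to the bpf filesystem mount point, so a map named "my_map"
 * (hypothetical) pins at "/sys/fs/bpf/my_map" by default.
 */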
static int build_map_pin_path(struct bpf_map *map, const char *path)
{
        char buf[PATH_MAX];
        int err, len;

        if (!path)
                path = "/sys/fs/bpf";

        len = snprintf(buf, PATH_MAX, "%s/%s", path, bpf_map__name(map));
        if (len < 0)
                return -EINVAL;
        else if (len >= PATH_MAX)
                return -ENAMETOOLONG;

        err = bpf_map__set_pin_path(map, buf);
        if (err)
                return err;

        return 0;
}

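/* Parses one BTF-defined map. In BPF program source such maps are declared
 * declaratively in the ".maps" section, e.g. (illustrative, using the
 * __uint/__type helpers from bpf_helpers.h):
 *
 *	struct {
 *		__uint(type, BPF_MAP_TYPE_HASH);
 *		__uint(max_entries, 1024);
 *		__type(key, __u32);
 *		__type(value, __u64);
 *	} my_map SEC(".maps");
 *
 * Each field is encoded in BTF and decoded by the loop below.
 */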
static int bpf_object__init_user_btf_map(struct bpf_object *obj,
                                         const struct btf_type *sec,
                                         int var_idx, int sec_idx,
                                         const Elf_Data *data, bool strict,
                                         const char *pin_root_path)
{
        const struct btf_type *var, *def, *t;
        const struct btf_var_secinfo *vi;
        const struct btf_var *var_extra;
        const struct btf_member *m;
        const char *map_name;
        struct bpf_map *map;
        int vlen, i;

        vi = btf_var_secinfos(sec) + var_idx;
        var = btf__type_by_id(obj->btf, vi->type);
        var_extra = btf_var(var);
        map_name = btf__name_by_offset(obj->btf, var->name_off);
        vlen = btf_vlen(var);

        if (map_name == NULL || map_name[0] == '\0') {
                pr_warn("map #%d: empty name.\n", var_idx);
                return -EINVAL;
        }
        if ((__u64)vi->offset + vi->size > data->d_size) {
                pr_warn("map '%s' BTF data is corrupted.\n", map_name);
                return -EINVAL;
        }
        if (!btf_is_var(var)) {
                pr_warn("map '%s': unexpected var kind %u.\n",
                        map_name, btf_kind(var));
                return -EINVAL;
        }
        if (var_extra->linkage != BTF_VAR_GLOBAL_ALLOCATED &&
            var_extra->linkage != BTF_VAR_STATIC) {
                pr_warn("map '%s': unsupported var linkage %u.\n",
                        map_name, var_extra->linkage);
                return -EOPNOTSUPP;
        }

        def = skip_mods_and_typedefs(obj->btf, var->type, NULL);
        if (!btf_is_struct(def)) {
                pr_warn("map '%s': unexpected def kind %u.\n",
                        map_name, btf_kind(def));
                return -EINVAL;
        }
        if (def->size > vi->size) {
                pr_warn("map '%s': invalid def size.\n", map_name);
                return -EINVAL;
        }

        map = bpf_object__add_map(obj);
        if (IS_ERR(map))
                return PTR_ERR(map);
        map->name = strdup(map_name);
        if (!map->name) {
                pr_warn("map '%s': failed to alloc map name.\n", map_name);
                return -ENOMEM;
        }
        map->libbpf_type = LIBBPF_MAP_UNSPEC;
        map->def.type = BPF_MAP_TYPE_UNSPEC;
        map->sec_idx = sec_idx;
        map->sec_offset = vi->offset;
        pr_debug("map '%s': at sec_idx %d, offset %zu.\n",
                 map_name, map->sec_idx, map->sec_offset);

        vlen = btf_vlen(def);
        m = btf_members(def);
        for (i = 0; i < vlen; i++, m++) {
                const char *name = btf__name_by_offset(obj->btf, m->name_off);

                if (!name) {
                        pr_warn("map '%s': invalid field #%d.\n", map_name, i);
                        return -EINVAL;
                }
                if (strcmp(name, "type") == 0) {
                        if (!get_map_field_int(map_name, obj->btf, def, m,
                                               &map->def.type))
                                return -EINVAL;
                        pr_debug("map '%s': found type = %u.\n",
                                 map_name, map->def.type);
                } else if (strcmp(name, "max_entries") == 0) {
                        if (!get_map_field_int(map_name, obj->btf, def, m,
                                               &map->def.max_entries))
                                return -EINVAL;
                        pr_debug("map '%s': found max_entries = %u.\n",
                                 map_name, map->def.max_entries);
                } else if (strcmp(name, "map_flags") == 0) {
                        if (!get_map_field_int(map_name, obj->btf, def, m,
                                               &map->def.map_flags))
                                return -EINVAL;
                        pr_debug("map '%s': found map_flags = %u.\n",
                                 map_name, map->def.map_flags);
                } else if (strcmp(name, "key_size") == 0) {
                        __u32 sz;

                        if (!get_map_field_int(map_name, obj->btf, def, m,
                                               &sz))
                                return -EINVAL;
                        pr_debug("map '%s': found key_size = %u.\n",
                                 map_name, sz);
                        if (map->def.key_size && map->def.key_size != sz) {
                                pr_warn("map '%s': conflicting key size %u != %u.\n",
                                        map_name, map->def.key_size, sz);
                                return -EINVAL;
                        }
                        map->def.key_size = sz;
                } else if (strcmp(name, "key") == 0) {
                        __s64 sz;

                        t = btf__type_by_id(obj->btf, m->type);
                        if (!t) {
                                pr_warn("map '%s': key type [%d] not found.\n",
                                        map_name, m->type);
                                return -EINVAL;
                        }
                        if (!btf_is_ptr(t)) {
                                pr_warn("map '%s': key spec is not PTR: %u.\n",
                                        map_name, btf_kind(t));
                                return -EINVAL;
                        }
                        sz = btf__resolve_size(obj->btf, t->type);
                        if (sz < 0) {
                                pr_warn("map '%s': can't determine key size for type [%u]: %zd.\n",
                                        map_name, t->type, (ssize_t)sz);
                                return sz;
                        }
                        pr_debug("map '%s': found key [%u], sz = %zd.\n",
                                 map_name, t->type, (ssize_t)sz);
                        if (map->def.key_size && map->def.key_size != sz) {
                                pr_warn("map '%s': conflicting key size %u != %zd.\n",
                                        map_name, map->def.key_size, (ssize_t)sz);
                                return -EINVAL;
                        }
                        map->def.key_size = sz;
                        map->btf_key_type_id = t->type;
                } else if (strcmp(name, "value_size") == 0) {
                        __u32 sz;

                        if (!get_map_field_int(map_name, obj->btf, def, m,
                                               &sz))
                                return -EINVAL;
                        pr_debug("map '%s': found value_size = %u.\n",
                                 map_name, sz);
                        if (map->def.value_size && map->def.value_size != sz) {
                                pr_warn("map '%s': conflicting value size %u != %u.\n",
                                        map_name, map->def.value_size, sz);
                                return -EINVAL;
                        }
                        map->def.value_size = sz;
                } else if (strcmp(name, "value") == 0) {
                        __s64 sz;

                        t = btf__type_by_id(obj->btf, m->type);
                        if (!t) {
                                pr_warn("map '%s': value type [%d] not found.\n",
1657                                         map_name, m->type);
1658                                 return -EINVAL;
1659                         }
1660                         if (!btf_is_ptr(t)) {
1661                                 pr_warn("map '%s': value spec is not PTR: %u.\n",
1662                                         map_name, btf_kind(t));
1663                                 return -EINVAL;
1664                         }
1665                         sz = btf__resolve_size(obj->btf, t->type);
1666                         if (sz < 0) {
1667                                 pr_warn("map '%s': can't determine value size for type [%u]: %zd.\n",
1668                                         map_name, t->type, (ssize_t)sz);
1669                                 return sz;
1670                         }
1671                         pr_debug("map '%s': found value [%u], sz = %zd.\n",
1672                                  map_name, t->type, (ssize_t)sz);
1673                         if (map->def.value_size && map->def.value_size != sz) {
1674                                 pr_warn("map '%s': conflicting value size %u != %zd.\n",
1675                                         map_name, map->def.value_size, (ssize_t)sz);
1676                                 return -EINVAL;
1677                         }
1678                         map->def.value_size = sz;
1679                         map->btf_value_type_id = t->type;
1680                 } else if (strcmp(name, "pinning") == 0) {
1681                         __u32 val;
1682                         int err;
1683
1684                         if (!get_map_field_int(map_name, obj->btf, def, m,
1685                                                &val))
1686                                 return -EINVAL;
1687                         pr_debug("map '%s': found pinning = %u.\n",
1688                                  map_name, val);
1689
1690                         if (val != LIBBPF_PIN_NONE &&
1691                             val != LIBBPF_PIN_BY_NAME) {
1692                                 pr_warn("map '%s': invalid pinning value %u.\n",
1693                                         map_name, val);
1694                                 return -EINVAL;
1695                         }
1696                         if (val == LIBBPF_PIN_BY_NAME) {
1697                                 err = build_map_pin_path(map, pin_root_path);
1698                                 if (err) {
1699                                         pr_warn("map '%s': couldn't build pin path.\n",
1700                                                 map_name);
1701                                         return err;
1702                                 }
1703                         }
1704                 } else {
1705                         if (strict) {
1706                                 pr_warn("map '%s': unknown field '%s'.\n",
1707                                         map_name, name);
1708                                 return -ENOTSUP;
1709                         }
1710                         pr_debug("map '%s': ignoring unknown field '%s'.\n",
1711                                  map_name, name);
1712                 }
1713         }
1714
1715         if (map->def.type == BPF_MAP_TYPE_UNSPEC) {
1716                 pr_warn("map '%s': map type isn't specified.\n", map_name);
1717                 return -EINVAL;
1718         }
1719
1720         return 0;
1721 }
1722
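/* For reference, the field-by-field parsing above corresponds to a
 * BTF-defined map declaration in BPF program C code along these lines
 * (a sketch; the map name, value type and sizes are illustrative):
 *
 *	struct {
 *		__uint(type, BPF_MAP_TYPE_HASH);
 *		__uint(max_entries, 1024);
 *		__type(key, __u32);
 *		__type(value, struct my_value);
 *		__uint(pinning, LIBBPF_PIN_BY_NAME);
 *	} my_map SEC(".maps");
 *
 * The __uint()/__type() helpers from bpf_helpers.h encode each field as a
 * struct member: integer fields ("type", "max_entries", "map_flags", ...)
 * carry their value in a pointer-to-array size, which get_map_field_int()
 * extracts, while "key"/"value" are pointers whose pointed-to type supplies
 * both the BTF type id and, via btf__resolve_size(), the element size.
 */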
1723 static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict,
1724                                           const char *pin_root_path)
1725 {
1726         const struct btf_type *sec = NULL;
1727         int nr_types, i, vlen, err;
1728         const struct btf_type *t;
1729         const char *name;
1730         Elf_Data *data = NULL;
1731         Elf_Scn *scn;
1732
1733         if (obj->efile.btf_maps_shndx < 0)
1734                 return 0;
1735
1736         scn = elf_getscn(obj->efile.elf, obj->efile.btf_maps_shndx);
1737         if (scn)
1738                 data = elf_getdata(scn, NULL);
1739         if (!scn || !data) {
1740                 pr_warn("failed to get Elf_Data from map section %d (%s)\n",
1741                         obj->efile.btf_maps_shndx, MAPS_ELF_SEC);
1742                 return -EINVAL;
1743         }
1744
1745         nr_types = btf__get_nr_types(obj->btf);
1746         for (i = 1; i <= nr_types; i++) {
1747                 t = btf__type_by_id(obj->btf, i);
1748                 if (!btf_is_datasec(t))
1749                         continue;
1750                 name = btf__name_by_offset(obj->btf, t->name_off);
1751                 if (strcmp(name, MAPS_ELF_SEC) == 0) {
1752                         sec = t;
1753                         break;
1754                 }
1755         }
1756
1757         if (!sec) {
1758                 pr_warn("DATASEC '%s' not found.\n", MAPS_ELF_SEC);
1759                 return -ENOENT;
1760         }
1761
1762         vlen = btf_vlen(sec);
1763         for (i = 0; i < vlen; i++) {
1764                 err = bpf_object__init_user_btf_map(obj, sec, i,
1765                                                     obj->efile.btf_maps_shndx,
1766                                                     data, strict,
1767                                                     pin_root_path);
1768                 if (err)
1769                         return err;
1770         }
1771
1772         return 0;
1773 }
1774
1775 static int bpf_object__init_maps(struct bpf_object *obj,
1776                                  const struct bpf_object_open_opts *opts)
1777 {
1778         const char *pin_root_path;
1779         bool strict;
1780         int err;
1781
1782         strict = !OPTS_GET(opts, relaxed_maps, false);
1783         pin_root_path = OPTS_GET(opts, pin_root_path, NULL);
1784
1785         err = bpf_object__init_user_maps(obj, strict);
1786         err = err ?: bpf_object__init_user_btf_maps(obj, strict, pin_root_path);
1787         err = err ?: bpf_object__init_global_data_maps(obj);
1788         err = err ?: bpf_object__init_kconfig_map(obj);
1789         if (err)
1790                 return err;
1791
1792         if (obj->nr_maps) {
1793                 qsort(obj->maps, obj->nr_maps, sizeof(obj->maps[0]),
1794                       compare_bpf_map);
1795         }
1796         return 0;
1797 }
1798
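/* Caller-side sketch of the two options consumed above (real libbpf API;
 * the object path is illustrative, error checks omitted):
 *
 *	DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts,
 *		.relaxed_maps = true,		// tolerate unknown .maps fields
 *		.pin_root_path = "/sys/fs/bpf",	// root for LIBBPF_PIN_BY_NAME
 *	);
 *	struct bpf_object *obj = bpf_object__open_file("prog.bpf.o", &opts);
 */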
1799 static bool section_have_execinstr(struct bpf_object *obj, int idx)
1800 {
1801         Elf_Scn *scn;
1802         GElf_Shdr sh;
1803
1804         scn = elf_getscn(obj->efile.elf, idx);
1805         if (!scn)
1806                 return false;
1807
1808         if (gelf_getshdr(scn, &sh) != &sh)
1809                 return false;
1810
1811         if (sh.sh_flags & SHF_EXECINSTR)
1812                 return true;
1813
1814         return false;
1815 }
1816
1817 static void bpf_object__sanitize_btf(struct bpf_object *obj)
1818 {
1819         bool has_datasec = obj->caps.btf_datasec;
1820         bool has_func = obj->caps.btf_func;
1821         struct btf *btf = obj->btf;
1822         struct btf_type *t;
1823         int i, j, vlen;
1824
1825         if (!obj->btf || (has_func && has_datasec))
1826                 return;
1827
1828         for (i = 1; i <= btf__get_nr_types(btf); i++) {
1829                 t = (struct btf_type *)btf__type_by_id(btf, i);
1830
1831                 if (!has_datasec && btf_is_var(t)) {
1832                         /* replace VAR with INT */
1833                         t->info = BTF_INFO_ENC(BTF_KIND_INT, 0, 0);
1834                         /*
1835                          * using size = 1 is the safest choice, 4 will be too
1836                          * big and cause kernel BTF validation failure if
1837                          * original variable took less than 4 bytes
1838                          */
1839                         t->size = 1;
1840                         *(int *)(t + 1) = BTF_INT_ENC(0, 0, 8);
1841                 } else if (!has_datasec && btf_is_datasec(t)) {
1842                         /* replace DATASEC with STRUCT */
1843                         const struct btf_var_secinfo *v = btf_var_secinfos(t);
1844                         struct btf_member *m = btf_members(t);
1845                         struct btf_type *vt;
1846                         char *name;
1847
1848                         name = (char *)btf__name_by_offset(btf, t->name_off);
1849                         while (*name) {
1850                                 if (*name == '.')
1851                                         *name = '_';
1852                                 name++;
1853                         }
1854
1855                         vlen = btf_vlen(t);
1856                         t->info = BTF_INFO_ENC(BTF_KIND_STRUCT, 0, vlen);
1857                         for (j = 0; j < vlen; j++, v++, m++) {
1858                                 /* order of field assignments is important */
1859                                 m->offset = v->offset * 8;
1860                                 m->type = v->type;
1861                                 /* preserve variable name as member name */
1862                                 vt = (void *)btf__type_by_id(btf, v->type);
1863                                 m->name_off = vt->name_off;
1864                         }
1865                 } else if (!has_func && btf_is_func_proto(t)) {
1866                         /* replace FUNC_PROTO with ENUM */
1867                         vlen = btf_vlen(t);
1868                         t->info = BTF_INFO_ENC(BTF_KIND_ENUM, 0, vlen);
1869                         t->size = sizeof(__u32); /* kernel enforced */
1870                 } else if (!has_func && btf_is_func(t)) {
1871                         /* replace FUNC with TYPEDEF */
1872                         t->info = BTF_INFO_ENC(BTF_KIND_TYPEDEF, 0, 0);
1873                 }
1874         }
1875 }
1876
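/* Illustration of the in-place DATASEC -> STRUCT downgrade above (a
 * sketch, not actual output). On kernels without BTF_KIND_DATASEC support,
 *
 *	DATASEC '.data' size=8 vlen=1
 *		var 'my_var' offset=0 size=4
 *
 * becomes
 *
 *	STRUCT '_data' size=8 vlen=1
 *		member 'my_var' bits_offset=0
 *
 * '.' is rewritten to '_' (old kernels reject dots in BTF names), each
 * btf_var_secinfo is reused as a btf_member with its offset converted from
 * bytes to bits, and the VAR itself is separately downgraded to a 1-byte
 * INT so kernel-side validation still passes.
 */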
1877 static void bpf_object__sanitize_btf_ext(struct bpf_object *obj)
1878 {
1879         if (!obj->btf_ext)
1880                 return;
1881
1882         if (!obj->caps.btf_func) {
1883                 btf_ext__free(obj->btf_ext);
1884                 obj->btf_ext = NULL;
1885         }
1886 }
1887
1888 static bool bpf_object__is_btf_mandatory(const struct bpf_object *obj)
1889 {
1890         return obj->efile.btf_maps_shndx >= 0;
1891 }
1892
1893 static int bpf_object__init_btf(struct bpf_object *obj,
1894                                 Elf_Data *btf_data,
1895                                 Elf_Data *btf_ext_data)
1896 {
1897         bool btf_required = bpf_object__is_btf_mandatory(obj);
1898         int err = 0;
1899
1900         if (btf_data) {
1901                 obj->btf = btf__new(btf_data->d_buf, btf_data->d_size);
1902                 if (IS_ERR(obj->btf)) {
1903                         err = PTR_ERR(obj->btf);
1904                         pr_warn("Error loading ELF section %s: %d.\n", BTF_ELF_SEC, err);
1905                         goto out;
1906                 }
1907         }
1908         if (btf_ext_data) {
1909                 if (!obj->btf) {
1910                         pr_debug("Ignoring ELF section %s: it depends on missing ELF section %s.\n",
1911                                  BTF_EXT_ELF_SEC, BTF_ELF_SEC);
1912                         goto out;
1913                 }
1914                 obj->btf_ext = btf_ext__new(btf_ext_data->d_buf,
1915                                             btf_ext_data->d_size);
1916                 if (IS_ERR(obj->btf_ext)) {
1917                         pr_warn("Error loading ELF section %s: %ld. Ignoring and continuing.\n",
1918                                 BTF_EXT_ELF_SEC, PTR_ERR(obj->btf_ext));
1919                         obj->btf_ext = NULL;
1920                         goto out;
1921                 }
1922         }
1923 out:
1924         if (err || IS_ERR(obj->btf)) {
1925                 if (btf_required)
1926                         err = err ? : PTR_ERR(obj->btf);
1927                 else
1928                         err = 0;
1929                 if (!IS_ERR_OR_NULL(obj->btf))
1930                         btf__free(obj->btf);
1931                 obj->btf = NULL;
1932         }
1933         if (btf_required && !obj->btf) {
1934                 pr_warn("BTF is required, but is missing or corrupted.\n");
1935                 return err == 0 ? -ENOENT : err;
1936         }
1937         return 0;
1938 }
1939
1940 static int bpf_object__finalize_btf(struct bpf_object *obj)
1941 {
1942         int err;
1943
1944         if (!obj->btf)
1945                 return 0;
1946
1947         err = btf__finalize_data(obj, obj->btf);
1948         if (!err)
1949                 return 0;
1950
1951         pr_warn("Error finalizing %s: %d.\n", BTF_ELF_SEC, err);
1952         btf__free(obj->btf);
1953         obj->btf = NULL;
1954         btf_ext__free(obj->btf_ext);
1955         obj->btf_ext = NULL;
1956
1957         if (bpf_object__is_btf_mandatory(obj)) {
1958                 pr_warn("BTF is required, but is missing or corrupted.\n");
1959                 return -ENOENT;
1960         }
1961         return 0;
1962 }
1963
1964 static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj)
1965 {
1966         int err = 0;
1967
1968         if (!obj->btf)
1969                 return 0;
1970
1971         bpf_object__sanitize_btf(obj);
1972         bpf_object__sanitize_btf_ext(obj);
1973
1974         err = btf__load(obj->btf);
1975         if (err) {
1976                 pr_warn("Error loading %s into kernel: %d.\n",
1977                         BTF_ELF_SEC, err);
1978                 btf__free(obj->btf);
1979                 obj->btf = NULL;
1980                 /* btf_ext can't exist without btf, so free it as well */
1981                 if (obj->btf_ext) {
1982                         btf_ext__free(obj->btf_ext);
1983                         obj->btf_ext = NULL;
1984                 }
1985
1986                 if (bpf_object__is_btf_mandatory(obj))
1987                         return err;
1988         }
1989         return 0;
1990 }
1991
1992 static int bpf_object__elf_collect(struct bpf_object *obj)
1993 {
1994         Elf *elf = obj->efile.elf;
1995         GElf_Ehdr *ep = &obj->efile.ehdr;
1996         Elf_Data *btf_ext_data = NULL;
1997         Elf_Data *btf_data = NULL;
1998         Elf_Scn *scn = NULL;
1999         int idx = 0, err = 0;
2000
2001         /* Elf is corrupted/truncated, avoid calling elf_strptr. */
2002         if (!elf_rawdata(elf_getscn(elf, ep->e_shstrndx), NULL)) {
2003                 pr_warn("failed to get e_shstrndx from %s\n", obj->path);
2004                 return -LIBBPF_ERRNO__FORMAT;
2005         }
2006
2007         while ((scn = elf_nextscn(elf, scn)) != NULL) {
2008                 char *name;
2009                 GElf_Shdr sh;
2010                 Elf_Data *data;
2011
2012                 idx++;
2013                 if (gelf_getshdr(scn, &sh) != &sh) {
2014                         pr_warn("failed to get section(%d) header from %s\n",
2015                                 idx, obj->path);
2016                         return -LIBBPF_ERRNO__FORMAT;
2017                 }
2018
2019                 name = elf_strptr(elf, ep->e_shstrndx, sh.sh_name);
2020                 if (!name) {
2021                         pr_warn("failed to get section(%d) name from %s\n",
2022                                 idx, obj->path);
2023                         return -LIBBPF_ERRNO__FORMAT;
2024                 }
2025
2026                 data = elf_getdata(scn, 0);
2027                 if (!data) {
2028                         pr_warn("failed to get section(%d) data from %s(%s)\n",
2029                                 idx, name, obj->path);
2030                         return -LIBBPF_ERRNO__FORMAT;
2031                 }
2032                 pr_debug("section(%d) %s, size %lu, link %d, flags %lx, type=%d\n",
2033                          idx, name, (unsigned long)data->d_size,
2034                          (int)sh.sh_link, (unsigned long)sh.sh_flags,
2035                          (int)sh.sh_type);
2036
2037                 if (strcmp(name, "license") == 0) {
2038                         err = bpf_object__init_license(obj,
2039                                                        data->d_buf,
2040                                                        data->d_size);
2041                         if (err)
2042                                 return err;
2043                 } else if (strcmp(name, "version") == 0) {
2044                         err = bpf_object__init_kversion(obj,
2045                                                         data->d_buf,
2046                                                         data->d_size);
2047                         if (err)
2048                                 return err;
2049                 } else if (strcmp(name, "maps") == 0) {
2050                         obj->efile.maps_shndx = idx;
2051                 } else if (strcmp(name, MAPS_ELF_SEC) == 0) {
2052                         obj->efile.btf_maps_shndx = idx;
2053                 } else if (strcmp(name, BTF_ELF_SEC) == 0) {
2054                         btf_data = data;
2055                 } else if (strcmp(name, BTF_EXT_ELF_SEC) == 0) {
2056                         btf_ext_data = data;
2057                 } else if (sh.sh_type == SHT_SYMTAB) {
2058                         if (obj->efile.symbols) {
2059                                 pr_warn("bpf: multiple SYMTAB in %s\n",
2060                                         obj->path);
2061                                 return -LIBBPF_ERRNO__FORMAT;
2062                         }
2063                         obj->efile.symbols = data;
2064                         obj->efile.symbols_shndx = idx;
2065                         obj->efile.strtabidx = sh.sh_link;
2066                 } else if (sh.sh_type == SHT_PROGBITS && data->d_size > 0) {
2067                         if (sh.sh_flags & SHF_EXECINSTR) {
2068                                 if (strcmp(name, ".text") == 0)
2069                                         obj->efile.text_shndx = idx;
2070                                 err = bpf_object__add_program(obj, data->d_buf,
2071                                                               data->d_size,
2072                                                               name, idx);
2073                                 if (err) {
2074                                         char errmsg[STRERR_BUFSIZE];
2075                                         char *cp;
2076
2077                                         cp = libbpf_strerror_r(-err, errmsg,
2078                                                                sizeof(errmsg));
2079                                         pr_warn("failed to alloc program %s (%s): %s\n",
2080                                                 name, obj->path, cp);
2081                                         return err;
2082                                 }
2083                         } else if (strcmp(name, DATA_SEC) == 0) {
2084                                 obj->efile.data = data;
2085                                 obj->efile.data_shndx = idx;
2086                         } else if (strcmp(name, RODATA_SEC) == 0) {
2087                                 obj->efile.rodata = data;
2088                                 obj->efile.rodata_shndx = idx;
2089                         } else {
2090                                 pr_debug("skip section(%d) %s\n", idx, name);
2091                         }
2092                 } else if (sh.sh_type == SHT_REL) {
2093                         int nr_sects = obj->efile.nr_reloc_sects;
2094                         void *sects = obj->efile.reloc_sects;
2095                         int sec = sh.sh_info; /* points to other section */
2096
2097                         /* Only do relo for section with exec instructions */
2098                         if (!section_have_execinstr(obj, sec)) {
2099                                 pr_debug("skip relo %s(%d) for section(%d)\n",
2100                                          name, idx, sec);
2101                                 continue;
2102                         }
2103
2104                         sects = reallocarray(sects, nr_sects + 1,
2105                                              sizeof(*obj->efile.reloc_sects));
2106                         if (!sects) {
2107                                 pr_warn("reloc_sects realloc failed\n");
2108                                 return -ENOMEM;
2109                         }
2110
2111                         obj->efile.reloc_sects = sects;
2112                         obj->efile.nr_reloc_sects++;
2113
2114                         obj->efile.reloc_sects[nr_sects].shdr = sh;
2115                         obj->efile.reloc_sects[nr_sects].data = data;
2116                 } else if (sh.sh_type == SHT_NOBITS &&
2117                            strcmp(name, BSS_SEC) == 0) {
2118                         obj->efile.bss = data;
2119                         obj->efile.bss_shndx = idx;
2120                 } else {
2121                         pr_debug("skip section(%d) %s\n", idx, name);
2122                 }
2123         }
2124
2125         if (!obj->efile.strtabidx || obj->efile.strtabidx > idx) {
2126                 pr_warn("Corrupted ELF file: index of strtab invalid\n");
2127                 return -LIBBPF_ERRNO__FORMAT;
2128         }
2129         return bpf_object__init_btf(obj, btf_data, btf_ext_data);
2130 }
2131
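/* For orientation, a typical object produced by clang -target bpf and
 * consumed by the loop above contains: "license" (and optionally
 * "version"), "maps" (legacy definitions) and/or ".maps" (BTF-defined),
 * ".BTF"/".BTF.ext", one executable PROGBITS section per program plus
 * ".text" for sub-programs, ".data"/".rodata"/".bss" for globals, and an
 * SHT_REL section for each executable section that needs relocations.
 */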
2132 static bool sym_is_extern(const GElf_Sym *sym)
2133 {
2134         int bind = GELF_ST_BIND(sym->st_info);
2135         /* externs are symbols w/ type=NOTYPE, bind=GLOBAL|WEAK, section=UND */
2136         return sym->st_shndx == SHN_UNDEF &&
2137                (bind == STB_GLOBAL || bind == STB_WEAK) &&
2138                GELF_ST_TYPE(sym->st_info) == STT_NOTYPE;
2139 }
2140
2141 static int find_extern_btf_id(const struct btf *btf, const char *ext_name)
2142 {
2143         const struct btf_type *t;
2144         const char *var_name;
2145         int i, n;
2146
2147         if (!btf)
2148                 return -ESRCH;
2149
2150         n = btf__get_nr_types(btf);
2151         for (i = 1; i <= n; i++) {
2152                 t = btf__type_by_id(btf, i);
2153
2154                 if (!btf_is_var(t))
2155                         continue;
2156
2157                 var_name = btf__name_by_offset(btf, t->name_off);
2158                 if (strcmp(var_name, ext_name))
2159                         continue;
2160
2161                 if (btf_var(t)->linkage != BTF_VAR_GLOBAL_EXTERN)
2162                         return -EINVAL;
2163
2164                 return i;
2165         }
2166
2167         return -ENOENT;
2168 }
2169
2170 static enum extern_type find_extern_type(const struct btf *btf, int id,
2171                                          bool *is_signed)
2172 {
2173         const struct btf_type *t;
2174         const char *name;
2175
2176         t = skip_mods_and_typedefs(btf, id, NULL);
2177         name = btf__name_by_offset(btf, t->name_off);
2178
2179         if (is_signed)
2180                 *is_signed = false;
2181         switch (btf_kind(t)) {
2182         case BTF_KIND_INT: {
2183                 int enc = btf_int_encoding(t);
2184
2185                 if (enc & BTF_INT_BOOL)
2186                         return t->size == 1 ? EXT_BOOL : EXT_UNKNOWN;
2187                 if (is_signed)
2188                         *is_signed = enc & BTF_INT_SIGNED;
2189                 if (t->size == 1)
2190                         return EXT_CHAR;
2191                 if (t->size < 1 || t->size > 8 || (t->size & (t->size - 1)))
2192                         return EXT_UNKNOWN;
2193                 return EXT_INT;
2194         }
2195         case BTF_KIND_ENUM:
2196                 if (t->size != 4)
2197                         return EXT_UNKNOWN;
2198                 if (strcmp(name, "libbpf_tristate"))
2199                         return EXT_UNKNOWN;
2200                 return EXT_TRISTATE;
2201         case BTF_KIND_ARRAY:
2202                 if (btf_array(t)->nelems == 0)
2203                         return EXT_UNKNOWN;
2204                 if (find_extern_type(btf, btf_array(t)->type, NULL) != EXT_CHAR)
2205                         return EXT_UNKNOWN;
2206                 return EXT_CHAR_ARR;
2207         default:
2208                 return EXT_UNKNOWN;
2209         }
2210 }
2211
2212 static int cmp_externs(const void *_a, const void *_b)
2213 {
2214         const struct extern_desc *a = _a;
2215         const struct extern_desc *b = _b;
2216
2217         /* descending order by alignment requirements */
2218         if (a->align != b->align)
2219                 return a->align > b->align ? -1 : 1;
2220         /* ascending order by size, within same alignment class */
2221         if (a->sz != b->sz)
2222                 return a->sz < b->sz ? -1 : 1;
2223         /* resolve ties by name */
2224         return strcmp(a->name, b->name);
2225 }
2226
2227 static int bpf_object__collect_externs(struct bpf_object *obj)
2228 {
2229         const struct btf_type *t;
2230         struct extern_desc *ext;
2231         int i, n, off, btf_id;
2232         struct btf_type *sec;
2233         const char *ext_name;
2234         Elf_Scn *scn;
2235         GElf_Shdr sh;
2236
2237         if (!obj->efile.symbols)
2238                 return 0;
2239
2240         scn = elf_getscn(obj->efile.elf, obj->efile.symbols_shndx);
2241         if (!scn)
2242                 return -LIBBPF_ERRNO__FORMAT;
2243         if (gelf_getshdr(scn, &sh) != &sh)
2244                 return -LIBBPF_ERRNO__FORMAT;
2245         n = sh.sh_size / sh.sh_entsize;
2246
2247         pr_debug("looking for externs among %d symbols...\n", n);
2248         for (i = 0; i < n; i++) {
2249                 GElf_Sym sym;
2250
2251                 if (!gelf_getsym(obj->efile.symbols, i, &sym))
2252                         return -LIBBPF_ERRNO__FORMAT;
2253                 if (!sym_is_extern(&sym))
2254                         continue;
2255                 ext_name = elf_strptr(obj->efile.elf, obj->efile.strtabidx,
2256                                       sym.st_name);
2257                 if (!ext_name || !ext_name[0])
2258                         continue;
2259
2260                 ext = obj->externs;
2261                 ext = reallocarray(ext, obj->nr_extern + 1, sizeof(*ext));
2262                 if (!ext)
2263                         return -ENOMEM;
2264                 obj->externs = ext;
2265                 ext = &ext[obj->nr_extern];
2266                 memset(ext, 0, sizeof(*ext));
2267                 obj->nr_extern++;
2268
2269                 ext->btf_id = find_extern_btf_id(obj->btf, ext_name);
2270                 if (ext->btf_id <= 0) {
2271                         pr_warn("failed to find BTF for extern '%s': %d\n",
2272                                 ext_name, ext->btf_id);
2273                         return ext->btf_id;
2274                 }
2275                 t = btf__type_by_id(obj->btf, ext->btf_id);
2276                 ext->name = btf__name_by_offset(obj->btf, t->name_off);
2277                 ext->sym_idx = i;
2278                 ext->is_weak = GELF_ST_BIND(sym.st_info) == STB_WEAK;
2279                 ext->sz = btf__resolve_size(obj->btf, t->type);
2280                 if (ext->sz <= 0) {
2281                         pr_warn("failed to resolve size of extern '%s': %d\n",
2282                                 ext_name, ext->sz);
2283                         return ext->sz;
2284                 }
2285                 ext->align = btf__align_of(obj->btf, t->type);
2286                 if (ext->align <= 0) {
2287                         pr_warn("failed to determine alignment of extern '%s': %d\n",
2288                                 ext_name, ext->align);
2289                         return -EINVAL;
2290                 }
2291                 ext->type = find_extern_type(obj->btf, t->type,
2292                                              &ext->is_signed);
2293                 if (ext->type == EXT_UNKNOWN) {
2294                         pr_warn("extern '%s' type is unsupported\n", ext_name);
2295                         return -ENOTSUP;
2296                 }
2297         }
2298         pr_debug("collected %d externs total\n", obj->nr_extern);
2299
2300         if (!obj->nr_extern)
2301                 return 0;
2302
2303         /* sort externs by (alignment, size, name) and calculate their offsets
2304          * within a map */
2305         qsort(obj->externs, obj->nr_extern, sizeof(*ext), cmp_externs);
2306         off = 0;
2307         for (i = 0; i < obj->nr_extern; i++) {
2308                 ext = &obj->externs[i];
2309                 ext->data_off = roundup(off, ext->align);
2310                 off = ext->data_off + ext->sz;
2311                 pr_debug("extern #%d: symbol %d, off %u, name %s\n",
2312                          i, ext->sym_idx, ext->data_off, ext->name);
2313         }
2314
2315         btf_id = btf__find_by_name(obj->btf, KCONFIG_SEC);
2316         if (btf_id <= 0) {
2317                 pr_warn("no BTF info found for '%s' datasec\n", KCONFIG_SEC);
2318                 return -ESRCH;
2319         }
2320
2321         sec = (struct btf_type *)btf__type_by_id(obj->btf, btf_id);
2322         sec->size = off;
2323         n = btf_vlen(sec);
2324         for (i = 0; i < n; i++) {
2325                 struct btf_var_secinfo *vs = btf_var_secinfos(sec) + i;
2326
2327                 t = btf__type_by_id(obj->btf, vs->type);
2328                 ext_name = btf__name_by_offset(obj->btf, t->name_off);
2329                 ext = find_extern_by_name(obj, ext_name);
2330                 if (!ext) {
2331                         pr_warn("failed to find extern definition for BTF var '%s'\n",
2332                                 ext_name);
2333                         return -ESRCH;
2334                 }
2335                 vs->offset = ext->data_off;
2336                 btf_var(t)->linkage = BTF_VAR_GLOBAL_ALLOCATED;
2337         }
2338
2339         return 0;
2340 }
2341
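/* The externs collected above come from declarations like these in BPF
 * program code (a sketch; the CONFIG_* names are illustrative, the helper
 * macros and enum libbpf_tristate come from bpf_helpers.h):
 *
 *	extern int CONFIG_HZ __kconfig;
 *	extern _Bool CONFIG_BPF_SYSCALL __kconfig;
 *	extern enum libbpf_tristate CONFIG_BPF_JIT __kconfig __weak;
 *	extern char CONFIG_CC_VERSION_TEXT[64] __kconfig __weak;
 *
 * These map onto find_extern_type() as EXT_INT, EXT_BOOL, EXT_TRISTATE and
 * EXT_CHAR_ARR respectively. All externs are laid out in one read-only
 * KCONFIG_SEC array map, sorted by (alignment desc, size asc, name) as
 * implemented in cmp_externs(), and each BTF VAR is flipped to
 * BTF_VAR_GLOBAL_ALLOCATED so the loaded BTF matches the materialized map.
 */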
2342 static struct bpf_program *
2343 bpf_object__find_prog_by_idx(struct bpf_object *obj, int idx)
2344 {
2345         struct bpf_program *prog;
2346         size_t i;
2347
2348         for (i = 0; i < obj->nr_programs; i++) {
2349                 prog = &obj->programs[i];
2350                 if (prog->idx == idx)
2351                         return prog;
2352         }
2353         return NULL;
2354 }
2355
2356 struct bpf_program *
2357 bpf_object__find_program_by_title(const struct bpf_object *obj,
2358                                   const char *title)
2359 {
2360         struct bpf_program *pos;
2361
2362         bpf_object__for_each_program(pos, obj) {
2363                 if (pos->section_name && !strcmp(pos->section_name, title))
2364                         return pos;
2365         }
2366         return NULL;
2367 }
2368
2369 struct bpf_program *
2370 bpf_object__find_program_by_name(const struct bpf_object *obj,
2371                                  const char *name)
2372 {
2373         struct bpf_program *prog;
2374
2375         bpf_object__for_each_program(prog, obj) {
2376                 if (!strcmp(prog->name, name))
2377                         return prog;
2378         }
2379         return NULL;
2380 }
2381
2382 static bool bpf_object__shndx_is_data(const struct bpf_object *obj,
2383                                       int shndx)
2384 {
2385         return shndx == obj->efile.data_shndx ||
2386                shndx == obj->efile.bss_shndx ||
2387                shndx == obj->efile.rodata_shndx;
2388 }
2389
2390 static bool bpf_object__shndx_is_maps(const struct bpf_object *obj,
2391                                       int shndx)
2392 {
2393         return shndx == obj->efile.maps_shndx ||
2394                shndx == obj->efile.btf_maps_shndx;
2395 }
2396
2397 static enum libbpf_map_type
2398 bpf_object__section_to_libbpf_map_type(const struct bpf_object *obj, int shndx)
2399 {
2400         if (shndx == obj->efile.data_shndx)
2401                 return LIBBPF_MAP_DATA;
2402         else if (shndx == obj->efile.bss_shndx)
2403                 return LIBBPF_MAP_BSS;
2404         else if (shndx == obj->efile.rodata_shndx)
2405                 return LIBBPF_MAP_RODATA;
2406         else if (shndx == obj->efile.symbols_shndx)
2407                 return LIBBPF_MAP_KCONFIG;
2408         else
2409                 return LIBBPF_MAP_UNSPEC;
2410 }
2411
2412 static int bpf_program__record_reloc(struct bpf_program *prog,
2413                                      struct reloc_desc *reloc_desc,
2414                                      __u32 insn_idx, const char *name,
2415                                      const GElf_Sym *sym, const GElf_Rel *rel)
2416 {
2417         struct bpf_insn *insn = &prog->insns[insn_idx];
2418         size_t map_idx, nr_maps = prog->obj->nr_maps;
2419         struct bpf_object *obj = prog->obj;
2420         __u32 shdr_idx = sym->st_shndx;
2421         enum libbpf_map_type type;
2422         struct bpf_map *map;
2423
2424         /* sub-program call relocation */
2425         if (insn->code == (BPF_JMP | BPF_CALL)) {
2426                 if (insn->src_reg != BPF_PSEUDO_CALL) {
2427                         pr_warn("incorrect bpf_call opcode\n");
2428                         return -LIBBPF_ERRNO__RELOC;
2429                 }
2430                 /* text_shndx can be 0, if no default "main" program exists */
2431                 if (!shdr_idx || shdr_idx != obj->efile.text_shndx) {
2432                         pr_warn("bad call relo against section %u\n", shdr_idx);
2433                         return -LIBBPF_ERRNO__RELOC;
2434                 }
2435                 if (sym->st_value % 8) {
2436                         pr_warn("bad call relo offset: %zu\n",
2437                                 (size_t)sym->st_value);
2438                         return -LIBBPF_ERRNO__RELOC;
2439                 }
2440                 reloc_desc->type = RELO_CALL;
2441                 reloc_desc->insn_idx = insn_idx;
2442                 reloc_desc->sym_off = sym->st_value;
2443                 obj->has_pseudo_calls = true;
2444                 return 0;
2445         }
2446
2447         if (insn->code != (BPF_LD | BPF_IMM | BPF_DW)) {
2448                 pr_warn("invalid relo for insns[%d].code 0x%x\n",
2449                         insn_idx, insn->code);
2450                 return -LIBBPF_ERRNO__RELOC;
2451         }
2452
2453         if (sym_is_extern(sym)) {
2454                 int sym_idx = GELF_R_SYM(rel->r_info);
2455                 int i, n = obj->nr_extern;
2456                 struct extern_desc *ext;
2457
2458                 for (i = 0; i < n; i++) {
2459                         ext = &obj->externs[i];
2460                         if (ext->sym_idx == sym_idx)
2461                                 break;
2462                 }
2463                 if (i >= n) {
2464                         pr_warn("extern relo failed to find extern for sym %d\n",
2465                                 sym_idx);
2466                         return -LIBBPF_ERRNO__RELOC;
2467                 }
2468                 pr_debug("found extern #%d '%s' (sym %d, off %u) for insn %u\n",
2469                          i, ext->name, ext->sym_idx, ext->data_off, insn_idx);
2470                 reloc_desc->type = RELO_EXTERN;
2471                 reloc_desc->insn_idx = insn_idx;
2472                 reloc_desc->sym_off = ext->data_off;
2473                 return 0;
2474         }
2475
2476         if (!shdr_idx || shdr_idx >= SHN_LORESERVE) {
2477                 pr_warn("invalid relo for '%s' in special section 0x%x; forgot to initialize global var?\n",
2478                         name, shdr_idx);
2479                 return -LIBBPF_ERRNO__RELOC;
2480         }
2481
2482         type = bpf_object__section_to_libbpf_map_type(obj, shdr_idx);
2483
2484         /* generic map reference relocation */
2485         if (type == LIBBPF_MAP_UNSPEC) {
2486                 if (!bpf_object__shndx_is_maps(obj, shdr_idx)) {
2487                         pr_warn("bad map relo against section %u\n",
2488                                 shdr_idx);
2489                         return -LIBBPF_ERRNO__RELOC;
2490                 }
2491                 for (map_idx = 0; map_idx < nr_maps; map_idx++) {
2492                         map = &obj->maps[map_idx];
2493                         if (map->libbpf_type != type ||
2494                             map->sec_idx != sym->st_shndx ||
2495                             map->sec_offset != sym->st_value)
2496                                 continue;
2497                         pr_debug("found map %zd (%s, sec %d, off %zu) for insn %u\n",
2498                                  map_idx, map->name, map->sec_idx,
2499                                  map->sec_offset, insn_idx);
2500                         break;
2501                 }
2502                 if (map_idx >= nr_maps) {
2503                         pr_warn("map relo failed to find map for sec %u, off %zu\n",
2504                                 shdr_idx, (size_t)sym->st_value);
2505                         return -LIBBPF_ERRNO__RELOC;
2506                 }
2507                 reloc_desc->type = RELO_LD64;
2508                 reloc_desc->insn_idx = insn_idx;
2509                 reloc_desc->map_idx = map_idx;
2510                 reloc_desc->sym_off = 0; /* sym->st_value determines map_idx */
2511                 return 0;
2512         }
2513
2514         /* global data map relocation */
2515         if (!bpf_object__shndx_is_data(obj, shdr_idx)) {
2516                 pr_warn("bad data relo against section %u\n", shdr_idx);
2517                 return -LIBBPF_ERRNO__RELOC;
2518         }
2519         for (map_idx = 0; map_idx < nr_maps; map_idx++) {
2520                 map = &obj->maps[map_idx];
2521                 if (map->libbpf_type != type)
2522                         continue;
2523                 pr_debug("found data map %zd (%s, sec %d, off %zu) for insn %u\n",
2524                          map_idx, map->name, map->sec_idx, map->sec_offset,
2525                          insn_idx);
2526                 break;
2527         }
2528         if (map_idx >= nr_maps) {
2529                 pr_warn("data relo failed to find map for sec %u\n",
2530                         shdr_idx);
2531                 return -LIBBPF_ERRNO__RELOC;
2532         }
2533
2534         reloc_desc->type = RELO_DATA;
2535         reloc_desc->insn_idx = insn_idx;
2536         reloc_desc->map_idx = map_idx;
2537         reloc_desc->sym_off = sym->st_value;
2538         return 0;
2539 }
2540
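/* Recap of the four relocation kinds recorded above (descriptive only):
 * RELO_CALL marks a bpf-to-bpf call into .text (sym_off = callee offset);
 * RELO_EXTERN redirects a BPF_LD|BPF_IMM|BPF_DW load into the internal
 * .kconfig map at ext->data_off; RELO_LD64 resolves an explicit map
 * reference to a map fd; RELO_DATA resolves a global variable access to
 * its data map plus sym->st_value as the offset within the map value. The
 * actual instruction patching happens later, when relocations are applied.
 */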
2541 static int
2542 bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr,
2543                            Elf_Data *data, struct bpf_object *obj)
2544 {
2545         Elf_Data *symbols = obj->efile.symbols;
2546         int err, i, nrels;
2547
2548         pr_debug("collecting relocation info for: '%s'\n", prog->section_name);
2549         nrels = shdr->sh_size / shdr->sh_entsize;
2550
2551         prog->reloc_desc = malloc(sizeof(*prog->reloc_desc) * nrels);
2552         if (!prog->reloc_desc) {
2553                 pr_warn("failed to alloc memory in relocation\n");
2554                 return -ENOMEM;
2555         }
2556         prog->nr_reloc = nrels;
2557
2558         for (i = 0; i < nrels; i++) {
2559                 const char *name;
2560                 __u32 insn_idx;
2561                 GElf_Sym sym;
2562                 GElf_Rel rel;
2563
2564                 if (!gelf_getrel(data, i, &rel)) {
2565                         pr_warn("relocation: failed to get %d reloc\n", i);
2566                         return -LIBBPF_ERRNO__FORMAT;
2567                 }
2568                 if (!gelf_getsym(symbols, GELF_R_SYM(rel.r_info), &sym)) {
2569                         pr_warn("relocation: symbol %"PRIx64" not found\n",
2570                                 GELF_R_SYM(rel.r_info));
2571                         return -LIBBPF_ERRNO__FORMAT;
2572                 }
2573                 if (rel.r_offset % sizeof(struct bpf_insn))
2574                         return -LIBBPF_ERRNO__FORMAT;
2575
2576                 insn_idx = rel.r_offset / sizeof(struct bpf_insn);
2577                 name = elf_strptr(obj->efile.elf, obj->efile.strtabidx,
2578                                   sym.st_name) ? : "<?>";
2579
2580                 pr_debug("relo for shdr %u, symb %zu, value %zu, type %d, bind %d, name %d (\'%s\'), insn %u\n",
2581                          (__u32)sym.st_shndx, (size_t)GELF_R_SYM(rel.r_info),
2582                          (size_t)sym.st_value, GELF_ST_TYPE(sym.st_info),
2583                          GELF_ST_BIND(sym.st_info), sym.st_name, name,
2584                          insn_idx);
2585
2586                 err = bpf_program__record_reloc(prog, &prog->reloc_desc[i],
2587                                                 insn_idx, name, &sym, &rel);
2588                 if (err)
2589                         return err;
2590         }
2591         return 0;
2592 }
2593
2594 static int bpf_map_find_btf_info(struct bpf_object *obj, struct bpf_map *map)
2595 {
2596         struct bpf_map_def *def = &map->def;
2597         __u32 key_type_id = 0, value_type_id = 0;
2598         int ret;
2599
2600         /* if it's a BTF-defined map, we don't need to search for type IDs */
2601         if (map->sec_idx == obj->efile.btf_maps_shndx)
2602                 return 0;
2603
2604         if (!bpf_map__is_internal(map)) {
2605                 ret = btf__get_map_kv_tids(obj->btf, map->name, def->key_size,
2606                                            def->value_size, &key_type_id,
2607                                            &value_type_id);
2608         } else {
2609                 /*
2610                  * LLVM annotates global data differently in BTF, that is,
2611                  * only as '.data', '.bss' or '.rodata'.
2612                  */
2613                 ret = btf__find_by_name(obj->btf,
2614                                 libbpf_type_to_btf_name[map->libbpf_type]);
2615         }
2616         if (ret < 0)
2617                 return ret;
2618
2619         map->btf_key_type_id = key_type_id;
2620         map->btf_value_type_id = bpf_map__is_internal(map) ?
2621                                  ret : value_type_id;
2622         return 0;
2623 }
2624
2625 int bpf_map__reuse_fd(struct bpf_map *map, int fd)
2626 {
2627         struct bpf_map_info info = {};
2628         __u32 len = sizeof(info);
2629         int new_fd, err;
2630         char *new_name;
2631
2632         err = bpf_obj_get_info_by_fd(fd, &info, &len);
2633         if (err)
2634                 return err;
2635
2636         new_name = strdup(info.name);
2637         if (!new_name)
2638                 return -errno;
2639
2640         new_fd = open("/", O_RDONLY | O_CLOEXEC);
2641         if (new_fd < 0) {
2642                 err = -errno;
2643                 goto err_free_new_name;
2644         }
2645
2646         new_fd = dup3(fd, new_fd, O_CLOEXEC);
2647         if (new_fd < 0) {
2648                 err = -errno;
2649                 goto err_close_new_fd;
2650         }
2651
2652         err = zclose(map->fd);
2653         if (err) {
2654                 err = -errno;
2655                 goto err_close_new_fd;
2656         }
2657         free(map->name);
2658
2659         map->fd = new_fd;
2660         map->name = new_name;
2661         map->def.type = info.type;
2662         map->def.key_size = info.key_size;
2663         map->def.value_size = info.value_size;
2664         map->def.max_entries = info.max_entries;
2665         map->def.map_flags = info.map_flags;
2666         map->btf_key_type_id = info.btf_key_type_id;
2667         map->btf_value_type_id = info.btf_value_type_id;
2668         map->reused = true;
2669
2670         return 0;
2671
2672 err_close_new_fd:
2673         close(new_fd);
2674 err_free_new_name:
2675         free(new_name);
2676         return err;
2677 }
2678
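/* Caller-side sketch for bpf_map__reuse_fd() (the pin path is
 * illustrative): share one map instance across processes by reusing a
 * pinned fd instead of creating a fresh map at load time:
 *
 *	int fd = bpf_obj_get("/sys/fs/bpf/shared_map");
 *	if (fd >= 0) {
 *		err = bpf_map__reuse_fd(map, fd);	// dup()s fd internally
 *		close(fd);				// drop our extra reference
 *	}
 */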
2679 int bpf_map__resize(struct bpf_map *map, __u32 max_entries)
2680 {
2681         if (!map || !max_entries)
2682                 return -EINVAL;
2683
2684         /* Once the map is created, its attributes can't be changed. */
2685         if (map->fd >= 0)
2686                 return -EBUSY;
2687
2688         map->def.max_entries = max_entries;
2689
2690         return 0;
2691 }
2692
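/* Usage sketch for bpf_map__resize(): it must run after open but before
 * load, since a map with fd >= 0 can no longer be changed ("events" is an
 * illustrative map name, error checks omitted):
 *
 *	struct bpf_object *obj = bpf_object__open_file("prog.bpf.o", NULL);
 *	struct bpf_map *m = bpf_object__find_map_by_name(obj, "events");
 *	bpf_map__resize(m, 4 * libbpf_num_possible_cpus());
 *	bpf_object__load(obj);
 */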
2693 static int
2694 bpf_object__probe_name(struct bpf_object *obj)
2695 {
2696         struct bpf_load_program_attr attr;
2697         char *cp, errmsg[STRERR_BUFSIZE];
2698         struct bpf_insn insns[] = {
2699                 BPF_MOV64_IMM(BPF_REG_0, 0),
2700                 BPF_EXIT_INSN(),
2701         };
2702         int ret;
2703
2704         /* make sure basic loading works */
2705
2706         memset(&attr, 0, sizeof(attr));
2707         attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
2708         attr.insns = insns;
2709         attr.insns_cnt = ARRAY_SIZE(insns);
2710         attr.license = "GPL";
2711
2712         ret = bpf_load_program_xattr(&attr, NULL, 0);
2713         if (ret < 0) {
2714                 cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
2715                 pr_warn("Error in %s():%s(%d). Couldn't load basic 'r0 = 0' BPF program.\n",
2716                         __func__, cp, errno);
2717                 return -errno;
2718         }
2719         close(ret);
2720
2721         /* now try the same program, but with the name */
2722
2723         attr.name = "test";
2724         ret = bpf_load_program_xattr(&attr, NULL, 0);
2725         if (ret >= 0) {
2726                 obj->caps.name = 1;
2727                 close(ret);
2728         }
2729
2730         return 0;
2731 }
2732
2733 static int
2734 bpf_object__probe_global_data(struct bpf_object *obj)
2735 {
2736         struct bpf_load_program_attr prg_attr;
2737         struct bpf_create_map_attr map_attr;
2738         char *cp, errmsg[STRERR_BUFSIZE];
2739         struct bpf_insn insns[] = {
2740                 BPF_LD_MAP_VALUE(BPF_REG_1, 0, 16),
2741                 BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 42),
2742                 BPF_MOV64_IMM(BPF_REG_0, 0),
2743                 BPF_EXIT_INSN(),
2744         };
2745         int ret, map;
2746
2747         memset(&map_attr, 0, sizeof(map_attr));
2748         map_attr.map_type = BPF_MAP_TYPE_ARRAY;
2749         map_attr.key_size = sizeof(int);
2750         map_attr.value_size = 32;
2751         map_attr.max_entries = 1;
2752
2753         map = bpf_create_map_xattr(&map_attr);
2754         if (map < 0) {
2755                 cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
2756                 pr_warn("Error in %s():%s(%d). Couldn't create simple array map.\n",
2757                         __func__, cp, errno);
2758                 return -errno;
2759         }
2760
2761         insns[0].imm = map;
2762
2763         memset(&prg_attr, 0, sizeof(prg_attr));
2764         prg_attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
2765         prg_attr.insns = insns;
2766         prg_attr.insns_cnt = ARRAY_SIZE(insns);
2767         prg_attr.license = "GPL";
2768
2769         ret = bpf_load_program_xattr(&prg_attr, NULL, 0);
2770         if (ret >= 0) {
2771                 obj->caps.global_data = 1;
2772                 close(ret);
2773         }
2774
2775         close(map);
2776         return 0;
2777 }
2778
2779 static int bpf_object__probe_btf_func(struct bpf_object *obj)
2780 {
2781         static const char strs[] = "\0int\0x\0a";
2782         /* void x(int a) {} */
2783         __u32 types[] = {
2784                 /* int */
2785                 BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4),  /* [1] */
2786                 /* FUNC_PROTO */                                /* [2] */
2787                 BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 0),
2788                 BTF_PARAM_ENC(7, 1),
2789                 /* FUNC x */                                    /* [3] */
2790                 BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0), 2),
2791         };
2792         int btf_fd;
2793
2794         btf_fd = libbpf__load_raw_btf((char *)types, sizeof(types),
2795                                       strs, sizeof(strs));
2796         if (btf_fd >= 0) {
2797                 obj->caps.btf_func = 1;
2798                 close(btf_fd);
2799                 return 1;
2800         }
2801
2802         return 0;
2803 }
2804
2805 static int bpf_object__probe_btf_datasec(struct bpf_object *obj)
2806 {
2807         static const char strs[] = "\0x\0.data";
2808         /* static int a; */
2809         __u32 types[] = {
2810                 /* int */
2811                 BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),  /* [1] */
2812                 /* VAR x */                                     /* [2] */
2813                 BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1),
2814                 BTF_VAR_STATIC,
2815                 /* DATASEC val */                               /* [3] */
2816                 BTF_TYPE_ENC(3, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4),
2817                 BTF_VAR_SECINFO_ENC(2, 0, 4),
2818         };
2819         int btf_fd;
2820
2821         btf_fd = libbpf__load_raw_btf((char *)types, sizeof(types),
2822                                       strs, sizeof(strs));
2823         if (btf_fd >= 0) {
2824                 obj->caps.btf_datasec = 1;
2825                 close(btf_fd);
2826                 return 1;
2827         }
2828
2829         return 0;
2830 }
2831
2832 static int bpf_object__probe_array_mmap(struct bpf_object *obj)
2833 {
2834         struct bpf_create_map_attr attr = {
2835                 .map_type = BPF_MAP_TYPE_ARRAY,
2836                 .map_flags = BPF_F_MMAPABLE,
2837                 .key_size = sizeof(int),
2838                 .value_size = sizeof(int),
2839                 .max_entries = 1,
2840         };
2841         int fd;
2842
2843         fd = bpf_create_map_xattr(&attr);
2844         if (fd >= 0) {
2845                 obj->caps.array_mmap = 1;
2846                 close(fd);
2847                 return 1;
2848         }
2849
2850         return 0;
2851 }
2852
2853 static int
2854 bpf_object__probe_caps(struct bpf_object *obj)
2855 {
2856         int (*probe_fn[])(struct bpf_object *obj) = {
2857                 bpf_object__probe_name,
2858                 bpf_object__probe_global_data,
2859                 bpf_object__probe_btf_func,
2860                 bpf_object__probe_btf_datasec,
2861                 bpf_object__probe_array_mmap,
2862         };
2863         int i, ret;
2864
2865         for (i = 0; i < ARRAY_SIZE(probe_fn); i++) {
2866                 ret = probe_fn[i](obj);
2867                 if (ret < 0)
2868                         pr_debug("Probe #%d failed with %d.\n", i, ret);
2869         }
2870
2871         return 0;
2872 }
2873
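/* Note: probe failures are deliberately non-fatal; a probe returning 0
 * simply leaves the capability unset. The results gate later behavior:
 * caps.name enables map/prog names, caps.btf_func/caps.btf_datasec control
 * bpf_object__sanitize_btf(), and caps.array_mmap decides whether internal
 * maps are created with BPF_F_MMAPABLE.
 */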
2874 static bool map_is_reuse_compat(const struct bpf_map *map, int map_fd)
2875 {
2876         struct bpf_map_info map_info = {};
2877         char msg[STRERR_BUFSIZE];
2878         __u32 map_info_len;
2879
2880         map_info_len = sizeof(map_info);
2881
2882         if (bpf_obj_get_info_by_fd(map_fd, &map_info, &map_info_len)) {
2883                 pr_warn("failed to get map info for map FD %d: %s\n",
2884                         map_fd, libbpf_strerror_r(errno, msg, sizeof(msg)));
2885                 return false;
2886         }
2887
2888         return (map_info.type == map->def.type &&
2889                 map_info.key_size == map->def.key_size &&
2890                 map_info.value_size == map->def.value_size &&
2891                 map_info.max_entries == map->def.max_entries &&
2892                 map_info.map_flags == map->def.map_flags);
2893 }
2894
2895 static int
2896 bpf_object__reuse_map(struct bpf_map *map)
2897 {
2898         char *cp, errmsg[STRERR_BUFSIZE];
2899         int err, pin_fd;
2900
2901         pin_fd = bpf_obj_get(map->pin_path);
2902         if (pin_fd < 0) {
2903                 err = -errno;
2904                 if (err == -ENOENT) {
2905                         pr_debug("found no pinned map to reuse at '%s'\n",
2906                                  map->pin_path);
2907                         return 0;
2908                 }
2909
2910                 cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg));
2911                 pr_warn("couldn't retrieve pinned map '%s': %s\n",
2912                         map->pin_path, cp);
2913                 return err;
2914         }
2915
2916         if (!map_is_reuse_compat(map, pin_fd)) {
2917                 pr_warn("couldn't reuse pinned map at '%s': parameter mismatch\n",
2918                         map->pin_path);
2919                 close(pin_fd);
2920                 return -EINVAL;
2921         }
2922
2923         err = bpf_map__reuse_fd(map, pin_fd);
2924         if (err) {
2925                 close(pin_fd);
2926                 return err;
2927         }
2928         map->pinned = true;
2929         pr_debug("reused pinned map at '%s'\n", map->pin_path);
2930
2931         return 0;
2932 }
2933
2934 static int
2935 bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map)
2936 {
2937         enum libbpf_map_type map_type = map->libbpf_type;
2938         char *cp, errmsg[STRERR_BUFSIZE];
2939         int err, zero = 0;
2940
2941         /* kernel already zero-initializes .bss map. */
2942         if (map_type == LIBBPF_MAP_BSS)
2943                 return 0;
2944
2945         err = bpf_map_update_elem(map->fd, &zero, map->mmaped, 0);
2946         if (err) {
2947                 err = -errno;
2948                 cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
2949                 pr_warn("Error setting initial map(%s) contents: %s\n",
2950                         map->name, cp);
2951                 return err;
2952         }
2953
2954         /* Freeze .rodata and .kconfig map as read-only from syscall side. */
2955         if (map_type == LIBBPF_MAP_RODATA || map_type == LIBBPF_MAP_KCONFIG) {
2956                 err = bpf_map_freeze(map->fd);
2957                 if (err) {
2958                         err = -errno;
2959                         cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
2960                         pr_warn("Error freezing map(%s) as read-only: %s\n",
2961                                 map->name, cp);
2962                         return err;
2963                 }
2964         }
2965         return 0;
2966 }
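/* Example (sketch): after bpf_object__load(), the freeze above makes
 * syscall-side writes to .rodata fail while BPF-side reads keep working;
 * the internal map name below is illustrative:
 *
 *	int zero = 0, newval = 1;
 *	int fd = bpf_map__fd(bpf_object__find_map_by_name(obj, "prog.rodata"));
 *
 *	bpf_map_update_elem(fd, &zero, &newval, 0);	// fails with EPERM
 */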
2967
2968 static int
2969 bpf_object__create_maps(struct bpf_object *obj)
2970 {
2971         struct bpf_create_map_attr create_attr = {};
2972         int nr_cpus = 0;
2973         unsigned int i;
2974         int err;
2975
2976         for (i = 0; i < obj->nr_maps; i++) {
2977                 struct bpf_map *map = &obj->maps[i];
2978                 struct bpf_map_def *def = &map->def;
2979                 char *cp, errmsg[STRERR_BUFSIZE];
2980                 int *pfd = &map->fd;
2981
2982                 if (map->pin_path) {
2983                         err = bpf_object__reuse_map(map);
2984                         if (err) {
2985                                 pr_warn("error reusing pinned map %s\n",
2986                                         map->name);
2987                                 return err;
2988                         }
2989                 }
2990
2991                 if (map->fd >= 0) {
2992                         pr_debug("skip map create (preset) %s: fd=%d\n",
2993                                  map->name, map->fd);
2994                         continue;
2995                 }
2996
2997                 if (obj->caps.name)
2998                         create_attr.name = map->name;
2999                 create_attr.map_ifindex = map->map_ifindex;
3000                 create_attr.map_type = def->type;
3001                 create_attr.map_flags = def->map_flags;
3002                 create_attr.key_size = def->key_size;
3003                 create_attr.value_size = def->value_size;
3004                 if (def->type == BPF_MAP_TYPE_PERF_EVENT_ARRAY &&
3005                     !def->max_entries) {
3006                         if (!nr_cpus)
3007                                 nr_cpus = libbpf_num_possible_cpus();
3008                         if (nr_cpus < 0) {
3009                                 pr_warn("failed to determine number of system CPUs: %d\n",
3010                                         nr_cpus);
3011                                 err = nr_cpus;
3012                                 goto err_out;
3013                         }
3014                         pr_debug("map '%s': setting size to %d\n",
3015                                  map->name, nr_cpus);
3016                         create_attr.max_entries = nr_cpus;
3017                 } else {
3018                         create_attr.max_entries = def->max_entries;
3019                 }
3020                 create_attr.btf_fd = 0;
3021                 create_attr.btf_key_type_id = 0;
3022                 create_attr.btf_value_type_id = 0;
3023                 if (bpf_map_type__is_map_in_map(def->type) &&
3024                     map->inner_map_fd >= 0)
3025                         create_attr.inner_map_fd = map->inner_map_fd;
3026
3027                 if (obj->btf && !bpf_map_find_btf_info(obj, map)) {
3028                         create_attr.btf_fd = btf__fd(obj->btf);
3029                         create_attr.btf_key_type_id = map->btf_key_type_id;
3030                         create_attr.btf_value_type_id = map->btf_value_type_id;
3031                 }
3032
3033                 *pfd = bpf_create_map_xattr(&create_attr);
3034                 if (*pfd < 0 && (create_attr.btf_key_type_id ||
3035                                  create_attr.btf_value_type_id)) {
3036                         err = -errno;
3037                         cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
3038                         pr_warn("Error in bpf_create_map_xattr(%s):%s(%d). Retrying without BTF.\n",
3039                                 map->name, cp, err);
3040                         create_attr.btf_fd = 0;
3041                         create_attr.btf_key_type_id = 0;
3042                         create_attr.btf_value_type_id = 0;
3043                         map->btf_key_type_id = 0;
3044                         map->btf_value_type_id = 0;
3045                         *pfd = bpf_create_map_xattr(&create_attr);
3046                 }
3047
3048                 if (*pfd < 0) {
3049                         size_t j;
3050
3051                         err = -errno;
3052 err_out:
3053                         cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
3054                         pr_warn("failed to create map (name: '%s'): %s(%d)\n",
3055                                 map->name, cp, err);
3056                         pr_perm_msg(err);
3057                         for (j = 0; j < i; j++)
3058                                 zclose(obj->maps[j].fd);
3059                         return err;
3060                 }
3061
3062                 if (bpf_map__is_internal(map)) {
3063                         err = bpf_object__populate_internal_map(obj, map);
3064                         if (err < 0) {
3065                                 zclose(*pfd);
3066                                 goto err_out;
3067                         }
3068                 }
3069
3070                 if (map->pin_path && !map->pinned) {
3071                         err = bpf_map__pin(map, NULL);
3072                         if (err) {
3073                                 pr_warn("failed to auto-pin map name '%s' at '%s'\n",
3074                                         map->name, map->pin_path);
3075                                 return err;
3076                         }
3077                 }
3078
3079                 pr_debug("created map %s: fd=%d\n", map->name, *pfd);
3080         }
3081
3082         return 0;
3083 }
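/* Example (sketch): the PERF_EVENT_ARRAY sizing above lets a BPF-side map
 * definition leave max_entries at 0 and get one slot per possible CPU at
 * creation time:
 *
 *	struct bpf_map_def SEC("maps") events = {
 *		.type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
 *		.key_size = sizeof(int),
 *		.value_size = sizeof(int),
 *		.max_entries = 0,	// sized to libbpf_num_possible_cpus()
 *	};
 */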
3084
3085 static int
3086 check_btf_ext_reloc_err(struct bpf_program *prog, int err,
3087                         void *btf_prog_info, const char *info_name)
3088 {
3089         if (err != -ENOENT) {
3090                 pr_warn("Error in loading %s for sec %s.\n",
3091                         info_name, prog->section_name);
3092                 return err;
3093         }
3094
3095         /* err == -ENOENT (i.e. prog->section_name not found in btf_ext) */
3096
3097         if (btf_prog_info) {
3098                 /*
3099                  * Some info has already been found, but the last
3100                  * btf_ext reloc failed, so we must error out.
3101                  */
3102                 pr_warn("Error in relocating %s for sec %s.\n",
3103                         info_name, prog->section_name);
3104                 return err;
3105         }
3106
3107         /* Failed to load the very first info. Ignore the rest. */
3108         pr_warn("Cannot find %s for main program sec %s. Ignore all %s.\n",
3109                 info_name, prog->section_name, info_name);
3110         return 0;
3111 }
3112
3113 static int
3114 bpf_program_reloc_btf_ext(struct bpf_program *prog, struct bpf_object *obj,
3115                           const char *section_name,  __u32 insn_offset)
3116 {
3117         int err;
3118
3119         if (!insn_offset || prog->func_info) {
3120                 /*
3121                  * !insn_offset => main program
3122                  *
3123                  * For sub prog, the main program's func_info has to
3124                  * be loaded first (i.e. prog->func_info != NULL)
3125                  */
3126                 err = btf_ext__reloc_func_info(obj->btf, obj->btf_ext,
3127                                                section_name, insn_offset,
3128                                                &prog->func_info,
3129                                                &prog->func_info_cnt);
3130                 if (err)
3131                         return check_btf_ext_reloc_err(prog, err,
3132                                                        prog->func_info,
3133                                                        "bpf_func_info");
3134
3135                 prog->func_info_rec_size = btf_ext__func_info_rec_size(obj->btf_ext);
3136         }
3137
3138         if (!insn_offset || prog->line_info) {
3139                 err = btf_ext__reloc_line_info(obj->btf, obj->btf_ext,
3140                                                section_name, insn_offset,
3141                                                &prog->line_info,
3142                                                &prog->line_info_cnt);
3143                 if (err)
3144                         return check_btf_ext_reloc_err(prog, err,
3145                                                        prog->line_info,
3146                                                        "bpf_line_info");
3147
3148                 prog->line_info_rec_size = btf_ext__line_info_rec_size(obj->btf_ext);
3149         }
3150
3151         return 0;
3152 }
3153
3154 #define BPF_CORE_SPEC_MAX_LEN 64
3155
3156 /* represents BPF CO-RE field or array element accessor */
3157 struct bpf_core_accessor {
3158         __u32 type_id;          /* struct/union type or array element type */
3159         __u32 idx;              /* field index or array index */
3160         const char *name;       /* field name or NULL for array accessor */
3161 };
3162
3163 struct bpf_core_spec {
3164         const struct btf *btf;
3165         /* high-level spec: named fields and array indices only */
3166         struct bpf_core_accessor spec[BPF_CORE_SPEC_MAX_LEN];
3167         /* high-level spec length */
3168         int len;
3169         /* raw, low-level spec: 1-to-1 with accessor spec string */
3170         int raw_spec[BPF_CORE_SPEC_MAX_LEN];
3171         /* raw spec length */
3172         int raw_len;
3173         /* field bit offset represented by spec */
3174         __u32 bit_offset;
3175 };
3176
3177 static bool str_is_empty(const char *s)
3178 {
3179         return !s || !s[0];
3180 }
3181
3182 static bool is_flex_arr(const struct btf *btf,
3183                         const struct bpf_core_accessor *acc,
3184                         const struct btf_array *arr)
3185 {
3186         const struct btf_type *t;
3187
3188         /* not a flexible array if it is not inside a struct or has non-zero size */
3189         if (!acc->name || arr->nelems > 0)
3190                 return false;
3191
3192         /* has to be the last member of enclosing struct */
3193         t = btf__type_by_id(btf, acc->type_id);
3194         return acc->idx == btf_vlen(t) - 1;
3195 }
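/* Example: given
 *
 *	struct pkt { int len; char data[]; };
 *
 * an accessor chain ending in pkt.data[i] passes the checks above: 'data'
 * is a named field, has nelems == 0, and is the last member of its struct,
 * so indices beyond the declared (zero) bound are allowed for it.
 */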
3196
3197 /*
3198  * Turn bpf_field_reloc into a low- and high-level spec representation,
3199  * validating correctness along the way, as well as calculating resulting
3200  * field bit offset, specified by accessor string. Low-level spec captures
3201  * every single level of nestedness, including traversing anonymous
3202  * struct/union members. High-level one only captures semantically meaningful
3203  * "turning points": named fields and array indices.
3204  * E.g., for this case:
3205  *
3206  *   struct sample {
3207  *       int __unimportant;
3208  *       struct {
3209  *           int __1;
3210  *           int __2;
3211  *           int a[7];
3212  *       };
3213  *   };
3214  *
3215  *   struct sample *s = ...;
3216  *
3217  *   int *x = &s->a[3]; // access string = '0:1:2:3'
3218  *
3219  * Low-level spec has 1:1 mapping with each element of access string (it's
3220  * just a parsed access string representation): [0, 1, 2, 3].
3221  *
3222  * High-level spec will capture only 3 points:
3223  *   - initial zero-index access by pointer (&s->... is the same as &s[0]...);
3224  *   - field 'a' access (corresponds to '2' in low-level spec);
3225  *   - array element #3 access (corresponds to '3' in low-level spec).
3226  *
3227  */
3228 static int bpf_core_spec_parse(const struct btf *btf,
3229                                __u32 type_id,
3230                                const char *spec_str,
3231                                struct bpf_core_spec *spec)
3232 {
3233         int access_idx, parsed_len, i;
3234         struct bpf_core_accessor *acc;
3235         const struct btf_type *t;
3236         const char *name;
3237         __u32 id;
3238         __s64 sz;
3239
3240         if (str_is_empty(spec_str) || *spec_str == ':')
3241                 return -EINVAL;
3242
3243         memset(spec, 0, sizeof(*spec));
3244         spec->btf = btf;
3245
3246         /* parse spec_str="0:1:2:3:4" into array raw_spec=[0, 1, 2, 3, 4] */
3247         while (*spec_str) {
3248                 if (*spec_str == ':')
3249                         ++spec_str;
3250                 if (sscanf(spec_str, "%d%n", &access_idx, &parsed_len) != 1)
3251                         return -EINVAL;
3252                 if (spec->raw_len == BPF_CORE_SPEC_MAX_LEN)
3253                         return -E2BIG;
3254                 spec_str += parsed_len;
3255                 spec->raw_spec[spec->raw_len++] = access_idx;
3256         }
3257
3258         if (spec->raw_len == 0)
3259                 return -EINVAL;
3260
3261         /* first spec value is always reloc type array index */
3262         t = skip_mods_and_typedefs(btf, type_id, &id);
3263         if (!t)
3264                 return -EINVAL;
3265
3266         access_idx = spec->raw_spec[0];
3267         spec->spec[0].type_id = id;
3268         spec->spec[0].idx = access_idx;
3269         spec->len++;
3270
3271         sz = btf__resolve_size(btf, id);
3272         if (sz < 0)
3273                 return sz;
3274         spec->bit_offset = access_idx * sz * 8;
3275
3276         for (i = 1; i < spec->raw_len; i++) {
3277                 t = skip_mods_and_typedefs(btf, id, &id);
3278                 if (!t)
3279                         return -EINVAL;
3280
3281                 access_idx = spec->raw_spec[i];
3282                 acc = &spec->spec[spec->len];
3283
3284                 if (btf_is_composite(t)) {
3285                         const struct btf_member *m;
3286                         __u32 bit_offset;
3287
3288                         if (access_idx >= btf_vlen(t))
3289                                 return -EINVAL;
3290
3291                         bit_offset = btf_member_bit_offset(t, access_idx);
3292                         spec->bit_offset += bit_offset;
3293
3294                         m = btf_members(t) + access_idx;
3295                         if (m->name_off) {
3296                                 name = btf__name_by_offset(btf, m->name_off);
3297                                 if (str_is_empty(name))
3298                                         return -EINVAL;
3299
3300                                 acc->type_id = id;
3301                                 acc->idx = access_idx;
3302                                 acc->name = name;
3303                                 spec->len++;
3304                         }
3305
3306                         id = m->type;
3307                 } else if (btf_is_array(t)) {
3308                         const struct btf_array *a = btf_array(t);
3309                         bool flex;
3310
3311                         t = skip_mods_and_typedefs(btf, a->type, &id);
3312                         if (!t)
3313                                 return -EINVAL;
3314
3315                         flex = is_flex_arr(btf, acc - 1, a);
3316                         if (!flex && access_idx >= a->nelems)
3317                                 return -EINVAL;
3318
3319                         spec->spec[spec->len].type_id = id;
3320                         spec->spec[spec->len].idx = access_idx;
3321                         spec->len++;
3322
3323                         sz = btf__resolve_size(btf, id);
3324                         if (sz < 0)
3325                                 return sz;
3326                         spec->bit_offset += access_idx * sz * 8;
3327                 } else {
3328                         pr_warn("relo for [%u] %s (at idx %d) captures type [%d] of unexpected kind %d\n",
3329                                 type_id, spec_str, i, id, btf_kind(t));
3330                         return -EINVAL;
3331                 }
3332         }
3333
3334         return 0;
3335 }
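/* Worked example: for 'struct sample' from the comment above and
 * spec_str = "0:1:2:3" (assuming 4-byte int and no extra padding), parsing
 * produces:
 *
 *	spec->raw_spec   = {0, 1, 2, 3}, raw_len = 4
 *	spec->len        = 3	// s[0], field 'a', array index 3
 *	spec->bit_offset = (0 * sizeof(struct sample)
 *			    + offsetof(struct sample, a)	// 12 bytes
 *			    + 3 * sizeof(int)) * 8		// + 12 bytes
 *			 = 192
 */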
3336
3337 static bool bpf_core_is_flavor_sep(const char *s)
3338 {
3339         /* check X___Y name pattern, where X and Y are not underscores */
3340         return s[0] != '_' &&                                 /* X */
3341                s[1] == '_' && s[2] == '_' && s[3] == '_' &&   /* ___ */
3342                s[4] != '_';                                   /* Y */
3343 }
3344
3345 /* Given 'some_struct_name___with_flavor' return the length of a name prefix
3346  * before the last triple underscore. The struct name part after the last
3347  * triple underscore is ignored during BPF CO-RE relocation matching.
3348  */
3349 static size_t bpf_core_essential_name_len(const char *name)
3350 {
3351         size_t n = strlen(name);
3352         int i;
3353
3354         for (i = n - 5; i >= 0; i--) {
3355                 if (bpf_core_is_flavor_sep(name + i))
3356                         return i + 1;
3357         }
3358         return n;
3359 }
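/* Example: bpf_core_essential_name_len("task_struct___2") is 11, the
 * length of "task_struct"; a name without a flavor suffix, e.g.
 * "task_struct" itself, keeps its full length.
 */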
3360
3361 /* dynamically sized list of type IDs */
3362 struct ids_vec {
3363         __u32 *data;
3364         int len;
3365 };
3366
3367 static void bpf_core_free_cands(struct ids_vec *cand_ids)
3368 {
3369         free(cand_ids->data);
3370         free(cand_ids);
3371 }
3372
3373 static struct ids_vec *bpf_core_find_cands(const struct btf *local_btf,
3374                                            __u32 local_type_id,
3375                                            const struct btf *targ_btf)
3376 {
3377         size_t local_essent_len, targ_essent_len;
3378         const char *local_name, *targ_name;
3379         const struct btf_type *t;
3380         struct ids_vec *cand_ids;
3381         __u32 *new_ids;
3382         int i, err, n;
3383
3384         t = btf__type_by_id(local_btf, local_type_id);
3385         if (!t)
3386                 return ERR_PTR(-EINVAL);
3387
3388         local_name = btf__name_by_offset(local_btf, t->name_off);
3389         if (str_is_empty(local_name))
3390                 return ERR_PTR(-EINVAL);
3391         local_essent_len = bpf_core_essential_name_len(local_name);
3392
3393         cand_ids = calloc(1, sizeof(*cand_ids));
3394         if (!cand_ids)
3395                 return ERR_PTR(-ENOMEM);
3396
3397         n = btf__get_nr_types(targ_btf);
3398         for (i = 1; i <= n; i++) {
3399                 t = btf__type_by_id(targ_btf, i);
3400                 targ_name = btf__name_by_offset(targ_btf, t->name_off);
3401                 if (str_is_empty(targ_name))
3402                         continue;
3403
3404                 targ_essent_len = bpf_core_essential_name_len(targ_name);
3405                 if (targ_essent_len != local_essent_len)
3406                         continue;
3407
3408                 if (strncmp(local_name, targ_name, local_essent_len) == 0) {
3409                         pr_debug("[%d] %s: found candidate [%d] %s\n",
3410                                  local_type_id, local_name, i, targ_name);
3411                         new_ids = reallocarray(cand_ids->data, cand_ids->len + 1, sizeof(*cand_ids->data));
3412                         if (!new_ids) {
3413                                 err = -ENOMEM;
3414                                 goto err_out;
3415                         }
3416                         cand_ids->data = new_ids;
3417                         cand_ids->data[cand_ids->len++] = i;
3418                 }
3419         }
3420         return cand_ids;
3421 err_out:
3422         bpf_core_free_cands(cand_ids);
3423         return ERR_PTR(err);
3424 }
3425
3426 /* Check two types for compatibility, skipping const/volatile/restrict and
3427  * typedefs, to ensure we are relocating compatible entities:
3428  *   - any two STRUCTs/UNIONs are compatible and can be mixed;
3429  *   - any two FWDs are compatible, if their names match (modulo flavor suffix);
3430  *   - any two PTRs are always compatible;
3431  *   - for ENUMs, names should be the same (ignoring flavor suffix) or at
3432  *     least one of the enums should be anonymous; sizes and enumerator
3433  *     values are not checked;
3434  *   - for INT, size and signedness are ignored;
3435  *   - for ARRAY, dimensionality is ignored, element types are checked for
3436  *     compatibility recursively;
3437  *   - everything else shouldn't ever be a target of relocation.
3438  * These rules are not set in stone and probably will be adjusted as we get
3439  * more experience with using BPF CO-RE relocations.
3440  */
3441 static int bpf_core_fields_are_compat(const struct btf *local_btf,
3442                                       __u32 local_id,
3443                                       const struct btf *targ_btf,
3444                                       __u32 targ_id)
3445 {
3446         const struct btf_type *local_type, *targ_type;
3447
3448 recur:
3449         local_type = skip_mods_and_typedefs(local_btf, local_id, &local_id);
3450         targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id);
3451         if (!local_type || !targ_type)
3452                 return -EINVAL;
3453
3454         if (btf_is_composite(local_type) && btf_is_composite(targ_type))
3455                 return 1;
3456         if (btf_kind(local_type) != btf_kind(targ_type))
3457                 return 0;
3458
3459         switch (btf_kind(local_type)) {
3460         case BTF_KIND_PTR:
3461                 return 1;
3462         case BTF_KIND_FWD:
3463         case BTF_KIND_ENUM: {
3464                 const char *local_name, *targ_name;
3465                 size_t local_len, targ_len;
3466
3467                 local_name = btf__name_by_offset(local_btf,
3468                                                  local_type->name_off);
3469                 targ_name = btf__name_by_offset(targ_btf, targ_type->name_off);
3470                 local_len = bpf_core_essential_name_len(local_name);
3471                 targ_len = bpf_core_essential_name_len(targ_name);
3472                 /* one of them is anonymous or both w/ same flavor-less names */
3473                 return local_len == 0 || targ_len == 0 ||
3474                        (local_len == targ_len &&
3475                         strncmp(local_name, targ_name, local_len) == 0);
3476         }
3477         case BTF_KIND_INT:
3478                 /* just reject deprecated bitfield-like integers; all other
3479                  * integers are compatible with each other by default
3480                  */
3481                 return btf_int_offset(local_type) == 0 &&
3482                        btf_int_offset(targ_type) == 0;
3483         case BTF_KIND_ARRAY:
3484                 local_id = btf_array(local_type)->type;
3485                 targ_id = btf_array(targ_type)->type;
3486                 goto recur;
3487         default:
3488                 pr_warn("unexpected kind %d relocated, local [%d], target [%d]\n",
3489                         btf_kind(local_type), local_id, targ_id);
3490                 return 0;
3491         }
3492 }
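/* Example: per the rules above, 'struct sample___v2' is compatible with
 * 'struct sample' (any two structs can be mixed); 'enum sample_mode'
 * matches 'enum sample_mode___x' or an anonymous enum; and 'int [4]' vs
 * 'int [8]' is compatible because only array element types are compared.
 */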
3493
3494 /*
3495  * Given single high-level named field accessor in local type, find
3496  * corresponding high-level accessor for a target type. Along the way,
3497  * maintain low-level spec for target as well. Also keep updating target
3498  * bit offset.
3499  *
3500  * Searching is performed through recursive exhaustive enumeration of all
3501  * fields of a struct/union. If there are any anonymous (embedded)
3502  * structs/unions, they are recursively searched as well. If field with
3503  * structs/unions, they are recursively searched as well. If a field with
3504  * the desired name is found, local and target types are checked for
3505  * compatibility before returning the result.
3506  * 1 is returned if the field is found.
3507  * 0 is returned if no compatible field is found.
3508  * <0 is returned on error.
3509  */
3510 static int bpf_core_match_member(const struct btf *local_btf,
3511                                  const struct bpf_core_accessor *local_acc,
3512                                  const struct btf *targ_btf,
3513                                  __u32 targ_id,
3514                                  struct bpf_core_spec *spec,
3515                                  __u32 *next_targ_id)
3516 {
3517         const struct btf_type *local_type, *targ_type;
3518         const struct btf_member *local_member, *m;
3519         const char *local_name, *targ_name;
3520         __u32 local_id;
3521         int i, n, found;
3522
3523         targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id);
3524         if (!targ_type)
3525                 return -EINVAL;
3526         if (!btf_is_composite(targ_type))
3527                 return 0;
3528
3529         local_id = local_acc->type_id;
3530         local_type = btf__type_by_id(local_btf, local_id);
3531         local_member = btf_members(local_type) + local_acc->idx;
3532         local_name = btf__name_by_offset(local_btf, local_member->name_off);
3533
3534         n = btf_vlen(targ_type);
3535         m = btf_members(targ_type);
3536         for (i = 0; i < n; i++, m++) {
3537                 __u32 bit_offset;
3538
3539                 bit_offset = btf_member_bit_offset(targ_type, i);
3540
3541                 /* too deep struct/union/array nesting */
3542                 if (spec->raw_len == BPF_CORE_SPEC_MAX_LEN)
3543                         return -E2BIG;
3544
3545                 /* speculate this member will be the good one */
3546                 spec->bit_offset += bit_offset;
3547                 spec->raw_spec[spec->raw_len++] = i;
3548
3549                 targ_name = btf__name_by_offset(targ_btf, m->name_off);
3550                 if (str_is_empty(targ_name)) {
3551                         /* embedded struct/union, we need to go deeper */
3552                         found = bpf_core_match_member(local_btf, local_acc,
3553                                                       targ_btf, m->type,
3554                                                       spec, next_targ_id);
3555                         if (found) /* either found or error */
3556                                 return found;
3557                 } else if (strcmp(local_name, targ_name) == 0) {
3558                         /* matching named field */
3559                         struct bpf_core_accessor *targ_acc;
3560
3561                         targ_acc = &spec->spec[spec->len++];
3562                         targ_acc->type_id = targ_id;
3563                         targ_acc->idx = i;
3564                         targ_acc->name = targ_name;
3565
3566                         *next_targ_id = m->type;
3567                         found = bpf_core_fields_are_compat(local_btf,
3568                                                            local_member->type,
3569                                                            targ_btf, m->type);
3570                         if (!found)
3571                                 spec->len--; /* pop accessor */
3572                         return found;
3573                 }
3574                 /* member turned out not to be what we looked for */
3575                 spec->bit_offset -= bit_offset;
3576                 spec->raw_len--;
3577         }
3578
3579         return 0;
3580 }
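/* Example: matching a local named field 'a' against
 *
 *	struct target { int x; struct { int y; int a; }; };
 *
 * first speculates on 'x' (no match, popped), then descends into the
 * anonymous struct and matches 'a' there; the target's raw spec records
 * both nesting levels, while its high-level spec gains only the named
 * accessor for 'a'.
 */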
3581
3582 /*
3583  * Try to match local spec to a target type and, if successful, produce full
3584  * target spec (high-level, low-level + bit offset).
3585  */
3586 static int bpf_core_spec_match(struct bpf_core_spec *local_spec,
3587                                const struct btf *targ_btf, __u32 targ_id,
3588                                struct bpf_core_spec *targ_spec)
3589 {
3590         const struct btf_type *targ_type;
3591         const struct bpf_core_accessor *local_acc;
3592         struct bpf_core_accessor *targ_acc;
3593         int i, sz, matched;
3594
3595         memset(targ_spec, 0, sizeof(*targ_spec));
3596         targ_spec->btf = targ_btf;
3597
3598         local_acc = &local_spec->spec[0];
3599         targ_acc = &targ_spec->spec[0];
3600
3601         for (i = 0; i < local_spec->len; i++, local_acc++, targ_acc++) {
3602                 targ_type = skip_mods_and_typedefs(targ_spec->btf, targ_id,
3603                                                    &targ_id);
3604                 if (!targ_type)
3605                         return -EINVAL;
3606
3607                 if (local_acc->name) {
3608                         matched = bpf_core_match_member(local_spec->btf,
3609                                                         local_acc,
3610                                                         targ_btf, targ_id,
3611                                                         targ_spec, &targ_id);
3612                         if (matched <= 0)
3613                                 return matched;
3614                 } else {
3615                         /* for i=0, targ_id is already treated as array element
3616                          * type (because it's the original struct); for others
3617                          * we should find array element type first
3618                          */
3619                         if (i > 0) {
3620                                 const struct btf_array *a;
3621                                 bool flex;
3622
3623                                 if (!btf_is_array(targ_type))
3624                                         return 0;
3625
3626                                 a = btf_array(targ_type);
3627                                 flex = is_flex_arr(targ_btf, targ_acc - 1, a);
3628                                 if (!flex && local_acc->idx >= a->nelems)
3629                                         return 0;
3630                                 if (!skip_mods_and_typedefs(targ_btf, a->type,
3631                                                             &targ_id))
3632                                         return -EINVAL;
3633                         }
3634
3635                         /* too deep struct/union/array nesting */
3636                         if (targ_spec->raw_len == BPF_CORE_SPEC_MAX_LEN)
3637                                 return -E2BIG;
3638
3639                         targ_acc->type_id = targ_id;
3640                         targ_acc->idx = local_acc->idx;
3641                         targ_acc->name = NULL;
3642                         targ_spec->len++;
3643                         targ_spec->raw_spec[targ_spec->raw_len] = targ_acc->idx;
3644                         targ_spec->raw_len++;
3645
3646                         sz = btf__resolve_size(targ_btf, targ_id);
3647                         if (sz < 0)
3648                                 return sz;
3649                         targ_spec->bit_offset += local_acc->idx * sz * 8;
3650                 }
3651         }
3652
3653         return 1;
3654 }
3655
3656 static int bpf_core_calc_field_relo(const struct bpf_program *prog,
3657                                     const struct bpf_field_reloc *relo,
3658                                     const struct bpf_core_spec *spec,
3659                                     __u32 *val, bool *validate)
3660 {
3661         const struct bpf_core_accessor *acc = &spec->spec[spec->len - 1];
3662         const struct btf_type *t = btf__type_by_id(spec->btf, acc->type_id);
3663         __u32 byte_off, byte_sz, bit_off, bit_sz;
3664         const struct btf_member *m;
3665         const struct btf_type *mt;
3666         bool bitfield;
3667         __s64 sz;
3668
3669         /* a[n] accessor needs special handling */
3670         if (!acc->name) {
3671                 if (relo->kind == BPF_FIELD_BYTE_OFFSET) {
3672                         *val = spec->bit_offset / 8;
3673                 } else if (relo->kind == BPF_FIELD_BYTE_SIZE) {
3674                         sz = btf__resolve_size(spec->btf, acc->type_id);
3675                         if (sz < 0)
3676                                 return -EINVAL;
3677                         *val = sz;
3678                 } else {
3679                         pr_warn("prog '%s': relo %d at insn #%d can't be applied to array access\n",
3680                                 bpf_program__title(prog, false),
3681                                 relo->kind, relo->insn_off / 8);
3682                         return -EINVAL;
3683                 }
3684                 if (validate)
3685                         *validate = true;
3686                 return 0;
3687         }
3688
3689         m = btf_members(t) + acc->idx;
3690         mt = skip_mods_and_typedefs(spec->btf, m->type, NULL);
3691         bit_off = spec->bit_offset;
3692         bit_sz = btf_member_bitfield_size(t, acc->idx);
3693
3694         bitfield = bit_sz > 0;
3695         if (bitfield) {
3696                 byte_sz = mt->size;
3697                 byte_off = bit_off / 8 / byte_sz * byte_sz;
3698                 /* figure out smallest int size necessary for bitfield load */
3699                 while (bit_off + bit_sz - byte_off * 8 > byte_sz * 8) {
3700                         if (byte_sz >= 8) {
3701                                 /* bitfield can't be read with 64-bit read */
3702                                 pr_warn("prog '%s': relo %d at insn #%d can't be satisfied for bitfield\n",
3703                                         bpf_program__title(prog, false),
3704                                         relo->kind, relo->insn_off / 8);
3705                                 return -E2BIG;
3706                         }
3707                         byte_sz *= 2;
3708                         byte_off = bit_off / 8 / byte_sz * byte_sz;
3709                 }
3710         } else {
3711                 sz = btf__resolve_size(spec->btf, m->type);
3712                 if (sz < 0)
3713                         return -EINVAL;
3714                 byte_sz = sz;
3715                 byte_off = spec->bit_offset / 8;
3716                 bit_sz = byte_sz * 8;
3717         }
3718
3719         /* for bitfields, all the relocatable aspects are ambiguous and we
3720          * might disagree with compiler, so turn off validation of expected
3721          * value, except for signedness
3722          */
3723         if (validate)
3724                 *validate = !bitfield;
3725
3726         switch (relo->kind) {
3727         case BPF_FIELD_BYTE_OFFSET:
3728                 *val = byte_off;
3729                 break;
3730         case BPF_FIELD_BYTE_SIZE:
3731                 *val = byte_sz;
3732                 break;
3733         case BPF_FIELD_SIGNED:
3734                 /* enums will be assumed unsigned */
3735                 *val = btf_is_enum(mt) ||
3736                        (btf_int_encoding(mt) & BTF_INT_SIGNED);
3737                 if (validate)
3738                         *validate = true; /* signedness is never ambiguous */
3739                 break;
3740         case BPF_FIELD_LSHIFT_U64:
3741 #if __BYTE_ORDER == __LITTLE_ENDIAN
3742                 *val = 64 - (bit_off + bit_sz - byte_off * 8);
3743 #else
3744                 *val = (8 - byte_sz) * 8 + (bit_off - byte_off * 8);
3745 #endif
3746                 break;
3747         case BPF_FIELD_RSHIFT_U64:
3748                 *val = 64 - bit_sz;
3749                 if (validate)
3750                         *validate = true; /* right shift is never ambiguous */
3751                 break;
3752         case BPF_FIELD_EXISTS:
3753         default:
3754                 pr_warn("prog '%s': unknown relo %d at insn #%d\n",
3755                         bpf_program__title(prog, false),
3756                         relo->kind, relo->insn_off / 8);
3757                 return -EINVAL;
3758         }
3759
3760         return 0;
3761 }
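/* Worked example (little-endian): for a bitfield 'int f : 7' at
 * bit_off = 13 within its struct, mt->size = 4, so all 20 bits fit in the
 * initial 4-byte window and the computed relocation values are:
 *
 *	BPF_FIELD_BYTE_OFFSET -> byte_off = 13 / 8 / 4 * 4 = 0
 *	BPF_FIELD_BYTE_SIZE   -> 4
 *	BPF_FIELD_LSHIFT_U64  -> 64 - (13 + 7 - 0 * 8) = 44
 *	BPF_FIELD_RSHIFT_U64  -> 64 - 7 = 57
 *
 * i.e. the field is extracted as ((u64)v << 44) >> 57 after a 4-byte load.
 */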
3762
3763 /*
3764  * Patch relocatable BPF instruction.
3765  *
3766  * Patched value is determined by relocation kind and target specification.
3767  * For a field existence relocation, target spec will be NULL if the field is not
3768  * found.
3769  * Expected insn->imm value is determined using relocation kind and local
3770  * spec, and is checked before patching instruction. If actual insn->imm value
3771  * is wrong, bail out with error.
3772  *
3773  * Currently two kinds of BPF instructions are supported:
3774  * 1. rX = <imm> (assignment with immediate operand);
3775  * 2. rX += <imm> (arithmetic operations with immediate operand);
3776  */
3777 static int bpf_core_reloc_insn(struct bpf_program *prog,
3778                                const struct bpf_field_reloc *relo,
3779                                const struct bpf_core_spec *local_spec,
3780                                const struct bpf_core_spec *targ_spec)
3781 {
3782         bool failed = false, validate = true;
3783         __u32 orig_val, new_val;
3784         struct bpf_insn *insn;
3785         int insn_idx, err;
3786         __u8 class;
3787
3788         if (relo->insn_off % sizeof(struct bpf_insn))
3789                 return -EINVAL;
3790         insn_idx = relo->insn_off / sizeof(struct bpf_insn);
3791
3792         if (relo->kind == BPF_FIELD_EXISTS) {
3793                 orig_val = 1; /* can't generate EXISTS relo w/o local field */
3794                 new_val = targ_spec ? 1 : 0;
3795         } else if (!targ_spec) {
3796                 failed = true;
3797                 new_val = (__u32)-1;
3798         } else {
3799                 err = bpf_core_calc_field_relo(prog, relo, local_spec,
3800                                                &orig_val, &validate);
3801                 if (err)
3802                         return err;
3803                 err = bpf_core_calc_field_relo(prog, relo, targ_spec,
3804                                                &new_val, NULL);
3805                 if (err)
3806                         return err;
3807         }
3808
3809         insn = &prog->insns[insn_idx];
3810         class = BPF_CLASS(insn->code);
3811
3812         if (class == BPF_ALU || class == BPF_ALU64) {
3813                 if (BPF_SRC(insn->code) != BPF_K)
3814                         return -EINVAL;
3815                 if (!failed && validate && insn->imm != orig_val) {
3816                         pr_warn("prog '%s': unexpected insn #%d value: got %u, exp %u -> %u\n",
3817                                 bpf_program__title(prog, false), insn_idx,
3818                                 insn->imm, orig_val, new_val);
3819                         return -EINVAL;
3820                 }
3821                 orig_val = insn->imm;
3822                 insn->imm = new_val;
3823                 pr_debug("prog '%s': patched insn #%d (ALU/ALU64)%s imm %u -> %u\n",
3824                          bpf_program__title(prog, false), insn_idx,
3825                          failed ? " w/ failed reloc" : "", orig_val, new_val);
3826         } else {
3827                 pr_warn("prog '%s': trying to relocate unrecognized insn #%d, code:%x, src:%x, dst:%x, off:%x, imm:%x\n",
3828                         bpf_program__title(prog, false),
3829                         insn_idx, insn->code, insn->src_reg, insn->dst_reg,
3830                         insn->off, insn->imm);
3831                 return -EINVAL;
3832         }
3833
3834         return 0;
3835 }
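/* Example: for a BPF_FIELD_BYTE_OFFSET relocation, the compiler emits the
 * local offset as an immediate, e.g. 'r1 = 16'; if the target kernel lays
 * the field out at byte 24 instead, the instruction is patched in place to
 * 'r1 = 24'. A failed relocation in relaxed mode patches imm to (__u32)-1,
 * which is only safe if that code path is never executed on such kernels.
 */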
3836
3837 static struct btf *btf_load_raw(const char *path)
3838 {
3839         struct btf *btf;
3840         size_t read_cnt;
3841         struct stat st;
3842         void *data;
3843         FILE *f;
3844
3845         if (stat(path, &st))
3846                 return ERR_PTR(-errno);
3847
3848         data = malloc(st.st_size);
3849         if (!data)
3850                 return ERR_PTR(-ENOMEM);
3851
3852         f = fopen(path, "rb");
3853         if (!f) {
3854                 btf = ERR_PTR(-errno);
3855                 goto cleanup;
3856         }
3857
3858         read_cnt = fread(data, 1, st.st_size, f);
3859         fclose(f);
3860         if (read_cnt < st.st_size) {
3861                 btf = ERR_PTR(-EBADF);
3862                 goto cleanup;
3863         }
3864
3865         btf = btf__new(data, read_cnt);
3866
3867 cleanup:
3868         free(data);
3869         return btf;
3870 }
3871
3872 /*
3873  * Probe a few well-known locations for the vmlinux kernel image and try to load BTF
3874  * data out of it to use for target BTF.
3875  */
3876 static struct btf *bpf_core_find_kernel_btf(void)
3877 {
3878         struct {
3879                 const char *path_fmt;
3880                 bool raw_btf;
3881         } locations[] = {
3882                 /* try canonical vmlinux BTF through sysfs first */
3883                 { "/sys/kernel/btf/vmlinux", true /* raw BTF */ },
3884                 /* fall back to trying to find vmlinux ELF on disk otherwise */
3885                 { "/boot/vmlinux-%1$s" },
3886                 { "/lib/modules/%1$s/vmlinux-%1$s" },
3887                 { "/lib/modules/%1$s/build/vmlinux" },
3888                 { "/usr/lib/modules/%1$s/kernel/vmlinux" },
3889                 { "/usr/lib/debug/boot/vmlinux-%1$s" },
3890                 { "/usr/lib/debug/boot/vmlinux-%1$s.debug" },
3891                 { "/usr/lib/debug/lib/modules/%1$s/vmlinux" },
3892         };
3893         char path[PATH_MAX + 1];
3894         struct utsname buf;
3895         struct btf *btf;
3896         int i;
3897
3898         uname(&buf);
3899
3900         for (i = 0; i < ARRAY_SIZE(locations); i++) {
3901                 snprintf(path, PATH_MAX, locations[i].path_fmt, buf.release);
3902
3903                 if (access(path, R_OK))
3904                         continue;
3905
3906                 if (locations[i].raw_btf)
3907                         btf = btf_load_raw(path);
3908                 else
3909                         btf = btf__parse_elf(path, NULL);
3910
3911                 pr_debug("loading kernel BTF '%s': %ld\n",
3912                          path, IS_ERR(btf) ? PTR_ERR(btf) : 0);
3913                 if (IS_ERR(btf))
3914                         continue;
3915
3916                 return btf;
3917         }
3918
3919         pr_warn("failed to find valid kernel BTF\n");
3920         return ERR_PTR(-ESRCH);
3921 }
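/* Note: /sys/kernel/btf/vmlinux only exists on kernels built with
 * CONFIG_DEBUG_INFO_BTF=y; the on-disk fallbacks require a vmlinux image
 * carrying a .BTF section (typically produced from DWARF with pahole -J).
 */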
3922
3923 /* Output spec definition in the format:
3924  * [<type-id>] (<type-name>) + <raw-spec> => <offset>@<spec>,
3925  * where <spec> is a C-syntax view of recorded field access, e.g.: x.a[3].b
3926  */
3927 static void bpf_core_dump_spec(int level, const struct bpf_core_spec *spec)
3928 {
3929         const struct btf_type *t;
3930         const char *s;
3931         __u32 type_id;
3932         int i;
3933
3934         type_id = spec->spec[0].type_id;
3935         t = btf__type_by_id(spec->btf, type_id);
3936         s = btf__name_by_offset(spec->btf, t->name_off);
3937         libbpf_print(level, "[%u] %s + ", type_id, s);
3938
3939         for (i = 0; i < spec->raw_len; i++)
3940                 libbpf_print(level, "%d%s", spec->raw_spec[i],
3941                              i == spec->raw_len - 1 ? " => " : ":");
3942
3943         libbpf_print(level, "%u.%u @ &x",
3944                      spec->bit_offset / 8, spec->bit_offset % 8);
3945
3946         for (i = 0; i < spec->len; i++) {
3947                 if (spec->spec[i].name)
3948                         libbpf_print(level, ".%s", spec->spec[i].name);
3949                 else
3950                         libbpf_print(level, "[%u]", spec->spec[i].idx);
3951         }
3952
3953 }
3954
3955 static size_t bpf_core_hash_fn(const void *key, void *ctx)
3956 {
3957         return (size_t)key;
3958 }
3959
3960 static bool bpf_core_equal_fn(const void *k1, const void *k2, void *ctx)
3961 {
3962         return k1 == k2;
3963 }
3964
3965 static void *u32_as_hash_key(__u32 x)
3966 {
3967         return (void *)(uintptr_t)x;
3968 }
3969
3970 /*
3971  * CO-RE relocate single instruction.
3972  *
3973  * The outline and important points of the algorithm:
3974  * 1. For given local type, find corresponding candidate target types.
3975  *    Candidate type is a type with the same "essential" name, ignoring
3976  *    everything after last triple underscore (___). E.g., `sample`,
3977  *    `sample___flavor_one`, `sample___flavor_another_one`, are all candidates
3978  *    for each other. Names with triple underscore are referred to as
3979  *    "flavors" and are useful, among other things, for specifying/supporting
3980  *    incompatible variations of the same kernel struct, which
3981  *    might differ between different kernel versions and/or build
3982  *    configurations.
3983  *
3984  *    N.B. Struct "flavors" could be generated by bpftool's BTF-to-C
3985  *    converter, when the deduplicated BTF of a kernel still contains more
3986  *    than one distinct type with the same name. In that case, ___2, ___3,
3987  *    etc. are appended starting from the second name conflict. But struct
3988  *    flavors can also be defined "locally", in a BPF program, to extract
3989  *    the same data despite incompatible changes between different kernel
3990  *    versions/configurations. For instance, to handle field renames between
3991  *    kernel versions, one can use two flavors of the struct name with the
3992  *    same common name and use conditional relocations to extract that field,
3993  *    depending on target kernel version (see the sketch after this function).
3994  * 2. For each candidate type, try to match local specification to this
3995  *    candidate target type. Matching involves finding corresponding
3996  *    high-level spec accessors, meaning that all named fields should match,
3997  *    as well as all array accesses should be within the actual bounds. Also,
3998  *    types should be compatible (see bpf_core_fields_are_compat for details).
3999  * 3. It is supported and expected that there might be multiple flavors
4000  *    matching the spec. As long as all the specs resolve to the same set of
4001  *    offsets across all candidates, there is no error. If there is any
4002  *    ambiguity, CO-RE relocation will fail. This is necessary to accommodate
4003  *    imperfections of BTF deduplication, which can cause slight duplication of
4004  *    the same BTF type, if some directly or indirectly referenced (by
4005  *    pointer) type gets resolved to different actual types in different
4006  *    object files. If such a situation occurs, deduplicated BTF will end up
4007  *    with two (or more) structurally identical types, which differ only in
4008  *    types they refer to through pointer. This should be OK in most cases and
4009  *    is not an error.
4010  * 4. Candidate types search is performed by linearly scanning through all
4011  *    types in target BTF. It is anticipated that this is overall more
4012  *    efficient memory-wise and not significantly worse (if not better)
4013  *    CPU-wise compared to prebuilding a map from all local type names to
4014  * a list of candidate type names. It's also sped up by caching the resolved
4015  * list of matching candidates for each local "root" type ID that has at
4016  *    least one bpf_field_reloc associated with it. This list is shared
4017  *    between multiple relocations for the same type ID and is updated as some
4018  *    of the candidates are pruned due to structural incompatibility.
4019  */
4020 static int bpf_core_reloc_field(struct bpf_program *prog,
4021                                  const struct bpf_field_reloc *relo,
4022                                  int relo_idx,
4023                                  const struct btf *local_btf,
4024                                  const struct btf *targ_btf,
4025                                  struct hashmap *cand_cache)
4026 {
4027         const char *prog_name = bpf_program__title(prog, false);
4028         struct bpf_core_spec local_spec, cand_spec, targ_spec;
4029         const void *type_key = u32_as_hash_key(relo->type_id);
4030         const struct btf_type *local_type, *cand_type;
4031         const char *local_name, *cand_name;
4032         struct ids_vec *cand_ids;
4033         __u32 local_id, cand_id;
4034         const char *spec_str;
4035         int i, j, err;
4036
4037         local_id = relo->type_id;
4038         local_type = btf__type_by_id(local_btf, local_id);
4039         if (!local_type)
4040                 return -EINVAL;
4041
4042         local_name = btf__name_by_offset(local_btf, local_type->name_off);
4043         if (str_is_empty(local_name))
4044                 return -EINVAL;
4045
4046         spec_str = btf__name_by_offset(local_btf, relo->access_str_off);
4047         if (str_is_empty(spec_str))
4048                 return -EINVAL;
4049
4050         err = bpf_core_spec_parse(local_btf, local_id, spec_str, &local_spec);
4051         if (err) {
4052                 pr_warn("prog '%s': relo #%d: parsing [%d] %s + %s failed: %d\n",
4053                         prog_name, relo_idx, local_id, local_name, spec_str,
4054                         err);
4055                 return -EINVAL;
4056         }
4057
4058         pr_debug("prog '%s': relo #%d: kind %d, spec is ", prog_name, relo_idx,
4059                  relo->kind);
4060         bpf_core_dump_spec(LIBBPF_DEBUG, &local_spec);
4061         libbpf_print(LIBBPF_DEBUG, "\n");
4062
4063         if (!hashmap__find(cand_cache, type_key, (void **)&cand_ids)) {
4064                 cand_ids = bpf_core_find_cands(local_btf, local_id, targ_btf);
4065                 if (IS_ERR(cand_ids)) {
4066                         pr_warn("prog '%s': relo #%d: target candidate search failed for [%d] %s: %ld\n",
4067                                 prog_name, relo_idx, local_id, local_name,
4068                                 PTR_ERR(cand_ids));
4069                         return PTR_ERR(cand_ids);
4070                 }
4071                 err = hashmap__set(cand_cache, type_key, cand_ids, NULL, NULL);
4072                 if (err) {
4073                         bpf_core_free_cands(cand_ids);
4074                         return err;
4075                 }
4076         }
4077
4078         for (i = 0, j = 0; i < cand_ids->len; i++) {
4079                 cand_id = cand_ids->data[i];
4080                 cand_type = btf__type_by_id(targ_btf, cand_id);
4081                 cand_name = btf__name_by_offset(targ_btf, cand_type->name_off);
4082
4083                 err = bpf_core_spec_match(&local_spec, targ_btf,
4084                                           cand_id, &cand_spec);
4085                 pr_debug("prog '%s': relo #%d: matching candidate #%d %s against spec ",
4086                          prog_name, relo_idx, i, cand_name);
4087                 bpf_core_dump_spec(LIBBPF_DEBUG, &cand_spec);
4088                 libbpf_print(LIBBPF_DEBUG, ": %d\n", err);
4089                 if (err < 0) {
4090                         pr_warn("prog '%s': relo #%d: matching error: %d\n",
4091                                 prog_name, relo_idx, err);
4092                         return err;
4093                 }
4094                 if (err == 0)
4095                         continue;
4096
4097                 if (j == 0) {
4098                         targ_spec = cand_spec;
4099                 } else if (cand_spec.bit_offset != targ_spec.bit_offset) {
4100                         /* if there are many candidates, they should all
4101                          * resolve to the same bit offset
4102                          */
4103                         pr_warn("prog '%s': relo #%d: offset ambiguity: %u != %u\n",
4104                                 prog_name, relo_idx, cand_spec.bit_offset,
4105                                 targ_spec.bit_offset);
4106                         return -EINVAL;
4107                 }
4108
4109                 cand_ids->data[j++] = cand_spec.spec[0].type_id;
4110         }
4111
4112         /*
4113          * For BPF_FIELD_EXISTS relo or when relaxed CO-RE reloc mode is
4114          * requested, it's expected that we might not find any candidates.
4115          * In this case, if field wasn't found in any candidate, the list of
4116          * candidates shouldn't change at all; we'll just handle the relocation
4117          * appropriately, depending on relo's kind.
4118          */
4119         if (j > 0)
4120                 cand_ids->len = j;
4121
4122         if (j == 0 && !prog->obj->relaxed_core_relocs &&
4123             relo->kind != BPF_FIELD_EXISTS) {
4124                 pr_warn("prog '%s': relo #%d: no matching targets found for [%d] %s + %s\n",
4125                         prog_name, relo_idx, local_id, local_name, spec_str);
4126                 return -ESRCH;
4127         }
4128
4129         /* bpf_core_reloc_insn should know how to handle missing targ_spec */
4130         err = bpf_core_reloc_insn(prog, relo, &local_spec,
4131                                   j ? &targ_spec : NULL);
4132         if (err) {
4133                 pr_warn("prog '%s': relo #%d: failed to patch insn at offset %d: %d\n",
4134                         prog_name, relo_idx, relo->insn_off, err);
4135                 return -EINVAL;
4136         }
4137
4138         return 0;
4139 }
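/* Example (sketch, BPF program side): handling a field rename across
 * kernel versions with two local flavors of the same struct, using
 * bpf_core_field_exists() and BPF_CORE_READ() from bpf_core_read.h; both
 * flavors match kernel 'struct sample' candidates by essential name:
 *
 *	struct sample___old { int field_a; } __attribute__((preserve_access_index));
 *	struct sample___new { int field_b; } __attribute__((preserve_access_index));
 *
 *	if (bpf_core_field_exists(((struct sample___old *)s)->field_a))
 *		val = BPF_CORE_READ((struct sample___old *)s, field_a);
 *	else
 *		val = BPF_CORE_READ((struct sample___new *)s, field_b);
 */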
4140
4141 static int
4142 bpf_core_reloc_fields(struct bpf_object *obj, const char *targ_btf_path)
4143 {
4144         const struct btf_ext_info_sec *sec;
4145         const struct bpf_field_reloc *rec;
4146         const struct btf_ext_info *seg;
4147         struct hashmap_entry *entry;
4148         struct hashmap *cand_cache = NULL;
4149         struct bpf_program *prog;
4150         struct btf *targ_btf;
4151         const char *sec_name;
4152         int i, err = 0;
4153
4154         if (targ_btf_path)
4155                 targ_btf = btf__parse_elf(targ_btf_path, NULL);
4156         else
4157                 targ_btf = bpf_core_find_kernel_btf();
4158         if (IS_ERR(targ_btf)) {
4159                 pr_warn("failed to get target BTF: %ld\n", PTR_ERR(targ_btf));
4160                 return PTR_ERR(targ_btf);
4161         }
4162
4163         cand_cache = hashmap__new(bpf_core_hash_fn, bpf_core_equal_fn, NULL);
4164         if (IS_ERR(cand_cache)) {
4165                 err = PTR_ERR(cand_cache);
4166                 goto out;
4167         }
4168
4169         seg = &obj->btf_ext->field_reloc_info;
4170         for_each_btf_ext_sec(seg, sec) {
4171                 sec_name = btf__name_by_offset(obj->btf, sec->sec_name_off);
4172                 if (str_is_empty(sec_name)) {
4173                         err = -EINVAL;
4174                         goto out;
4175                 }
4176                 prog = bpf_object__find_program_by_title(obj, sec_name);
4177                 if (!prog) {
4178                         pr_warn("failed to find program '%s' for CO-RE offset relocation\n",
4179                                 sec_name);
4180                         err = -EINVAL;
4181                         goto out;
4182                 }
4183
4184                 pr_debug("prog '%s': performing %d CO-RE offset relocs\n",
4185                          sec_name, sec->num_info);
4186
4187                 for_each_btf_ext_rec(seg, sec, i, rec) {
4188                         err = bpf_core_reloc_field(prog, rec, i, obj->btf,
4189                                                    targ_btf, cand_cache);
4190                         if (err) {
4191                                 pr_warn("prog '%s': relo #%d: failed to relocate: %d\n",
4192                                         sec_name, i, err);
4193                                 goto out;
4194                         }
4195                 }
4196         }
4197
4198 out:
4199         btf__free(targ_btf);
4200         if (!IS_ERR_OR_NULL(cand_cache)) {
4201                 hashmap__for_each_entry(cand_cache, entry, i) {
4202                         bpf_core_free_cands(entry->value);
4203                 }
4204                 hashmap__free(cand_cache);
4205         }
4206         return err;
4207 }
4208
4209 static int
4210 bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path)
4211 {
4212         int err = 0;
4213
4214         if (obj->btf_ext->field_reloc_info.len)
4215                 err = bpf_core_reloc_fields(obj, targ_btf_path);
4216
4217         return err;
4218 }
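/* Illustrative caller-side sketch (not part of this file): CO-RE field
 * relocation runs during load, and the target BTF can be overridden via
 * bpf_object__load_xattr(); when target_btf_path is NULL,
 * bpf_core_find_kernel_btf() locates kernel BTF on its own. The path
 * below is hypothetical and must be an ELF file with a .BTF section:
 *
 *	struct bpf_object_load_attr load_attr = {
 *		.obj = obj,
 *		.target_btf_path = "/path/to/vmlinux",
 *	};
 *	err = bpf_object__load_xattr(&load_attr);
 */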
4219
4220 static int
4221 bpf_program__reloc_text(struct bpf_program *prog, struct bpf_object *obj,
4222                         struct reloc_desc *relo)
4223 {
4224         struct bpf_insn *insn, *new_insn;
4225         struct bpf_program *text;
4226         size_t new_cnt;
4227         int err;
4228
4229         if (prog->idx == obj->efile.text_shndx) {
4230                 pr_warn("relo in .text insn %d into off %d (insn #%d)\n",
4231                         relo->insn_idx, relo->sym_off, relo->sym_off / 8);
4232                 return -LIBBPF_ERRNO__RELOC;
4233         }
4234
4235         if (prog->main_prog_cnt == 0) {
4236                 text = bpf_object__find_prog_by_idx(obj, obj->efile.text_shndx);
4237                 if (!text) {
4238                         pr_warn("no .text section found, yet relocations into .text exist\n");
4239                         return -LIBBPF_ERRNO__RELOC;
4240                 }
4241                 new_cnt = prog->insns_cnt + text->insns_cnt;
4242                 new_insn = reallocarray(prog->insns, new_cnt, sizeof(*insn));
4243                 if (!new_insn) {
4244                         pr_warn("oom in prog realloc\n");
4245                         return -ENOMEM;
4246                 }
4247                 prog->insns = new_insn;
4248
4249                 if (obj->btf_ext) {
4250                         err = bpf_program_reloc_btf_ext(prog, obj,
4251                                                         text->section_name,
4252                                                         prog->insns_cnt);
4253                         if (err)
4254                                 return err;
4255                 }
4256
4257                 memcpy(new_insn + prog->insns_cnt, text->insns,
4258                        text->insns_cnt * sizeof(*insn));
4259                 prog->main_prog_cnt = prog->insns_cnt;
4260                 prog->insns_cnt = new_cnt;
4261                 pr_debug("added %zd insn from %s to prog %s\n",
4262                          text->insns_cnt, text->section_name,
4263                          prog->section_name);
4264         }
4265         insn = &prog->insns[relo->insn_idx];
4266         insn->imm += relo->sym_off / 8 + prog->main_prog_cnt - relo->insn_idx;
4267         return 0;
4268 }
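/* Worked example for the imm adjustment above (illustrative, assuming
 * clang emitted the pseudo-call with imm == -1): a 10-insn program calls
 * a .text sub-program at sym_off 16 (insn #2 of .text) from insn_idx 3.
 * After .text (5 insns, appended once) main_prog_cnt == 10, so:
 *
 *	imm = -1 + 16/8 + 10 - 3 = 8
 *
 * and the kernel's call target, insn_idx + imm + 1 = 3 + 8 + 1 = 12,
 * is exactly the callee's position (10 + 2) in the merged program.
 */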
4269
4270 static int
4271 bpf_program__relocate(struct bpf_program *prog, struct bpf_object *obj)
4272 {
4273         int i, err;
4274
4275         if (!prog)
4276                 return 0;
4277
4278         if (obj->btf_ext) {
4279                 err = bpf_program_reloc_btf_ext(prog, obj,
4280                                                 prog->section_name, 0);
4281                 if (err)
4282                         return err;
4283         }
4284
4285         if (!prog->reloc_desc)
4286                 return 0;
4287
4288         for (i = 0; i < prog->nr_reloc; i++) {
4289                 struct reloc_desc *relo = &prog->reloc_desc[i];
4290                 struct bpf_insn *insn = &prog->insns[relo->insn_idx];
4291
4292                 if (relo->insn_idx + 1 >= (int)prog->insns_cnt) {
4293                         pr_warn("relocation out of range: '%s'\n",
4294                                 prog->section_name);
4295                         return -LIBBPF_ERRNO__RELOC;
4296                 }
4297
4298                 switch (relo->type) {
4299                 case RELO_LD64:
4300                         insn[0].src_reg = BPF_PSEUDO_MAP_FD;
4301                         insn[0].imm = obj->maps[relo->map_idx].fd;
4302                         break;
4303                 case RELO_DATA:
4304                         insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
4305                         insn[1].imm = insn[0].imm + relo->sym_off;
4306                         insn[0].imm = obj->maps[relo->map_idx].fd;
4307                         break;
4308                 case RELO_EXTERN:
4309                         insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
4310                         insn[0].imm = obj->maps[obj->kconfig_map_idx].fd;
4311                         insn[1].imm = relo->sym_off;
4312                         break;
4313                 case RELO_CALL:
4314                         err = bpf_program__reloc_text(prog, obj, relo);
4315                         if (err)
4316                                 return err;
4317                         break;
4318                 default:
4319                         pr_warn("relo #%d: bad relo type %d\n", i, relo->type);
4320                         return -EINVAL;
4321                 }
4322         }
4323
4324         zfree(&prog->reloc_desc);
4325         prog->nr_reloc = 0;
4326         return 0;
4327 }
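/* Illustrative effect of the RELO_DATA case above: a BPF_LD_IMM64 spans
 * two instructions, so the patched pair carries the map fd in the first
 * slot and the variable's offset within the map value in the second:
 *
 *	insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
 *	insn[0].imm     = <map fd>;
 *	insn[1].imm     = <original imm> + <symbol offset in section>;
 *
 * The verifier later rewrites the pair into the actual map value
 * address.
 */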
4328
4329 static int
4330 bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path)
4331 {
4332         struct bpf_program *prog;
4333         size_t i;
4334         int err;
4335
4336         if (obj->btf_ext) {
4337                 err = bpf_object__relocate_core(obj, targ_btf_path);
4338                 if (err) {
4339                         pr_warn("failed to perform CO-RE relocations: %d\n",
4340                                 err);
4341                         return err;
4342                 }
4343         }
4344         for (i = 0; i < obj->nr_programs; i++) {
4345                 prog = &obj->programs[i];
4346
4347                 err = bpf_program__relocate(prog, obj);
4348                 if (err) {
4349                         pr_warn("failed to relocate '%s'\n", prog->section_name);
4350                         return err;
4351                 }
4352         }
4353         return 0;
4354 }
4355
4356 static int bpf_object__collect_reloc(struct bpf_object *obj)
4357 {
4358         int i, err;
4359
4360         if (!obj_elf_valid(obj)) {
4361                 pr_warn("Internal error: elf object is closed\n");
4362                 return -LIBBPF_ERRNO__INTERNAL;
4363         }
4364
4365         for (i = 0; i < obj->efile.nr_reloc_sects; i++) {
4366                 GElf_Shdr *shdr = &obj->efile.reloc_sects[i].shdr;
4367                 Elf_Data *data = obj->efile.reloc_sects[i].data;
4368                 int idx = shdr->sh_info;
4369                 struct bpf_program *prog;
4370
4371                 if (shdr->sh_type != SHT_REL) {
4372                         pr_warn("internal error at %d\n", __LINE__);
4373                         return -LIBBPF_ERRNO__INTERNAL;
4374                 }
4375
4376                 prog = bpf_object__find_prog_by_idx(obj, idx);
4377                 if (!prog) {
4378                         pr_warn("relocation failed: no section(%d)\n", idx);
4379                         return -LIBBPF_ERRNO__RELOC;
4380                 }
4381
4382                 err = bpf_program__collect_reloc(prog, shdr, data, obj);
4383                 if (err)
4384                         return err;
4385         }
4386         return 0;
4387 }
4388
4389 static int
4390 load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt,
4391              char *license, __u32 kern_version, int *pfd)
4392 {
4393         struct bpf_load_program_attr load_attr;
4394         char *cp, errmsg[STRERR_BUFSIZE];
4395         int log_buf_size = BPF_LOG_BUF_SIZE;
4396         char *log_buf;
4397         int btf_fd, ret;
4398
4399         if (!insns || !insns_cnt)
4400                 return -EINVAL;
4401
4402         memset(&load_attr, 0, sizeof(struct bpf_load_program_attr));
4403         load_attr.prog_type = prog->type;
4404         load_attr.expected_attach_type = prog->expected_attach_type;
4405         if (prog->caps->name)
4406                 load_attr.name = prog->name;
4407         load_attr.insns = insns;
4408         load_attr.insns_cnt = insns_cnt;
4409         load_attr.license = license;
4410         if (prog->type == BPF_PROG_TYPE_TRACING) {
4411                 load_attr.attach_prog_fd = prog->attach_prog_fd;
4412                 load_attr.attach_btf_id = prog->attach_btf_id;
4413         } else {
4414                 load_attr.kern_version = kern_version;
4415                 load_attr.prog_ifindex = prog->prog_ifindex;
4416         }
4417         /* if .BTF.ext was loaded, kernel supports associated BTF for prog */
4418         if (prog->obj->btf_ext)
4419                 btf_fd = bpf_object__btf_fd(prog->obj);
4420         else
4421                 btf_fd = -1;
4422         load_attr.prog_btf_fd = btf_fd >= 0 ? btf_fd : 0;
4423         load_attr.func_info = prog->func_info;
4424         load_attr.func_info_rec_size = prog->func_info_rec_size;
4425         load_attr.func_info_cnt = prog->func_info_cnt;
4426         load_attr.line_info = prog->line_info;
4427         load_attr.line_info_rec_size = prog->line_info_rec_size;
4428         load_attr.line_info_cnt = prog->line_info_cnt;
4429         load_attr.log_level = prog->log_level;
4430         load_attr.prog_flags = prog->prog_flags;
4431
4432 retry_load:
4433         log_buf = malloc(log_buf_size);
4434         if (!log_buf)
4435                 pr_warn("failed to allocate log buffer for BPF loader, continuing without log\n");
4436
4437         ret = bpf_load_program_xattr(&load_attr, log_buf, log_buf_size);
4438
4439         if (ret >= 0) {
4440                 if (load_attr.log_level)
4441                         pr_debug("verifier log:\n%s", log_buf);
4442                 *pfd = ret;
4443                 ret = 0;
4444                 goto out;
4445         }
4446
4447         if (errno == ENOSPC) {
4448                 log_buf_size <<= 1;
4449                 free(log_buf);
4450                 goto retry_load;
4451         }
4452         ret = -errno;
4453         cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
4454         pr_warn("failed to load BPF program: %s\n", cp);
4455         pr_perm_msg(ret);
4456
4457         if (log_buf && log_buf[0] != '\0') {
4458                 ret = -LIBBPF_ERRNO__VERIFY;
4459                 pr_warn("-- BEGIN LOG --\n");
4460                 pr_warn("\n%s\n", log_buf);
4461                 pr_warn("-- END LOG --\n");
4462         } else if (load_attr.insns_cnt >= BPF_MAXINSNS) {
4463                 pr_warn("Program too large (%zu insns), at most %d insns\n",
4464                         load_attr.insns_cnt, BPF_MAXINSNS);
4465                 ret = -LIBBPF_ERRNO__PROG2BIG;
4466         } else if (load_attr.prog_type != BPF_PROG_TYPE_KPROBE) {
4467                 /* Wrong program type? */
4468                 int fd;
4469
4470                 load_attr.prog_type = BPF_PROG_TYPE_KPROBE;
4471                 load_attr.expected_attach_type = 0;
4472                 fd = bpf_load_program_xattr(&load_attr, NULL, 0);
4473                 if (fd >= 0) {
4474                         close(fd);
4475                         ret = -LIBBPF_ERRNO__PROGTYPE;
4476                         goto out;
4477                 }
4478         }
4479
4480 out:
4481         free(log_buf);
4482         return ret;
4483 }
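/* Stand-alone sketch of the call the retry loop above wraps (assuming
 * the insn-building macros from tools/include/linux/filter.h; the
 * trivial "r0 = 0; exit" program is hypothetical):
 *
 *	struct bpf_insn insns[] = {
 *		BPF_MOV64_IMM(BPF_REG_0, 0),
 *		BPF_EXIT_INSN(),
 *	};
 *	struct bpf_load_program_attr attr = {
 *		.prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
 *		.insns = insns,
 *		.insns_cnt = 2,
 *		.license = "GPL",
 *	};
 *	char log[4096];
 *	int fd = bpf_load_program_xattr(&attr, log, sizeof(log));
 *
 * On failure fd is negative, errno is set, and 'log' holds the
 * verifier's complaint; load_program() above additionally doubles the
 * buffer and retries on ENOSPC.
 */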
4484
4485 static int libbpf_find_attach_btf_id(const char *name,
4486                                      enum bpf_attach_type attach_type,
4487                                      __u32 attach_prog_fd);
4488
4489 int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_ver)
4490 {
4491         int err = 0, fd, i, btf_id;
4492
4493         if (prog->type == BPF_PROG_TYPE_TRACING) {
4494                 btf_id = libbpf_find_attach_btf_id(prog->section_name,
4495                                                    prog->expected_attach_type,
4496                                                    prog->attach_prog_fd);
4497                 if (btf_id <= 0)
4498                         return btf_id;
4499                 prog->attach_btf_id = btf_id;
4500         }
4501
4502         if (prog->instances.nr < 0 || !prog->instances.fds) {
4503                 if (prog->preprocessor) {
4504                         pr_warn("Internal error: can't load program '%s'\n",
4505                                 prog->section_name);
4506                         return -LIBBPF_ERRNO__INTERNAL;
4507                 }
4508
4509                 prog->instances.fds = malloc(sizeof(int));
4510                 if (!prog->instances.fds) {
4511                         pr_warn("Not enough memory for BPF fds\n");
4512                         return -ENOMEM;
4513                 }
4514                 prog->instances.nr = 1;
4515                 prog->instances.fds[0] = -1;
4516         }
4517
4518         if (!prog->preprocessor) {
4519                 if (prog->instances.nr != 1) {
4520                         pr_warn("Program '%s' is inconsistent: nr(%d) != 1\n",
4521                                 prog->section_name, prog->instances.nr);
4522                 }
4523                 err = load_program(prog, prog->insns, prog->insns_cnt,
4524                                    license, kern_ver, &fd);
4525                 if (!err)
4526                         prog->instances.fds[0] = fd;
4527                 goto out;
4528         }
4529
4530         for (i = 0; i < prog->instances.nr; i++) {
4531                 struct bpf_prog_prep_result result;
4532                 bpf_program_prep_t preprocessor = prog->preprocessor;
4533
4534                 memset(&result, 0, sizeof(result));
4535                 err = preprocessor(prog, i, prog->insns,
4536                                    prog->insns_cnt, &result);
4537                 if (err) {
4538                         pr_warn("Preprocessing the %dth instance of program '%s' failed\n",
4539                                 i, prog->section_name);
4540                         goto out;
4541                 }
4542
4543                 if (!result.new_insn_ptr || !result.new_insn_cnt) {
4544                         pr_debug("Skip loading the %dth instance of program '%s'\n",
4545                                  i, prog->section_name);
4546                         prog->instances.fds[i] = -1;
4547                         if (result.pfd)
4548                                 *result.pfd = -1;
4549                         continue;
4550                 }
4551
4552                 err = load_program(prog, result.new_insn_ptr,
4553                                    result.new_insn_cnt, license, kern_ver, &fd);
4554                 if (err) {
4555                         pr_warn("Loading the %dth instance of program '%s' failed\n",
4556                                 i, prog->section_name);
4557                         goto out;
4558                 }
4559
4560                 if (result.pfd)
4561                         *result.pfd = fd;
4562                 prog->instances.fds[i] = fd;
4563         }
4564 out:
4565         if (err)
4566                 pr_warn("failed to load program '%s'\n", prog->section_name);
4567         zfree(&prog->insns);
4568         prog->insns_cnt = 0;
4569         return err;
4570 }
4571
4572 static bool bpf_program__is_function_storage(const struct bpf_program *prog,
4573                                              const struct bpf_object *obj)
4574 {
4575         return prog->idx == obj->efile.text_shndx && obj->has_pseudo_calls;
4576 }
4577
4578 static int
4579 bpf_object__load_progs(struct bpf_object *obj, int log_level)
4580 {
4581         size_t i;
4582         int err;
4583
4584         for (i = 0; i < obj->nr_programs; i++) {
4585                 if (bpf_program__is_function_storage(&obj->programs[i], obj))
4586                         continue;
4587                 obj->programs[i].log_level |= log_level;
4588                 err = bpf_program__load(&obj->programs[i],
4589                                         obj->license,
4590                                         obj->kern_version);
4591                 if (err)
4592                         return err;
4593         }
4594         return 0;
4595 }
4596
4597 static struct bpf_object *
4598 __bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz,
4599                    const struct bpf_object_open_opts *opts)
4600 {
4601         const char *obj_name, *kconfig;
4602         struct bpf_program *prog;
4603         struct bpf_object *obj;
4604         char tmp_name[64];
4605         int err;
4606
4607         if (elf_version(EV_CURRENT) == EV_NONE) {
4608                 pr_warn("failed to init libelf for %s\n",
4609                         path ? : "(mem buf)");
4610                 return ERR_PTR(-LIBBPF_ERRNO__LIBELF);
4611         }
4612
4613         if (!OPTS_VALID(opts, bpf_object_open_opts))
4614                 return ERR_PTR(-EINVAL);
4615
4616         obj_name = OPTS_GET(opts, object_name, NULL);
4617         if (obj_buf) {
4618                 if (!obj_name) {
4619                         snprintf(tmp_name, sizeof(tmp_name), "%lx-%lx",
4620                                  (unsigned long)obj_buf,
4621                                  (unsigned long)obj_buf_sz);
4622                         obj_name = tmp_name;
4623                 }
4624                 path = obj_name;
4625                 pr_debug("loading object '%s' from buffer\n", obj_name);
4626         }
4627
4628         obj = bpf_object__new(path, obj_buf, obj_buf_sz, obj_name);
4629         if (IS_ERR(obj))
4630                 return obj;
4631
4632         obj->relaxed_core_relocs = OPTS_GET(opts, relaxed_core_relocs, false);
4633         kconfig = OPTS_GET(opts, kconfig, NULL);
4634         if (kconfig) {
4635                 obj->kconfig = strdup(kconfig);
4636                 if (!obj->kconfig) {
4637                         err = -ENOMEM;
4638                         goto out;
4639                 }
4638         }
4639
4640         err = bpf_object__elf_init(obj);
4641         err = err ? : bpf_object__check_endianness(obj);
4642         err = err ? : bpf_object__elf_collect(obj);
4643         err = err ? : bpf_object__collect_externs(obj);
4644         err = err ? : bpf_object__finalize_btf(obj);
4645         err = err ? : bpf_object__init_maps(obj, opts);
4646         err = err ? : bpf_object__init_prog_names(obj);
4647         err = err ? : bpf_object__collect_reloc(obj);
4648         if (err)
4649                 goto out;
4650         bpf_object__elf_finish(obj);
4651
4652         bpf_object__for_each_program(prog, obj) {
4653                 enum bpf_prog_type prog_type;
4654                 enum bpf_attach_type attach_type;
4655
4656                 err = libbpf_prog_type_by_name(prog->section_name, &prog_type,
4657                                                &attach_type);
4658                 if (err == -ESRCH)
4659                         /* couldn't guess, but user might manually specify */
4660                         continue;
4661                 if (err)
4662                         goto out;
4663
4664                 bpf_program__set_type(prog, prog_type);
4665                 bpf_program__set_expected_attach_type(prog, attach_type);
4666                 if (prog_type == BPF_PROG_TYPE_TRACING)
4667                         prog->attach_prog_fd = OPTS_GET(opts, attach_prog_fd, 0);
4668         }
4669
4670         return obj;
4671 out:
4672         bpf_object__close(obj);
4673         return ERR_PTR(err);
4674 }
4675
4676 static struct bpf_object *
4677 __bpf_object__open_xattr(struct bpf_object_open_attr *attr, int flags)
4678 {
4679         DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts,
4680                 .relaxed_maps = flags & MAPS_RELAX_COMPAT,
4681         );
4682
4683         /* param validation */
4684         if (!attr->file)
4685                 return NULL;
4686
4687         pr_debug("loading %s\n", attr->file);
4688         return __bpf_object__open(attr->file, NULL, 0, &opts);
4689 }
4690
4691 struct bpf_object *bpf_object__open_xattr(struct bpf_object_open_attr *attr)
4692 {
4693         return __bpf_object__open_xattr(attr, 0);
4694 }
4695
4696 struct bpf_object *bpf_object__open(const char *path)
4697 {
4698         struct bpf_object_open_attr attr = {
4699                 .file           = path,
4700                 .prog_type      = BPF_PROG_TYPE_UNSPEC,
4701         };
4702
4703         return bpf_object__open_xattr(&attr);
4704 }
4705
4706 struct bpf_object *
4707 bpf_object__open_file(const char *path, const struct bpf_object_open_opts *opts)
4708 {
4709         if (!path)
4710                 return ERR_PTR(-EINVAL);
4711
4712         pr_debug("loading %s\n", path);
4713
4714         return __bpf_object__open(path, NULL, 0, opts);
4715 }
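/* Illustrative caller-side sketch of the opts-based open, including the
 * extra Kconfig override added by this change (the file name is
 * hypothetical; the string format is assumed to be newline-separated
 * CONFIG_xxx=value pairs):
 *
 *	DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts,
 *		.kconfig = "CONFIG_BPF_SYSCALL=y\nCONFIG_HZ=1000",
 *	);
 *	struct bpf_object *obj = bpf_object__open_file("prog.bpf.o", &opts);
 *	long err = libbpf_get_error(obj);
 *	if (err)
 *		return err;
 *
 * The extra config is consulted first; the system Kconfig is only read
 * if some externs remain unset afterwards (see
 * bpf_object__resolve_externs() below).
 */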
4716
4717 struct bpf_object *
4718 bpf_object__open_mem(const void *obj_buf, size_t obj_buf_sz,
4719                      const struct bpf_object_open_opts *opts)
4720 {
4721         if (!obj_buf || obj_buf_sz == 0)
4722                 return ERR_PTR(-EINVAL);
4723
4724         return __bpf_object__open(NULL, obj_buf, obj_buf_sz, opts);
4725 }
4726
4727 struct bpf_object *
4728 bpf_object__open_buffer(const void *obj_buf, size_t obj_buf_sz,
4729                         const char *name)
4730 {
4731         DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts,
4732                 .object_name = name,
4733                 /* wrong default, but backwards-compatible */
4734                 .relaxed_maps = true,
4735         );
4736
4737         /* returning NULL is wrong, but backwards-compatible */
4738         if (!obj_buf || obj_buf_sz == 0)
4739                 return NULL;
4740
4741         return bpf_object__open_mem(obj_buf, obj_buf_sz, &opts);
4742 }
4743
4744 int bpf_object__unload(struct bpf_object *obj)
4745 {
4746         size_t i;
4747
4748         if (!obj)
4749                 return -EINVAL;
4750
4751         for (i = 0; i < obj->nr_maps; i++)
4752                 zclose(obj->maps[i].fd);
4753
4754         for (i = 0; i < obj->nr_programs; i++)
4755                 bpf_program__unload(&obj->programs[i]);
4756
4757         return 0;
4758 }
4759
4760 static int bpf_object__sanitize_maps(struct bpf_object *obj)
4761 {
4762         struct bpf_map *m;
4763
4764         bpf_object__for_each_map(m, obj) {
4765                 if (!bpf_map__is_internal(m))
4766                         continue;
4767                 if (!obj->caps.global_data) {
4768                         pr_warn("kernel doesn't support global data\n");
4769                         return -ENOTSUP;
4770                 }
4771                 if (!obj->caps.array_mmap)
4772                         m->def.map_flags &= ~BPF_F_MMAPABLE;
4773         }
4774
4775         return 0;
4776 }
4777
4778 static int bpf_object__resolve_externs(struct bpf_object *obj,
4779                                        const char *extra_kconfig)
4780 {
4781         bool need_config = false;
4782         struct extern_desc *ext;
4783         int err, i;
4784         void *data;
4785
4786         if (obj->nr_extern == 0)
4787                 return 0;
4788
4789         data = obj->maps[obj->kconfig_map_idx].mmaped;
4790
4791         for (i = 0; i < obj->nr_extern; i++) {
4792                 ext = &obj->externs[i];
4793
4794                 if (strcmp(ext->name, "LINUX_KERNEL_VERSION") == 0) {
4795                         void *ext_val = data + ext->data_off;
4796                         __u32 kver = get_kernel_version();
4797
4798                         if (!kver) {
4799                                 pr_warn("failed to get kernel version\n");
4800                                 return -EINVAL;
4801                         }
4802                         err = set_ext_value_num(ext, ext_val, kver);
4803                         if (err)
4804                                 return err;
4805                         pr_debug("extern %s=0x%x\n", ext->name, kver);
4806                 } else if (strncmp(ext->name, "CONFIG_", 7) == 0) {
4807                         need_config = true;
4808                 } else {
4809                         pr_warn("unrecognized extern '%s'\n", ext->name);
4810                         return -EINVAL;
4811                 }
4812         }
4813         if (need_config && extra_kconfig) {
4814                 err = bpf_object__read_kconfig_mem(obj, extra_kconfig, data);
4815                 if (err)
4816                         return -EINVAL;
4817                 need_config = false;
4818                 for (i = 0; i < obj->nr_extern; i++) {
4819                         ext = &obj->externs[i];
4820                         if (!ext->is_set) {
4821                                 need_config = true;
4822                                 break;
4823                         }
4824                 }
4825         }
4826         if (need_config) {
4827                 err = bpf_object__read_kconfig_file(obj, data);
4828                 if (err)
4829                         return -EINVAL;
4830         }
4831         for (i = 0; i < obj->nr_extern; i++) {
4832                 ext = &obj->externs[i];
4833
4834                 if (!ext->is_set && !ext->is_weak) {
4835                         pr_warn("extern %s (strong) not resolved\n", ext->name);
4836                         return -ESRCH;
4837                 } else if (!ext->is_set) {
4838                         pr_debug("extern %s (weak) not resolved, defaulting to zero\n",
4839                                  ext->name);
4840                 }
4841         }
4842
4843         return 0;
4844 }
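/* Illustrative BPF-side counterpart (not part of this file): the
 * externs resolved above are plain extern declarations in the BPF C
 * program, backed by the read-only kconfig map. Names and types here
 * are assumptions:
 *
 *	extern int LINUX_KERNEL_VERSION;
 *	extern int CONFIG_BPF_SYSCALL __attribute__((weak));
 *
 *	SEC("kprobe/do_sys_open")
 *	int trace_open(void *ctx)
 *	{
 *		if (LINUX_KERNEL_VERSION >= 0x050500 && CONFIG_BPF_SYSCALL)
 *			; // version- and config-gated logic goes here
 *		return 0;
 *	}
 *
 * A strong extern that neither config source resolves fails the load
 * with -ESRCH; unresolved weak externs default to zero.
 */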
4845
4846 int bpf_object__load_xattr(struct bpf_object_load_attr *attr)
4847 {
4848         struct bpf_object *obj;
4849         int err, i;
4850
4851         if (!attr)
4852                 return -EINVAL;
4853         obj = attr->obj;
4854         if (!obj)
4855                 return -EINVAL;
4856
4857         if (obj->loaded) {
4858                 pr_warn("object should not be loaded twice\n");
4859                 return -EINVAL;
4860         }
4861
4862         obj->loaded = true;
4863
4864         err = bpf_object__probe_caps(obj);
4865         err = err ? : bpf_object__resolve_externs(obj, obj->kconfig);
4866         err = err ? : bpf_object__sanitize_and_load_btf(obj);
4867         err = err ? : bpf_object__sanitize_maps(obj);
4868         err = err ? : bpf_object__create_maps(obj);
4869         err = err ? : bpf_object__relocate(obj, attr->target_btf_path);
4870         err = err ? : bpf_object__load_progs(obj, attr->log_level);
4871         if (err)
4872                 goto out;
4873
4874         return 0;
4875 out:
4876         /* unpin any maps that were auto-pinned during load */
4877         for (i = 0; i < obj->nr_maps; i++)
4878                 if (obj->maps[i].pinned && !obj->maps[i].reused)
4879                         bpf_map__unpin(&obj->maps[i], NULL);
4880
4881         bpf_object__unload(obj);
4882         pr_warn("failed to load object '%s'\n", obj->path);
4883         return err;
4884 }
4885
4886 int bpf_object__load(struct bpf_object *obj)
4887 {
4888         struct bpf_object_load_attr attr = {
4889                 .obj = obj,
4890         };
4891
4892         return bpf_object__load_xattr(&attr);
4893 }
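/* Illustrative end-to-end sketch (file and section names are
 * hypothetical) showing how open and load pair up:
 *
 *	struct bpf_object *obj = bpf_object__open("prog.bpf.o");
 *	struct bpf_program *prog;
 *	int fd;
 *
 *	if (libbpf_get_error(obj))
 *		return -1;
 *	if (bpf_object__load(obj))	// caps, externs, maps, relocs, progs
 *		goto cleanup;
 *	prog = bpf_object__find_program_by_title(obj, "kprobe/do_sys_open");
 *	fd = prog ? bpf_program__fd(prog) : -1;
 *	...
 * cleanup:
 *	bpf_object__close(obj);
 */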
4894
4895 static int make_parent_dir(const char *path)
4896 {
4897         char *cp, errmsg[STRERR_BUFSIZE];
4898         char *dname, *dir;
4899         int err = 0;
4900
4901         dname = strdup(path);
4902         if (dname == NULL)
4903                 return -ENOMEM;
4904
4905         dir = dirname(dname);
4906         if (mkdir(dir, 0700) && errno != EEXIST)
4907                 err = -errno;
4908
4909         free(dname);
4910         if (err) {
4911                 cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg));
4912                 pr_warn("failed to mkdir %s: %s\n", path, cp);
4913         }
4914         return err;
4915 }
4916
4917 static int check_path(const char *path)
4918 {
4919         char *cp, errmsg[STRERR_BUFSIZE];
4920         struct statfs st_fs;
4921         char *dname, *dir;
4922         int err = 0;
4923
4924         if (path == NULL)
4925                 return -EINVAL;
4926
4927         dname = strdup(path);
4928         if (dname == NULL)
4929                 return -ENOMEM;
4930
4931         dir = dirname(dname);
4932         if (statfs(dir, &st_fs)) {
4933                 cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
4934                 pr_warn("failed to statfs %s: %s\n", dir, cp);
4935                 err = -errno;
4936         }
4937         free(dname);
4938
4939         if (!err && st_fs.f_type != BPF_FS_MAGIC) {
4940                 pr_warn("specified path %s is not on BPF FS\n", path);
4941                 err = -EINVAL;
4942         }
4943
4944         return err;
4945 }
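/* Note for callers: check_path() insists that the parent directory live
 * on a mounted BPF filesystem, so pinning typically requires something
 * like (illustrative shell command):
 *
 *	mount -t bpf bpffs /sys/fs/bpf
 *
 * Paths on any other filesystem are rejected with -EINVAL.
 */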
4946
4947 int bpf_program__pin_instance(struct bpf_program *prog, const char *path,
4948                               int instance)
4949 {
4950         char *cp, errmsg[STRERR_BUFSIZE];
4951         int err;
4952
4953         err = make_parent_dir(path);
4954         if (err)
4955                 return err;
4956
4957         err = check_path(path);
4958         if (err)
4959                 return err;
4960
4961         if (prog == NULL) {
4962                 pr_warn("invalid program pointer\n");
4963                 return -EINVAL;
4964         }
4965
4966         if (instance < 0 || instance >= prog->instances.nr) {
4967                 pr_warn("invalid prog instance %d of prog %s (max %d)\n",
4968                         instance, prog->section_name, prog->instances.nr);
4969                 return -EINVAL;
4970         }
4971
4972         if (bpf_obj_pin(prog->instances.fds[instance], path)) {
4973                 cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
4974                 pr_warn("failed to pin program: %s\n", cp);
4975                 return -errno;
4976         }
4977         pr_debug("pinned program '%s'\n", path);
4978
4979         return 0;
4980 }
4981
4982 int bpf_program__unpin_instance(struct bpf_program *prog, const char *path,
4983                                 int instance)
4984 {
4985         int err;
4986
4987         err = check_path(path);
4988         if (err)
4989                 return err;
4990
4991         if (prog == NULL) {
4992                 pr_warn("invalid program pointer\n");
4993                 return -EINVAL;
4994         }
4995
4996         if (instance < 0 || instance >= prog->instances.nr) {
4997                 pr_warn("invalid prog instance %d of prog %s (max %d)\n",
4998                         instance, prog->section_name, prog->instances.nr);
4999                 return -EINVAL;
5000         }
5001
5002         err = unlink(path);
5003         if (err != 0)
5004                 return -errno;
5005         pr_debug("unpinned program '%s'\n", path);
5006
5007         return 0;
5008 }
5009
5010 int bpf_program__pin(struct bpf_program *prog, const char *path)
5011 {
5012         int i, err;
5013
5014         err = make_parent_dir(path);
5015         if (err)
5016                 return err;
5017
5018         err = check_path(path);
5019         if (err)
5020                 return err;
5021
5022         if (prog == NULL) {
5023                 pr_warn("invalid program pointer\n");
5024                 return -EINVAL;
5025         }
5026
5027         if (prog->instances.nr <= 0) {
5028                 pr_warn("no instances of prog %s to pin\n",
5029                            prog->section_name);
5030                 return -EINVAL;
5031         }
5032
5033         if (prog->instances.nr == 1) {
5034                 /* don't create subdirs when pinning single instance */
5035                 return bpf_program__pin_instance(prog, path, 0);
5036         }
5037
5038         for (i = 0; i < prog->instances.nr; i++) {
5039                 char buf[PATH_MAX];
5040                 int len;
5041
5042                 len = snprintf(buf, PATH_MAX, "%s/%d", path, i);
5043                 if (len < 0) {
5044                         err = -EINVAL;
5045                         goto err_unpin;
5046                 } else if (len >= PATH_MAX) {
5047                         err = -ENAMETOOLONG;
5048                         goto err_unpin;
5049                 }
5050
5051                 err = bpf_program__pin_instance(prog, buf, i);
5052                 if (err)
5053                         goto err_unpin;
5054         }
5055
5056         return 0;
5057
5058 err_unpin:
5059         for (i = i - 1; i >= 0; i--) {
5060                 char buf[PATH_MAX];
5061                 int len;
5062
5063                 len = snprintf(buf, PATH_MAX, "%s/%d", path, i);
5064                 if (len < 0)
5065                         continue;
5066                 else if (len >= PATH_MAX)
5067                         continue;
5068
5069                 bpf_program__unpin_instance(prog, buf, i);
5070         }
5071
5072         rmdir(path);
5073
5074         return err;
5075 }
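/* Illustrative usage (path is hypothetical): a single-instance program
 * is pinned directly at the given path; N instances land in numbered
 * entries underneath it:
 *
 *	err = bpf_program__pin(prog, "/sys/fs/bpf/myprog");
 *	// 1 instance:  /sys/fs/bpf/myprog
 *	// 3 instances: /sys/fs/bpf/myprog/{0,1,2}
 *
 * On a mid-way failure, already-pinned instances are unpinned again and
 * the directory is removed.
 */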
5076
5077 int bpf_program__unpin(struct bpf_program *prog, const char *path)
5078 {
5079         int i, err;
5080
5081         err = check_path(path);
5082         if (err)
5083                 return err;
5084
5085         if (prog == NULL) {
5086                 pr_warn("invalid program pointer\n");
5087                 return -EINVAL;
5088         }
5089
5090         if (prog->instances.nr <= 0) {
5091                 pr_warn("no instances of prog %s to unpin\n",
5092                            prog->section_name);
5093                 return -EINVAL;
5094         }
5095
5096         if (prog->instances.nr == 1) {
5097                 /* don't create subdirs when pinning single instance */
5098                 return bpf_program__unpin_instance(prog, path, 0);
5099         }
5100
5101         for (i = 0; i < prog->instances.nr; i++) {
5102                 char buf[PATH_MAX];
5103                 int len;
5104
5105                 len = snprintf(buf, PATH_MAX, "%s/%d", path, i);
5106                 if (len < 0)
5107                         return -EINVAL;
5108                 else if (len >= PATH_MAX)
5109                         return -ENAMETOOLONG;
5110
5111                 err = bpf_program__unpin_instance(prog, buf, i);
5112                 if (err)
5113                         return err;
5114         }
5115
5116         err = rmdir(path);
5117         if (err)
5118                 return -errno;
5119
5120         return 0;
5121 }
5122
5123 int bpf_map__pin(struct bpf_map *map, const char *path)
5124 {
5125         char *cp, errmsg[STRERR_BUFSIZE];
5126         int err;
5127
5128         if (map == NULL) {
5129                 pr_warn("invalid map pointer\n");
5130                 return -EINVAL;
5131         }
5132
5133         if (map->pin_path) {
5134                 if (path && strcmp(path, map->pin_path)) {
5135                         pr_warn("map '%s' already has pin path '%s' different from '%s'\n",
5136                                 bpf_map__name(map), map->pin_path, path);
5137                         return -EINVAL;
5138                 } else if (map->pinned) {
5139                         pr_debug("map '%s' already pinned at '%s'; not re-pinning\n",
5140                                  bpf_map__name(map), map->pin_path);
5141                         return 0;
5142                 }
5143         } else {
5144                 if (!path) {
5145                         pr_warn("missing a path to pin map '%s' at\n",
5146                                 bpf_map__name(map));
5147                         return -EINVAL;
5148                 } else if (map->pinned) {
5149                         pr_warn("map '%s' already pinned\n", bpf_map__name(map));
5150                         return -EEXIST;
5151                 }
5152
5153                 map->pin_path = strdup(path);
5154                 if (!map->pin_path) {
5155                         err = -errno;
5156                         goto out_err;
5157                 }
5158         }
5159
5160         err = make_parent_dir(map->pin_path);
5161         if (err)
5162                 return err;
5163
5164         err = check_path(map->pin_path);
5165         if (err)
5166                 return err;
5167
5168         if (bpf_obj_pin(map->fd, map->pin_path)) {
5169                 err = -errno;
5170                 goto out_err;
5171         }
5172
5173         map->pinned = true;
5174         pr_debug("pinned map '%s'\n", map->pin_path);
5175
5176         return 0;
5177
5178 out_err:
5179         cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg));
5180         pr_warn("failed to pin map: %s\n", cp);
5181         return err;
5182 }
5183
5184 int bpf_map__unpin(struct bpf_map *map, const char *path)
5185 {
5186         int err;
5187
5188         if (map == NULL) {
5189                 pr_warn("invalid map pointer\n");
5190                 return -EINVAL;
5191         }
5192
5193         if (map->pin_path) {
5194                 if (path && strcmp(path, map->pin_path)) {
5195                         pr_warn("map '%s' already has pin path '%s' different from '%s'\n",
5196                                 bpf_map__name(map), map->pin_path, path);
5197                         return -EINVAL;
5198                 }
5199                 path = map->pin_path;
5200         } else if (!path) {
5201                 pr_warn("no path to unpin map '%s' from\n",
5202                         bpf_map__name(map));
5203                 return -EINVAL;
5204         }
5205
5206         err = check_path(path);
5207         if (err)
5208                 return err;
5209
5210         err = unlink(path);
5211         if (err != 0)
5212                 return -errno;
5213
5214         map->pinned = false;
5215         pr_debug("unpinned map '%s' from '%s'\n", bpf_map__name(map), path);
5216
5217         return 0;
5218 }
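/* Illustrative usage (path is hypothetical): once a map is pinned, the
 * stored pin_path makes the explicit path optional on unpin:
 *
 *	err = bpf_map__pin(map, "/sys/fs/bpf/my_map");
 *	...
 *	err = bpf_map__unpin(map, NULL);	// NULL reuses map->pin_path
 */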
5219
5220 int bpf_map__set_pin_path(struct bpf_map *map, const char *path)
5221 {
5222         char *new = NULL;
5223
5224         if (path) {
5225                 new = strdup(path);
5226                 if (!new)
5227                         return -errno;
5228         }
5229
5230         free(map->pin_path);
5231         map->pin_path = new;
5232         return 0;
5233 }
5234
5235 const char *bpf_map__get_pin_path(const struct bpf_map *map)
5236 {
5237         return map->pin_path;
5238 }
5239
5240 bool bpf_map__is_pinned(const struct bpf_map *map)
5241 {
5242         return map->pinned;
5243 }
5244
5245 int bpf_object__pin_maps(struct bpf_object *obj, const char *path)
5246 {
5247         struct bpf_map *map;
5248         int err;
5249
5250         if (!obj)
5251                 return -ENOENT;
5252
5253         if (!obj->loaded) {
5254                 pr_warn("object not yet loaded; load it first\n");
5255                 return -ENOENT;
5256         }
5257
5258         bpf_object__for_each_map(map, obj) {
5259                 char *pin_path = NULL;
5260                 char buf[PATH_MAX];
5261
5262                 if (path) {
5263                         int len;
5264
5265                         len = snprintf(buf, PATH_MAX, "%s/%s", path,
5266                                        bpf_map__name(map));
5267                         if (len < 0) {
5268                                 err = -EINVAL;
5269                                 goto err_unpin_maps;
5270                         } else if (len >= PATH_MAX) {
5271                                 err = -ENAMETOOLONG;
5272                                 goto err_unpin_maps;
5273                         }
5274                         pin_path = buf;
5275                 } else if (!map->pin_path) {
5276                         continue;
5277                 }
5278
5279                 err = bpf_map__pin(map, pin_path);
5280                 if (err)
5281                         goto err_unpin_maps;
5282         }
5283
5284         return 0;
5285
5286 err_unpin_maps:
5287         while ((map = bpf_map__prev(map, obj))) {
5288                 if (!map->pin_path)
5289                         continue;
5290
5291                 bpf_map__unpin(map, NULL);
5292         }
5293
5294         return err;
5295 }
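/* Illustrative usage (directory is hypothetical): pin every map of a
 * loaded object as <path>/<map_name>; with a NULL path, only maps that
 * already carry a pin_path are pinned:
 *
 *	err = bpf_object__pin_maps(obj, "/sys/fs/bpf/myapp");
 */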
5296
5297 int bpf_object__unpin_maps(struct bpf_object *obj, const char *path)
5298 {
5299         struct bpf_map *map;
5300         int err;
5301
5302         if (!obj)
5303                 return -ENOENT;
5304
5305         bpf_object__for_each_map(map, obj) {
5306                 char *pin_path = NULL;
5307                 char buf[PATH_MAX];
5308
5309                 if (path) {
5310                         int len;
5311
5312                         len = snprintf(buf, PATH_MAX, "%s/%s", path,
5313                                        bpf_map__name(map));
5314                         if (len < 0)
5315                                 return -EINVAL;
5316                         else if (len >= PATH_MAX)
5317                                 return -ENAMETOOLONG;
5318                         pin_path = buf;
5319                 } else if (!map->pin_path) {
5320                         continue;
5321                 }
5322
5323                 err = bpf_map__unpin(map, pin_path);
5324                 if (err)
5325                         return err;
5326         }
5327
5328         return 0;
5329 }
5330
5331 int bpf_object__pin_programs(struct bpf_object *obj, const char *path)
5332 {
5333         struct bpf_program *prog;
5334         int err;
5335
5336         if (!obj)
5337                 return -ENOENT;
5338
5339         if (!obj->loaded) {
5340                 pr_warn("object not yet loaded; load it first\n");
5341                 return -ENOENT;
5342         }
5343
5344         bpf_object__for_each_program(prog, obj) {
5345                 char buf[PATH_MAX];
5346                 int len;
5347
5348                 len = snprintf(buf, PATH_MAX, "%s/%s", path,
5349                                prog->pin_name);
5350                 if (len < 0) {
5351                         err = -EINVAL;
5352                         goto err_unpin_programs;
5353                 } else if (len >= PATH_MAX) {
5354                         err = -ENAMETOOLONG;
5355                         goto err_unpin_programs;
5356                 }
5357
5358                 err = bpf_program__pin(prog, buf);
5359                 if (err)
5360                         goto err_unpin_programs;
5361         }
5362
5363         return 0;
5364
5365 err_unpin_programs:
5366         while ((prog = bpf_program__prev(prog, obj))) {
5367                 char buf[PATH_MAX];
5368                 int len;
5369
5370                 len = snprintf(buf, PATH_MAX, "%s/%s", path,
5371                                prog->pin_name);
5372                 if (len < 0)
5373                         continue;
5374                 else if (len >= PATH_MAX)
5375                         continue;
5376
5377                 bpf_program__unpin(prog, buf);
5378         }
5379
5380         return err;
5381 }
5382
5383 int bpf_object__unpin_programs(struct bpf_object *obj, const char *path)
5384 {
5385         struct bpf_program *prog;
5386         int err;
5387
5388         if (!obj)
5389                 return -ENOENT;
5390
5391         bpf_object__for_each_program(prog, obj) {
5392                 char buf[PATH_MAX];
5393                 int len;
5394
5395                 len = snprintf(buf, PATH_MAX, "%s/%s", path,
5396                                prog->pin_name);
5397                 if (len < 0)
5398                         return -EINVAL;
5399                 else if (len >= PATH_MAX)
5400                         return -ENAMETOOLONG;
5401
5402                 err = bpf_program__unpin(prog, buf);
5403                 if (err)
5404                         return err;
5405         }
5406
5407         return 0;
5408 }
5409
5410 int bpf_object__pin(struct bpf_object *obj, const char *path)
5411 {
5412         int err;
5413
5414         err = bpf_object__pin_maps(obj, path);
5415         if (err)
5416                 return err;
5417
5418         err = bpf_object__pin_programs(obj, path);
5419         if (err) {
5420                 bpf_object__unpin_maps(obj, path);
5421                 return err;
5422         }
5423
5424         return 0;
5425 }
5426
5427 void bpf_object__close(struct bpf_object *obj)
5428 {
5429         size_t i;
5430
5431         if (!obj)
5432                 return;
5433
5434         if (obj->clear_priv)
5435                 obj->clear_priv(obj, obj->priv);
5436
5437         bpf_object__elf_finish(obj);
5438         bpf_object__unload(obj);
5439         btf__free(obj->btf);
5440         btf_ext__free(obj->btf_ext);
5441
5442         for (i = 0; i < obj->nr_maps; i++) {
5443                 struct bpf_map *map = &obj->maps[i];
5444
5445                 if (map->clear_priv)
5446                         map->clear_priv(map, map->priv);
5447                 map->priv = NULL;
5448                 map->clear_priv = NULL;
5449
5450                 if (map->mmaped) {
5451                         munmap(map->mmaped, bpf_map_mmap_sz(map));
5452                         map->mmaped = NULL;
5453                 }
5454
5455                 zfree(&map->name);
5456                 zfree(&map->pin_path);
5457         }
5458
5459         zfree(&obj->kconfig);
5460         zfree(&obj->externs);
5461         obj->nr_extern = 0;
5462
5463         zfree(&obj->maps);
5464         obj->nr_maps = 0;
5465
5466         if (obj->programs && obj->nr_programs) {
5467                 for (i = 0; i < obj->nr_programs; i++)
5468                         bpf_program__exit(&obj->programs[i]);
5469         }
5470         zfree(&obj->programs);
5471
5472         list_del(&obj->list);
5473         free(obj);
5474 }
5475
5476 struct bpf_object *
5477 bpf_object__next(struct bpf_object *prev)
5478 {
5479         struct bpf_object *next;
5480
5481         if (!prev)
5482                 next = list_first_entry(&bpf_objects_list,
5483                                         struct bpf_object,
5484                                         list);
5485         else
5486                 next = list_next_entry(prev, list);
5487
5488         /* An empty list is detected here, so no check is needed on entry. */
5489         if (&next->list == &bpf_objects_list)
5490                 return NULL;
5491
5492         return next;
5493 }
5494
5495 const char *bpf_object__name(const struct bpf_object *obj)
5496 {
5497         return obj ? obj->name : ERR_PTR(-EINVAL);
5498 }
5499
5500 unsigned int bpf_object__kversion(const struct bpf_object *obj)
5501 {
5502         return obj ? obj->kern_version : 0;
5503 }
5504
5505 struct btf *bpf_object__btf(const struct bpf_object *obj)
5506 {
5507         return obj ? obj->btf : NULL;
5508 }
5509
5510 int bpf_object__btf_fd(const struct bpf_object *obj)
5511 {
5512         return obj->btf ? btf__fd(obj->btf) : -1;
5513 }
5514
5515 int bpf_object__set_priv(struct bpf_object *obj, void *priv,
5516                          bpf_object_clear_priv_t clear_priv)
5517 {
5518         if (obj->priv && obj->clear_priv)
5519                 obj->clear_priv(obj, obj->priv);
5520
5521         obj->priv = priv;
5522         obj->clear_priv = clear_priv;
5523         return 0;
5524 }
5525
5526 void *bpf_object__priv(const struct bpf_object *obj)
5527 {
5528         return obj ? obj->priv : ERR_PTR(-EINVAL);
5529 }
5530
5531 static struct bpf_program *
5532 __bpf_program__iter(const struct bpf_program *p, const struct bpf_object *obj,
5533                     bool forward)
5534 {
5535         size_t nr_programs = obj->nr_programs;
5536         ssize_t idx;
5537
5538         if (!nr_programs)
5539                 return NULL;
5540
5541         if (!p)
5542                 /* Iter from the beginning */
5543                 return forward ? &obj->programs[0] :
5544                         &obj->programs[nr_programs - 1];
5545
5546         if (p->obj != obj) {
5547                 pr_warn("error: program handle doesn't match object\n");
5548                 return NULL;
5549         }
5550
5551         idx = (p - obj->programs) + (forward ? 1 : -1);
5552         if (idx >= obj->nr_programs || idx < 0)
5553                 return NULL;
5554         return &obj->programs[idx];
5555 }
5556
5557 struct bpf_program *
5558 bpf_program__next(struct bpf_program *prev, const struct bpf_object *obj)
5559 {
5560         struct bpf_program *prog = prev;
5561
5562         do {
5563                 prog = __bpf_program__iter(prog, obj, true);
5564         } while (prog && bpf_program__is_function_storage(prog, obj));
5565
5566         return prog;
5567 }
5568
5569 struct bpf_program *
5570 bpf_program__prev(struct bpf_program *next, const struct bpf_object *obj)
5571 {
5572         struct bpf_program *prog = next;
5573
5574         do {
5575                 prog = __bpf_program__iter(prog, obj, false);
5576         } while (prog && bpf_program__is_function_storage(prog, obj));
5577
5578         return prog;
5579 }
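/* Illustrative usage: these iterators back the public
 * bpf_object__for_each_program() macro and transparently skip the
 * .text "function storage" pseudo-program:
 *
 *	struct bpf_program *prog;
 *
 *	bpf_object__for_each_program(prog, obj)
 *		bpf_program__set_ifindex(prog, ifindex);
 */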
5580
5581 int bpf_program__set_priv(struct bpf_program *prog, void *priv,
5582                           bpf_program_clear_priv_t clear_priv)
5583 {
5584         if (prog->priv && prog->clear_priv)
5585                 prog->clear_priv(prog, prog->priv);
5586
5587         prog->priv = priv;
5588         prog->clear_priv = clear_priv;
5589         return 0;
5590 }
5591
5592 void *bpf_program__priv(const struct bpf_program *prog)
5593 {
5594         return prog ? prog->priv : ERR_PTR(-EINVAL);
5595 }
5596
5597 void bpf_program__set_ifindex(struct bpf_program *prog, __u32 ifindex)
5598 {
5599         prog->prog_ifindex = ifindex;
5600 }
5601
5602 const char *bpf_program__name(const struct bpf_program *prog)
5603 {
5604         return prog->name;
5605 }
5606
5607 const char *bpf_program__title(const struct bpf_program *prog, bool needs_copy)
5608 {
5609         const char *title;
5610
5611         title = prog->section_name;
5612         if (needs_copy) {
5613                 title = strdup(title);
5614                 if (!title) {
5615                         pr_warn("failed to strdup program title\n");
5616                         return ERR_PTR(-ENOMEM);
5617                 }
5618         }
5619
5620         return title;
5621 }
5622
5623 int bpf_program__fd(const struct bpf_program *prog)
5624 {
5625         return bpf_program__nth_fd(prog, 0);
5626 }
5627
5628 size_t bpf_program__size(const struct bpf_program *prog)
5629 {
5630         return prog->insns_cnt * sizeof(struct bpf_insn);
5631 }
5632
5633 int bpf_program__set_prep(struct bpf_program *prog, int nr_instances,
5634                           bpf_program_prep_t prep)
5635 {
5636         int *instances_fds;
5637
5638         if (nr_instances <= 0 || !prep)
5639                 return -EINVAL;
5640
5641         if (prog->instances.nr > 0 || prog->instances.fds) {
5642                 pr_warn("Can't set pre-processor after loading\n");
5643                 return -EINVAL;
5644         }
5645
5646         instances_fds = malloc(sizeof(int) * nr_instances);
5647         if (!instances_fds) {
5648                 pr_warn("alloc memory failed for fds\n");
5649                 return -ENOMEM;
5650         }
5651
5652         /* fill all fds with -1 */
5653         memset(instances_fds, -1, sizeof(int) * nr_instances);
5654
5655         prog->instances.nr = nr_instances;
5656         prog->instances.fds = instances_fds;
5657         prog->preprocessor = prep;
5658         return 0;
5659 }
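/* Illustrative preprocessor sketch (function name is hypothetical): a
 * pass-through that keeps the original instructions for every instance.
 * It must be installed before bpf_program__load(), which then loads one
 * instance per index n:
 *
 *	static int my_prep(struct bpf_program *prog, int n,
 *			   struct bpf_insn *insns, int insns_cnt,
 *			   struct bpf_prog_prep_result *res)
 *	{
 *		res->new_insn_ptr = insns;	// reuse insns unmodified
 *		res->new_insn_cnt = insns_cnt;
 *		return 0;
 *	}
 *
 *	err = bpf_program__set_prep(prog, 4, my_prep);	// 4 instances
 */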
5660
5661 int bpf_program__nth_fd(const struct bpf_program *prog, int n)
5662 {
5663         int fd;
5664
5665         if (!prog)
5666                 return -EINVAL;
5667
5668         if (n >= prog->instances.nr || n < 0) {
5669                 pr_warn("Can't get the %dth fd from program %s: only %d instances\n",
5670                         n, prog->section_name, prog->instances.nr);
5671                 return -EINVAL;
5672         }
5673
5674         fd = prog->instances.fds[n];
5675         if (fd < 0) {
5676                 pr_warn("%dth instance of program '%s' is invalid\n",
5677                         n, prog->section_name);
5678                 return -ENOENT;
5679         }
5680
5681         return fd;
5682 }
5683
5684 enum bpf_prog_type bpf_program__get_type(struct bpf_program *prog)
5685 {
5686         return prog->type;
5687 }
5688
5689 void bpf_program__set_type(struct bpf_program *prog, enum bpf_prog_type type)
5690 {
5691         prog->type = type;
5692 }
5693
5694 static bool bpf_program__is_type(const struct bpf_program *prog,
5695                                  enum bpf_prog_type type)
5696 {
5697         return prog ? (prog->type == type) : false;
5698 }
5699
5700 #define BPF_PROG_TYPE_FNS(NAME, TYPE)                           \
5701 int bpf_program__set_##NAME(struct bpf_program *prog)           \
5702 {                                                               \
5703         if (!prog)                                              \
5704                 return -EINVAL;                                 \
5705         bpf_program__set_type(prog, TYPE);                      \
5706         return 0;                                               \
5707 }                                                               \
5708                                                                 \
5709 bool bpf_program__is_##NAME(const struct bpf_program *prog)     \
5710 {                                                               \
5711         return bpf_program__is_type(prog, TYPE);                \
5712 }
5713
5714 BPF_PROG_TYPE_FNS(socket_filter, BPF_PROG_TYPE_SOCKET_FILTER);
5715 BPF_PROG_TYPE_FNS(kprobe, BPF_PROG_TYPE_KPROBE);
5716 BPF_PROG_TYPE_FNS(sched_cls, BPF_PROG_TYPE_SCHED_CLS);
5717 BPF_PROG_TYPE_FNS(sched_act, BPF_PROG_TYPE_SCHED_ACT);
5718 BPF_PROG_TYPE_FNS(tracepoint, BPF_PROG_TYPE_TRACEPOINT);
5719 BPF_PROG_TYPE_FNS(raw_tracepoint, BPF_PROG_TYPE_RAW_TRACEPOINT);
5720 BPF_PROG_TYPE_FNS(xdp, BPF_PROG_TYPE_XDP);
5721 BPF_PROG_TYPE_FNS(perf_event, BPF_PROG_TYPE_PERF_EVENT);
5722 BPF_PROG_TYPE_FNS(tracing, BPF_PROG_TYPE_TRACING);
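/* Each BPF_PROG_TYPE_FNS() line above expands into a typed setter and
 * predicate pair, e.g. (illustrative):
 *
 *	err = bpf_program__set_xdp(prog);   // prog->type = BPF_PROG_TYPE_XDP
 *	if (bpf_program__is_kprobe(prog))
 *		; // handle kprobe programs
 */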
5723
5724 enum bpf_attach_type
5725 bpf_program__get_expected_attach_type(struct bpf_program *prog)
5726 {
5727         return prog->expected_attach_type;
5728 }
5729
5730 void bpf_program__set_expected_attach_type(struct bpf_program *prog,
5731                                            enum bpf_attach_type type)
5732 {
5733         prog->expected_attach_type = type;
5734 }
5735
5736 #define BPF_PROG_SEC_IMPL(string, ptype, eatype, is_attachable, btf, atype) \
5737         { string, sizeof(string) - 1, ptype, eatype, is_attachable, btf, atype }
5738
5739 /* Programs that can NOT be attached. */
5740 #define BPF_PROG_SEC(string, ptype) BPF_PROG_SEC_IMPL(string, ptype, 0, 0, 0, 0)
5741
5742 /* Programs that can be attached. */
5743 #define BPF_APROG_SEC(string, ptype, atype) \
5744         BPF_PROG_SEC_IMPL(string, ptype, 0, 1, 0, atype)
5745
5746 /* Programs that must specify expected attach type at load time. */
5747 #define BPF_EAPROG_SEC(string, ptype, eatype) \
5748         BPF_PROG_SEC_IMPL(string, ptype, eatype, 1, 0, eatype)
5749
5750 /* Programs that use BTF to identify attach point */
5751 #define BPF_PROG_BTF(string, ptype, eatype) \
5752         BPF_PROG_SEC_IMPL(string, ptype, eatype, 0, 1, 0)
5753
5754 /* Programs that can be attached but attach type can't be identified by section
5755  * name. Kept for backward compatibility.
5756  */
5757 #define BPF_APROG_COMPAT(string, ptype) BPF_PROG_SEC(string, ptype)
5758
5759 #define SEC_DEF(sec_pfx, ptype, ...) {                                      \
5760         .sec = sec_pfx,                                                     \
5761         .len = sizeof(sec_pfx) - 1,                                         \
5762         .prog_type = BPF_PROG_TYPE_##ptype,                                 \
5763         __VA_ARGS__                                                         \
5764 }
5765
5766 struct bpf_sec_def;
5767
5768 typedef struct bpf_link *(*attach_fn_t)(const struct bpf_sec_def *sec,
5769                                         struct bpf_program *prog);
5770
5771 static struct bpf_link *attach_kprobe(const struct bpf_sec_def *sec,
5772                                       struct bpf_program *prog);
5773 static struct bpf_link *attach_tp(const struct bpf_sec_def *sec,
5774                                   struct bpf_program *prog);
5775 static struct bpf_link *attach_raw_tp(const struct bpf_sec_def *sec,
5776                                       struct bpf_program *prog);
5777 static struct bpf_link *attach_trace(const struct bpf_sec_def *sec,
5778                                      struct bpf_program *prog);
5779
5780 struct bpf_sec_def {
5781         const char *sec;
5782         size_t len;
5783         enum bpf_prog_type prog_type;
5784         enum bpf_attach_type expected_attach_type;
5785         bool is_attachable;
5786         bool is_attach_btf;
5787         enum bpf_attach_type attach_type;
5788         attach_fn_t attach_fn;
5789 };
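/* The section_defs[] table below drives section-name matching: a prefix
 * match against .sec picks the program type, expected attach type, and,
 * where set, an auto-attach handler. For example (illustrative), a
 * program placed in SEC("kprobe/do_unlinkat") matches the "kprobe/"
 * entry and becomes a BPF_PROG_TYPE_KPROBE wired to attach_kprobe().
 */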
5790
5791 static const struct bpf_sec_def section_defs[] = {
5792         BPF_PROG_SEC("socket",                  BPF_PROG_TYPE_SOCKET_FILTER),
5793         BPF_PROG_SEC("sk_reuseport",            BPF_PROG_TYPE_SK_REUSEPORT),
5794         SEC_DEF("kprobe/", KPROBE,
5795                 .attach_fn = attach_kprobe),
5796         BPF_PROG_SEC("uprobe/",                 BPF_PROG_TYPE_KPROBE),
5797         SEC_DEF("kretprobe/", KPROBE,
5798                 .attach_fn = attach_kprobe),
5799         BPF_PROG_SEC("uretprobe/",              BPF_PROG_TYPE_KPROBE),
5800         BPF_PROG_SEC("classifier",              BPF_PROG_TYPE_SCHED_CLS),
5801         BPF_PROG_SEC("action",                  BPF_PROG_TYPE_SCHED_ACT),
5802         SEC_DEF("tracepoint/", TRACEPOINT,
5803                 .attach_fn = attach_tp),
5804         SEC_DEF("tp/", TRACEPOINT,
5805                 .attach_fn = attach_tp),
5806         SEC_DEF("raw_tracepoint/", RAW_TRACEPOINT,
5807                 .attach_fn = attach_raw_tp),
5808         SEC_DEF("raw_tp/", RAW_TRACEPOINT,
5809                 .attach_fn = attach_raw_tp),
5810         SEC_DEF("tp_btf/", TRACING,
5811                 .expected_attach_type = BPF_TRACE_RAW_TP,
5812                 .is_attach_btf = true,
5813                 .attach_fn = attach_trace),
5814         SEC_DEF("fentry/", TRACING,
5815                 .expected_attach_type = BPF_TRACE_FENTRY,
5816                 .is_attach_btf = true,
5817                 .attach_fn = attach_trace),
5818         SEC_DEF("fexit/", TRACING,
5819                 .expected_attach_type = BPF_TRACE_FEXIT,
5820                 .is_attach_btf = true,
5821                 .attach_fn = attach_trace),
5822         BPF_PROG_SEC("xdp",                     BPF_PROG_TYPE_XDP),
5823         BPF_PROG_SEC("perf_event",              BPF_PROG_TYPE_PERF_EVENT),
5824         BPF_PROG_SEC("lwt_in",                  BPF_PROG_TYPE_LWT_IN),
5825         BPF_PROG_SEC("lwt_out",                 BPF_PROG_TYPE_LWT_OUT),
5826         BPF_PROG_SEC("lwt_xmit",                BPF_PROG_TYPE_LWT_XMIT),
5827         BPF_PROG_SEC("lwt_seg6local",           BPF_PROG_TYPE_LWT_SEG6LOCAL),
5828         BPF_APROG_SEC("cgroup_skb/ingress",     BPF_PROG_TYPE_CGROUP_SKB,
5829                                                 BPF_CGROUP_INET_INGRESS),
5830         BPF_APROG_SEC("cgroup_skb/egress",      BPF_PROG_TYPE_CGROUP_SKB,
5831                                                 BPF_CGROUP_INET_EGRESS),
5832         BPF_APROG_COMPAT("cgroup/skb",          BPF_PROG_TYPE_CGROUP_SKB),
5833         BPF_APROG_SEC("cgroup/sock",            BPF_PROG_TYPE_CGROUP_SOCK,
5834                                                 BPF_CGROUP_INET_SOCK_CREATE),
5835         BPF_EAPROG_SEC("cgroup/post_bind4",     BPF_PROG_TYPE_CGROUP_SOCK,
5836                                                 BPF_CGROUP_INET4_POST_BIND),
5837         BPF_EAPROG_SEC("cgroup/post_bind6",     BPF_PROG_TYPE_CGROUP_SOCK,
5838                                                 BPF_CGROUP_INET6_POST_BIND),
5839         BPF_APROG_SEC("cgroup/dev",             BPF_PROG_TYPE_CGROUP_DEVICE,
5840                                                 BPF_CGROUP_DEVICE),
5841         BPF_APROG_SEC("sockops",                BPF_PROG_TYPE_SOCK_OPS,
5842                                                 BPF_CGROUP_SOCK_OPS),
5843         BPF_APROG_SEC("sk_skb/stream_parser",   BPF_PROG_TYPE_SK_SKB,
5844                                                 BPF_SK_SKB_STREAM_PARSER),
5845         BPF_APROG_SEC("sk_skb/stream_verdict",  BPF_PROG_TYPE_SK_SKB,
5846                                                 BPF_SK_SKB_STREAM_VERDICT),
5847         BPF_APROG_COMPAT("sk_skb",              BPF_PROG_TYPE_SK_SKB),
5848         BPF_APROG_SEC("sk_msg",                 BPF_PROG_TYPE_SK_MSG,
5849                                                 BPF_SK_MSG_VERDICT),
5850         BPF_APROG_SEC("lirc_mode2",             BPF_PROG_TYPE_LIRC_MODE2,
5851                                                 BPF_LIRC_MODE2),
5852         BPF_APROG_SEC("flow_dissector",         BPF_PROG_TYPE_FLOW_DISSECTOR,
5853                                                 BPF_FLOW_DISSECTOR),
5854         BPF_EAPROG_SEC("cgroup/bind4",          BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
5855                                                 BPF_CGROUP_INET4_BIND),
5856         BPF_EAPROG_SEC("cgroup/bind6",          BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
5857                                                 BPF_CGROUP_INET6_BIND),
5858         BPF_EAPROG_SEC("cgroup/connect4",       BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
5859                                                 BPF_CGROUP_INET4_CONNECT),
5860         BPF_EAPROG_SEC("cgroup/connect6",       BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
5861                                                 BPF_CGROUP_INET6_CONNECT),
5862         BPF_EAPROG_SEC("cgroup/sendmsg4",       BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
5863                                                 BPF_CGROUP_UDP4_SENDMSG),
5864         BPF_EAPROG_SEC("cgroup/sendmsg6",       BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
5865                                                 BPF_CGROUP_UDP6_SENDMSG),
5866         BPF_EAPROG_SEC("cgroup/recvmsg4",       BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
5867                                                 BPF_CGROUP_UDP4_RECVMSG),
5868         BPF_EAPROG_SEC("cgroup/recvmsg6",       BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
5869                                                 BPF_CGROUP_UDP6_RECVMSG),
5870         BPF_EAPROG_SEC("cgroup/sysctl",         BPF_PROG_TYPE_CGROUP_SYSCTL,
5871                                                 BPF_CGROUP_SYSCTL),
5872         BPF_EAPROG_SEC("cgroup/getsockopt",     BPF_PROG_TYPE_CGROUP_SOCKOPT,
5873                                                 BPF_CGROUP_GETSOCKOPT),
5874         BPF_EAPROG_SEC("cgroup/setsockopt",     BPF_PROG_TYPE_CGROUP_SOCKOPT,
5875                                                 BPF_CGROUP_SETSOCKOPT),
5876 };
5877
5878 #undef BPF_PROG_SEC_IMPL
5879 #undef BPF_PROG_SEC
5880 #undef BPF_APROG_SEC
5881 #undef BPF_EAPROG_SEC
5882 #undef BPF_APROG_COMPAT
5883 #undef SEC_DEF
5884
5885 #define MAX_TYPE_NAME_SIZE 32
5886
5887 static const struct bpf_sec_def *find_sec_def(const char *sec_name)
5888 {
5889         int i, n = ARRAY_SIZE(section_defs);
5890
5891         for (i = 0; i < n; i++) {
5892                 if (strncmp(sec_name,
5893                             section_defs[i].sec, section_defs[i].len))
5894                         continue;
5895                 return &section_defs[i];
5896         }
5897         return NULL;
5898 }
5899
5900 static char *libbpf_get_type_names(bool attach_type)
5901 {
5902         int i, len = ARRAY_SIZE(section_defs) * MAX_TYPE_NAME_SIZE;
5903         char *buf;
5904
5905         buf = malloc(len);
5906         if (!buf)
5907                 return NULL;
5908
5909         buf[0] = '\0';
5910         /* Build a string listing all available names */
5911         for (i = 0; i < ARRAY_SIZE(section_defs); i++) {
5912                 if (attach_type && !section_defs[i].is_attachable)
5913                         continue;
5914
5915                 if (strlen(buf) + strlen(section_defs[i].sec) + 2 > len) {
5916                         free(buf);
5917                         return NULL;
5918                 }
5919                 strcat(buf, " ");
5920                 strcat(buf, section_defs[i].sec);
5921         }
5922
5923         return buf;
5924 }
5925
5926 int libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type,
5927                              enum bpf_attach_type *expected_attach_type)
5928 {
5929         const struct bpf_sec_def *sec_def;
5930         char *type_names;
5931
5932         if (!name)
5933                 return -EINVAL;
5934
5935         sec_def = find_sec_def(name);
5936         if (sec_def) {
5937                 *prog_type = sec_def->prog_type;
5938                 *expected_attach_type = sec_def->expected_attach_type;
5939                 return 0;
5940         }
5941
5942         pr_debug("failed to guess program type from ELF section '%s'\n", name);
5943         type_names = libbpf_get_type_names(false);
5944         if (type_names != NULL) {
5945                 pr_debug("supported section(type) names are:%s\n", type_names);
5946                 free(type_names);
5947         }
5948
5949         return -ESRCH;
5950 }
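
/* Usage sketch (illustrative only, kept out of the build with #if 0): how a
 * caller might map an ELF section name to a program type before loading.
 * The section name below is a hypothetical example matching the
 * "tracepoint/" prefix in section_defs[].
 */
#if 0
static int example_guess_prog_type(void)
{
        enum bpf_prog_type prog_type;
        enum bpf_attach_type attach_type;
        int err;

        err = libbpf_prog_type_by_name("tracepoint/sched/sched_switch",
                                       &prog_type, &attach_type);
        if (err)
                return err; /* -ESRCH if no section_defs[] entry matched */
        /* prog_type is BPF_PROG_TYPE_TRACEPOINT here */
        return 0;
}
#endif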
5951
5952 #define BTF_PREFIX "btf_trace_"
5953 int libbpf_find_vmlinux_btf_id(const char *name,
5954                                enum bpf_attach_type attach_type)
5955 {
5956         struct btf *btf = bpf_core_find_kernel_btf();
5957         char raw_tp_btf[128] = BTF_PREFIX;
5958         char *dst = raw_tp_btf + sizeof(BTF_PREFIX) - 1;
5959         const char *btf_name;
5960         int err = -EINVAL;
5961         __u32 kind;
5962
5963         if (IS_ERR(btf)) {
5964                 pr_warn("vmlinux BTF is not found\n");
5965                 return -EINVAL;
5966         }
5967
5968         if (attach_type == BPF_TRACE_RAW_TP) {
5969                 /* prepend "btf_trace_" prefix per kernel convention */
5970                 strncat(dst, name, sizeof(raw_tp_btf) - sizeof(BTF_PREFIX));
5971                 btf_name = raw_tp_btf;
5972                 kind = BTF_KIND_TYPEDEF;
5973         } else {
5974                 btf_name = name;
5975                 kind = BTF_KIND_FUNC;
5976         }
5977         err = btf__find_by_name_kind(btf, btf_name, kind);
5978         btf__free(btf);
5979         return err;
5980 }
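
/* Usage sketch (illustrative only, not compiled): resolving the BTF ID of an
 * fentry attach target in vmlinux BTF. "tcp_sendmsg" is just an example
 * kernel function name; a negative return value means the lookup failed.
 */
#if 0
static int example_find_fentry_btf_id(void)
{
        int btf_id;

        btf_id = libbpf_find_vmlinux_btf_id("tcp_sendmsg", BPF_TRACE_FENTRY);
        return btf_id < 0 ? btf_id : 0;
}
#endif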
5981
5982 static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd)
5983 {
5984         struct bpf_prog_info_linear *info_linear;
5985         struct bpf_prog_info *info;
5986         struct btf *btf = NULL;
5987         int err = -EINVAL;
5988
5989         info_linear = bpf_program__get_prog_info_linear(attach_prog_fd, 0);
5990         if (IS_ERR_OR_NULL(info_linear)) {
5991                 pr_warn("failed to get prog info (get_prog_info_linear) for FD %d\n",
5992                         attach_prog_fd);
5993                 return -EINVAL;
5994         }
5995         info = &info_linear->info;
5996         if (!info->btf_id) {
5997                 pr_warn("The target program doesn't have BTF\n");
5998                 goto out;
5999         }
6000         if (btf__get_from_id(info->btf_id, &btf)) {
6001                 pr_warn("Failed to get BTF of the program\n");
6002                 goto out;
6003         }
6004         err = btf__find_by_name_kind(btf, name, BTF_KIND_FUNC);
6005         btf__free(btf);
6006         if (err <= 0)
6007                 pr_warn("%s is not found in prog's BTF\n", name);
6010 out:
6011         free(info_linear);
6012         return err;
6013 }
6014
6015 static int libbpf_find_attach_btf_id(const char *name,
6016                                      enum bpf_attach_type attach_type,
6017                                      __u32 attach_prog_fd)
6018 {
6019         int i, err;
6020
6021         if (!name)
6022                 return -EINVAL;
6023
6024         for (i = 0; i < ARRAY_SIZE(section_defs); i++) {
6025                 if (!section_defs[i].is_attach_btf)
6026                         continue;
6027                 if (strncmp(name, section_defs[i].sec, section_defs[i].len))
6028                         continue;
6029                 if (attach_prog_fd)
6030                         err = libbpf_find_prog_btf_id(name + section_defs[i].len,
6031                                                       attach_prog_fd);
6032                 else
6033                         err = libbpf_find_vmlinux_btf_id(name + section_defs[i].len,
6034                                                          attach_type);
6035                 if (err <= 0)
6036                         pr_warn("failed to find BTF ID for '%s': %d\n", name, err);
6037                 return err;
6038         }
6039         pr_warn("failed to identify btf_id based on ELF section name '%s'\n", name);
6040         return -ESRCH;
6041 }
6042
6043 int libbpf_attach_type_by_name(const char *name,
6044                                enum bpf_attach_type *attach_type)
6045 {
6046         char *type_names;
6047         int i;
6048
6049         if (!name)
6050                 return -EINVAL;
6051
6052         for (i = 0; i < ARRAY_SIZE(section_defs); i++) {
6053                 if (strncmp(name, section_defs[i].sec, section_defs[i].len))
6054                         continue;
6055                 if (!section_defs[i].is_attachable)
6056                         return -EINVAL;
6057                 *attach_type = section_defs[i].attach_type;
6058                 return 0;
6059         }
6060         pr_debug("failed to guess attach type based on ELF section name '%s'\n", name);
6061         type_names = libbpf_get_type_names(true);
6062         if (type_names != NULL) {
6063                 pr_debug("attachable section(type) names are:%s\n", type_names);
6064                 free(type_names);
6065         }
6066
6067         return -EINVAL;
6068 }
6069
6070 int bpf_map__fd(const struct bpf_map *map)
6071 {
6072         return map ? map->fd : -EINVAL;
6073 }
6074
6075 const struct bpf_map_def *bpf_map__def(const struct bpf_map *map)
6076 {
6077         return map ? &map->def : ERR_PTR(-EINVAL);
6078 }
6079
6080 const char *bpf_map__name(const struct bpf_map *map)
6081 {
6082         return map ? map->name : NULL;
6083 }
6084
6085 __u32 bpf_map__btf_key_type_id(const struct bpf_map *map)
6086 {
6087         return map ? map->btf_key_type_id : 0;
6088 }
6089
6090 __u32 bpf_map__btf_value_type_id(const struct bpf_map *map)
6091 {
6092         return map ? map->btf_value_type_id : 0;
6093 }
6094
6095 int bpf_map__set_priv(struct bpf_map *map, void *priv,
6096                      bpf_map_clear_priv_t clear_priv)
6097 {
6098         if (!map)
6099                 return -EINVAL;
6100
6101         if (map->priv) {
6102                 if (map->clear_priv)
6103                         map->clear_priv(map, map->priv);
6104         }
6105
6106         map->priv = priv;
6107         map->clear_priv = clear_priv;
6108         return 0;
6109 }
6110
6111 void *bpf_map__priv(const struct bpf_map *map)
6112 {
6113         return map ? map->priv : ERR_PTR(-EINVAL);
6114 }
6115
6116 bool bpf_map__is_offload_neutral(const struct bpf_map *map)
6117 {
6118         return map->def.type == BPF_MAP_TYPE_PERF_EVENT_ARRAY;
6119 }
6120
6121 bool bpf_map__is_internal(const struct bpf_map *map)
6122 {
6123         return map->libbpf_type != LIBBPF_MAP_UNSPEC;
6124 }
6125
6126 void bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex)
6127 {
6128         map->map_ifindex = ifindex;
6129 }
6130
6131 int bpf_map__set_inner_map_fd(struct bpf_map *map, int fd)
6132 {
6133         if (!bpf_map_type__is_map_in_map(map->def.type)) {
6134                 pr_warn("error: unsupported map type\n");
6135                 return -EINVAL;
6136         }
6137         if (map->inner_map_fd != -1) {
6138                 pr_warn("error: inner_map_fd already specified\n");
6139                 return -EINVAL;
6140         }
6141         map->inner_map_fd = fd;
6142         return 0;
6143 }
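
/* Usage sketch (illustrative only, not compiled): before bpf_object__load(),
 * a map-in-map outer map needs a prototype inner map FD so the kernel can
 * validate inner map compatibility. "outer_map" is a hypothetical map name.
 */
#if 0
static int example_set_inner_map(struct bpf_object *obj)
{
        struct bpf_map *outer;
        int inner_fd, err;

        outer = bpf_object__find_map_by_name(obj, "outer_map");
        if (!outer)
                return -ENOENT;
        inner_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, sizeof(int),
                                  sizeof(int), 1, 0);
        if (inner_fd < 0)
                return inner_fd;
        err = bpf_map__set_inner_map_fd(outer, inner_fd);
        if (err)
                close(inner_fd);
        return err;
}
#endif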
6144
6145 static struct bpf_map *
6146 __bpf_map__iter(const struct bpf_map *m, const struct bpf_object *obj, int i)
6147 {
6148         ssize_t idx;
6149         struct bpf_map *s, *e;
6150
6151         if (!obj || !obj->maps)
6152                 return NULL;
6153
6154         s = obj->maps;
6155         e = obj->maps + obj->nr_maps;
6156
6157         if ((m < s) || (m >= e)) {
6158                 pr_warn("error in %s: map handle doesn't belong to object\n",
6159                          __func__);
6160                 return NULL;
6161         }
6162
6163         idx = (m - obj->maps) + i;
6164         if (idx >= obj->nr_maps || idx < 0)
6165                 return NULL;
6166         return &obj->maps[idx];
6167 }
6168
6169 struct bpf_map *
6170 bpf_map__next(const struct bpf_map *prev, const struct bpf_object *obj)
6171 {
6172         if (prev == NULL)
6173                 return obj->maps;
6174
6175         return __bpf_map__iter(prev, obj, 1);
6176 }
6177
6178 struct bpf_map *
6179 bpf_map__prev(const struct bpf_map *next, const struct bpf_object *obj)
6180 {
6181         if (next == NULL) {
6182                 if (!obj->nr_maps)
6183                         return NULL;
6184                 return obj->maps + obj->nr_maps - 1;
6185         }
6186
6187         return __bpf_map__iter(next, obj, -1);
6188 }
6189
6190 struct bpf_map *
6191 bpf_object__find_map_by_name(const struct bpf_object *obj, const char *name)
6192 {
6193         struct bpf_map *pos;
6194
6195         bpf_object__for_each_map(pos, obj) {
6196                 if (pos->name && !strcmp(pos->name, name))
6197                         return pos;
6198         }
6199         return NULL;
6200 }
6201
6202 int
6203 bpf_object__find_map_fd_by_name(const struct bpf_object *obj, const char *name)
6204 {
6205         return bpf_map__fd(bpf_object__find_map_by_name(obj, name));
6206 }
6207
6208 struct bpf_map *
6209 bpf_object__find_map_by_offset(struct bpf_object *obj, size_t offset)
6210 {
6211         return ERR_PTR(-ENOTSUP);
6212 }
6213
6214 long libbpf_get_error(const void *ptr)
6215 {
6216         return PTR_ERR_OR_ZERO(ptr);
6217 }
6218
6219 int bpf_prog_load(const char *file, enum bpf_prog_type type,
6220                   struct bpf_object **pobj, int *prog_fd)
6221 {
6222         struct bpf_prog_load_attr attr;
6223
6224         memset(&attr, 0, sizeof(struct bpf_prog_load_attr));
6225         attr.file = file;
6226         attr.prog_type = type;
6227         attr.expected_attach_type = 0;
6228
6229         return bpf_prog_load_xattr(&attr, pobj, prog_fd);
6230 }
6231
6232 int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
6233                         struct bpf_object **pobj, int *prog_fd)
6234 {
6235         struct bpf_object_open_attr open_attr = {};
6236         struct bpf_program *prog, *first_prog = NULL;
6237         struct bpf_object *obj;
6238         struct bpf_map *map;
6239         int err;
6240
6241         if (!attr)
6242                 return -EINVAL;
6243         if (!attr->file)
6244                 return -EINVAL;
6245
6246         open_attr.file = attr->file;
6247         open_attr.prog_type = attr->prog_type;
6248
6249         obj = bpf_object__open_xattr(&open_attr);
6250         if (IS_ERR_OR_NULL(obj))
6251                 return -ENOENT;
6252
6253         bpf_object__for_each_program(prog, obj) {
6254                 enum bpf_attach_type attach_type = attr->expected_attach_type;
6255                 /*
6256                  * to preserve backwards compatibility, bpf_prog_load treats
6257                  * attr->prog_type, if specified, as an override to whatever
6258                  * bpf_object__open guessed
6259                  */
6260                 if (attr->prog_type != BPF_PROG_TYPE_UNSPEC) {
6261                         bpf_program__set_type(prog, attr->prog_type);
6262                         bpf_program__set_expected_attach_type(prog,
6263                                                               attach_type);
6264                 }
6265                 if (bpf_program__get_type(prog) == BPF_PROG_TYPE_UNSPEC) {
6266                         /*
6267                          * we couldn't guess the type from the section name
6268                          * and the user didn't provide a fallback type
6269                          */
6270                         bpf_object__close(obj);
6271                         return -EINVAL;
6272                 }
6273
6274                 prog->prog_ifindex = attr->ifindex;
6275                 prog->log_level = attr->log_level;
6276                 prog->prog_flags = attr->prog_flags;
6277                 if (!first_prog)
6278                         first_prog = prog;
6279         }
6280
6281         bpf_object__for_each_map(map, obj) {
6282                 if (!bpf_map__is_offload_neutral(map))
6283                         map->map_ifindex = attr->ifindex;
6284         }
6285
6286         if (!first_prog) {
6287                 pr_warn("object file doesn't contain a BPF program\n");
6288                 bpf_object__close(obj);
6289                 return -ENOENT;
6290         }
6291
6292         err = bpf_object__load(obj);
6293         if (err) {
6294                 bpf_object__close(obj);
6295                 return err;
6296         }
6297
6298         *pobj = obj;
6299         *prog_fd = bpf_program__fd(first_prog);
6300         return 0;
6301 }
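
/* Usage sketch (illustrative only, not compiled): the common bpf_prog_load()
 * entry point, letting libbpf guess the program type from the ELF section
 * name. "prog.o" is a placeholder object file path.
 */
#if 0
static int example_prog_load(void)
{
        struct bpf_object *obj;
        int prog_fd, err;

        err = bpf_prog_load("prog.o", BPF_PROG_TYPE_UNSPEC, &obj, &prog_fd);
        if (err)
                return err;
        /* ... use prog_fd ... */
        bpf_object__close(obj);
        return 0;
}
#endif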
6302
6303 struct bpf_link {
6304         int (*detach)(struct bpf_link *link);
6305         int (*destroy)(struct bpf_link *link);
6306         bool disconnected;
6307 };
6308
6309 /* Release "ownership" of the underlying BPF resource (typically, a BPF
6310  * program attached to some BPF hook, e.g., tracepoint, kprobe, etc). A
6311  * disconnected link, when destructed through a bpf_link__destroy() call,
6312  * won't attempt to detach/unregister that BPF resource. This is useful in
6313  * situations where, say, an attached BPF program has to outlive the
6314  * userspace program that attached it. Depending on the type of BPF program,
6315  * though, additional steps (like pinning the BPF program in BPF FS) might
6316  * be necessary to ensure that exit of the userspace program doesn't trigger
6317  * automatic detachment and clean-up inside the kernel.
6318  */
6319 void bpf_link__disconnect(struct bpf_link *link)
6320 {
6321         link->disconnected = true;
6322 }
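
/* Usage sketch (illustrative only, not compiled): keep an attachment alive
 * past process exit by disconnecting the link before destroying it. Per the
 * comment above, some program types still need extra pinning to survive.
 */
#if 0
static void example_disconnect(struct bpf_link *link)
{
        bpf_link__disconnect(link);
        /* frees the link, but does NOT detach the BPF resource */
        bpf_link__destroy(link);
}
#endif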
6323
6324 int bpf_link__destroy(struct bpf_link *link)
6325 {
6326         int err = 0;
6327
6328         if (!link)
6329                 return 0;
6330
6331         if (!link->disconnected && link->detach)
6332                 err = link->detach(link);
6333         if (link->destroy)
6334                 link->destroy(link);
6335         free(link);
6336
6337         return err;
6338 }
6339
6340 struct bpf_link_fd {
6341         struct bpf_link link; /* has to be at the top of struct */
6342         int fd; /* hook FD */
6343 };
6344
6345 static int bpf_link__detach_perf_event(struct bpf_link *link)
6346 {
6347         struct bpf_link_fd *l = (void *)link;
6348         int err;
6349
6350         err = ioctl(l->fd, PERF_EVENT_IOC_DISABLE, 0);
6351         if (err)
6352                 err = -errno;
6353
6354         close(l->fd);
6355         return err;
6356 }
6357
6358 struct bpf_link *bpf_program__attach_perf_event(struct bpf_program *prog,
6359                                                 int pfd)
6360 {
6361         char errmsg[STRERR_BUFSIZE];
6362         struct bpf_link_fd *link;
6363         int prog_fd, err;
6364
6365         if (pfd < 0) {
6366                 pr_warn("program '%s': invalid perf event FD %d\n",
6367                         bpf_program__title(prog, false), pfd);
6368                 return ERR_PTR(-EINVAL);
6369         }
6370         prog_fd = bpf_program__fd(prog);
6371         if (prog_fd < 0) {
6372                 pr_warn("program '%s': can't attach BPF program w/o FD (did you load it?)\n",
6373                         bpf_program__title(prog, false));
6374                 return ERR_PTR(-EINVAL);
6375         }
6376
6377         link = calloc(1, sizeof(*link));
6378         if (!link)
6379                 return ERR_PTR(-ENOMEM);
6380         link->link.detach = &bpf_link__detach_perf_event;
6381         link->fd = pfd;
6382
6383         if (ioctl(pfd, PERF_EVENT_IOC_SET_BPF, prog_fd) < 0) {
6384                 err = -errno;
6385                 free(link);
6386                 pr_warn("program '%s': failed to attach to pfd %d: %s\n",
6387                         bpf_program__title(prog, false), pfd,
6388                            libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
6389                 return ERR_PTR(err);
6390         }
6391         if (ioctl(pfd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
6392                 err = -errno;
6393                 free(link);
6394                 pr_warn("program '%s': failed to enable pfd %d: %s\n",
6395                         bpf_program__title(prog, false), pfd,
6396                            libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
6397                 return ERR_PTR(err);
6398         }
6399         return (struct bpf_link *)link;
6400 }
6401
6402 /*
6403  * This function is expected to parse an integer in the range [0, 2^31-1]
6404  * from the given file using scanf format string fmt. If the actual parsed
6405  * value is negative, the result might be indistinguishable from an error.
6406  */
6407 static int parse_uint_from_file(const char *file, const char *fmt)
6408 {
6409         char buf[STRERR_BUFSIZE];
6410         int err, ret;
6411         FILE *f;
6412
6413         f = fopen(file, "r");
6414         if (!f) {
6415                 err = -errno;
6416                 pr_debug("failed to open '%s': %s\n", file,
6417                          libbpf_strerror_r(err, buf, sizeof(buf)));
6418                 return err;
6419         }
6420         err = fscanf(f, fmt, &ret);
6421         if (err != 1) {
6422                 err = err == EOF ? -EIO : -errno;
6423                 pr_debug("failed to parse '%s': %s\n", file,
6424                         libbpf_strerror_r(err, buf, sizeof(buf)));
6425                 fclose(f);
6426                 return err;
6427         }
6428         fclose(f);
6429         return ret;
6430 }
6431
6432 static int determine_kprobe_perf_type(void)
6433 {
6434         const char *file = "/sys/bus/event_source/devices/kprobe/type";
6435
6436         return parse_uint_from_file(file, "%d\n");
6437 }
6438
6439 static int determine_uprobe_perf_type(void)
6440 {
6441         const char *file = "/sys/bus/event_source/devices/uprobe/type";
6442
6443         return parse_uint_from_file(file, "%d\n");
6444 }
6445
6446 static int determine_kprobe_retprobe_bit(void)
6447 {
6448         const char *file = "/sys/bus/event_source/devices/kprobe/format/retprobe";
6449
6450         return parse_uint_from_file(file, "config:%d\n");
6451 }
6452
6453 static int determine_uprobe_retprobe_bit(void)
6454 {
6455         const char *file = "/sys/bus/event_source/devices/uprobe/format/retprobe";
6456
6457         return parse_uint_from_file(file, "config:%d\n");
6458 }
6459
6460 static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name,
6461                                  uint64_t offset, int pid)
6462 {
6463         struct perf_event_attr attr = {};
6464         char errmsg[STRERR_BUFSIZE];
6465         int type, pfd, err;
6466
6467         type = uprobe ? determine_uprobe_perf_type()
6468                       : determine_kprobe_perf_type();
6469         if (type < 0) {
6470                 pr_warn("failed to determine %s perf type: %s\n",
6471                         uprobe ? "uprobe" : "kprobe",
6472                         libbpf_strerror_r(type, errmsg, sizeof(errmsg)));
6473                 return type;
6474         }
6475         if (retprobe) {
6476                 int bit = uprobe ? determine_uprobe_retprobe_bit()
6477                                  : determine_kprobe_retprobe_bit();
6478
6479                 if (bit < 0) {
6480                         pr_warn("failed to determine %s retprobe bit: %s\n",
6481                                 uprobe ? "uprobe" : "kprobe",
6482                                 libbpf_strerror_r(bit, errmsg, sizeof(errmsg)));
6483                         return bit;
6484                 }
6485                 attr.config |= 1 << bit;
6486         }
6487         attr.size = sizeof(attr);
6488         attr.type = type;
6489         attr.config1 = ptr_to_u64(name); /* kprobe_func or uprobe_path */
6490         attr.config2 = offset;           /* kprobe_addr or probe_offset */
6491
6492         /* pid filter is meaningful only for uprobes */
6493         pfd = syscall(__NR_perf_event_open, &attr,
6494                       pid < 0 ? -1 : pid /* pid */,
6495                       pid == -1 ? 0 : -1 /* cpu */,
6496                       -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC);
6497         if (pfd < 0) {
6498                 err = -errno;
6499                 pr_warn("%s perf_event_open() failed: %s\n",
6500                         uprobe ? "uprobe" : "kprobe",
6501                         libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
6502                 return err;
6503         }
6504         return pfd;
6505 }
6506
6507 struct bpf_link *bpf_program__attach_kprobe(struct bpf_program *prog,
6508                                             bool retprobe,
6509                                             const char *func_name)
6510 {
6511         char errmsg[STRERR_BUFSIZE];
6512         struct bpf_link *link;
6513         int pfd, err;
6514
6515         pfd = perf_event_open_probe(false /* uprobe */, retprobe, func_name,
6516                                     0 /* offset */, -1 /* pid */);
6517         if (pfd < 0) {
6518                 pr_warn("program '%s': failed to create %s '%s' perf event: %s\n",
6519                         bpf_program__title(prog, false),
6520                         retprobe ? "kretprobe" : "kprobe", func_name,
6521                         libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
6522                 return ERR_PTR(pfd);
6523         }
6524         link = bpf_program__attach_perf_event(prog, pfd);
6525         if (IS_ERR(link)) {
6526                 close(pfd);
6527                 err = PTR_ERR(link);
6528                 pr_warn("program '%s': failed to attach to %s '%s': %s\n",
6529                         bpf_program__title(prog, false),
6530                         retprobe ? "kretprobe" : "kprobe", func_name,
6531                         libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
6532                 return link;
6533         }
6534         return link;
6535 }
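
/* Usage sketch (illustrative only, not compiled): attach a loaded program to
 * a kprobe on do_sys_open. The section title "kprobe/do_sys_open" is a
 * hypothetical example matching the "kprobe/" prefix in section_defs[].
 */
#if 0
static struct bpf_link *example_attach_kprobe(struct bpf_object *obj)
{
        struct bpf_program *prog;

        prog = bpf_object__find_program_by_title(obj, "kprobe/do_sys_open");
        if (!prog)
                return ERR_PTR(-ENOENT);
        return bpf_program__attach_kprobe(prog, false /* !retprobe */,
                                          "do_sys_open");
}
#endif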
6536
6537 static struct bpf_link *attach_kprobe(const struct bpf_sec_def *sec,
6538                                       struct bpf_program *prog)
6539 {
6540         const char *func_name;
6541         bool retprobe;
6542
6543         func_name = bpf_program__title(prog, false) + sec->len;
6544         retprobe = strcmp(sec->sec, "kretprobe/") == 0;
6545
6546         return bpf_program__attach_kprobe(prog, retprobe, func_name);
6547 }
6548
6549 struct bpf_link *bpf_program__attach_uprobe(struct bpf_program *prog,
6550                                             bool retprobe, pid_t pid,
6551                                             const char *binary_path,
6552                                             size_t func_offset)
6553 {
6554         char errmsg[STRERR_BUFSIZE];
6555         struct bpf_link *link;
6556         int pfd, err;
6557
6558         pfd = perf_event_open_probe(true /* uprobe */, retprobe,
6559                                     binary_path, func_offset, pid);
6560         if (pfd < 0) {
6561                 pr_warn("program '%s': failed to create %s '%s:0x%zx' perf event: %s\n",
6562                         bpf_program__title(prog, false),
6563                         retprobe ? "uretprobe" : "uprobe",
6564                         binary_path, func_offset,
6565                         libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
6566                 return ERR_PTR(pfd);
6567         }
6568         link = bpf_program__attach_perf_event(prog, pfd);
6569         if (IS_ERR(link)) {
6570                 close(pfd);
6571                 err = PTR_ERR(link);
6572                 pr_warn("program '%s': failed to attach to %s '%s:0x%zx': %s\n",
6573                         bpf_program__title(prog, false),
6574                         retprobe ? "uretprobe" : "uprobe",
6575                         binary_path, func_offset,
6576                         libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
6577                 return link;
6578         }
6579         return link;
6580 }
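
/* Usage sketch (illustrative only, not compiled): attach a uprobe to a
 * user-space function. The binary path and func_off are placeholders;
 * func_off would normally come from the binary's symbol table.
 */
#if 0
static struct bpf_link *example_attach_uprobe(struct bpf_program *prog,
                                              size_t func_off)
{
        return bpf_program__attach_uprobe(prog, false /* !retprobe */,
                                          -1 /* any pid */, "/bin/bash",
                                          func_off);
}
#endif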
6581
6582 static int determine_tracepoint_id(const char *tp_category,
6583                                    const char *tp_name)
6584 {
6585         char file[PATH_MAX];
6586         int ret;
6587
6588         ret = snprintf(file, sizeof(file),
6589                        "/sys/kernel/debug/tracing/events/%s/%s/id",
6590                        tp_category, tp_name);
6591         if (ret < 0)
6592                 return -errno;
6593         if (ret >= sizeof(file)) {
6594                 pr_debug("tracepoint %s/%s path is too long\n",
6595                          tp_category, tp_name);
6596                 return -E2BIG;
6597         }
6598         return parse_uint_from_file(file, "%d\n");
6599 }
6600
6601 static int perf_event_open_tracepoint(const char *tp_category,
6602                                       const char *tp_name)
6603 {
6604         struct perf_event_attr attr = {};
6605         char errmsg[STRERR_BUFSIZE];
6606         int tp_id, pfd, err;
6607
6608         tp_id = determine_tracepoint_id(tp_category, tp_name);
6609         if (tp_id < 0) {
6610                 pr_warn("failed to determine tracepoint '%s/%s' perf event ID: %s\n",
6611                         tp_category, tp_name,
6612                         libbpf_strerror_r(tp_id, errmsg, sizeof(errmsg)));
6613                 return tp_id;
6614         }
6615
6616         attr.type = PERF_TYPE_TRACEPOINT;
6617         attr.size = sizeof(attr);
6618         attr.config = tp_id;
6619
6620         pfd = syscall(__NR_perf_event_open, &attr, -1 /* pid */, 0 /* cpu */,
6621                       -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC);
6622         if (pfd < 0) {
6623                 err = -errno;
6624                 pr_warn("tracepoint '%s/%s' perf_event_open() failed: %s\n",
6625                         tp_category, tp_name,
6626                         libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
6627                 return err;
6628         }
6629         return pfd;
6630 }
6631
6632 struct bpf_link *bpf_program__attach_tracepoint(struct bpf_program *prog,
6633                                                 const char *tp_category,
6634                                                 const char *tp_name)
6635 {
6636         char errmsg[STRERR_BUFSIZE];
6637         struct bpf_link *link;
6638         int pfd, err;
6639
6640         pfd = perf_event_open_tracepoint(tp_category, tp_name);
6641         if (pfd < 0) {
6642                 pr_warn("program '%s': failed to create tracepoint '%s/%s' perf event: %s\n",
6643                         bpf_program__title(prog, false),
6644                         tp_category, tp_name,
6645                         libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
6646                 return ERR_PTR(pfd);
6647         }
6648         link = bpf_program__attach_perf_event(prog, pfd);
6649         if (IS_ERR(link)) {
6650                 close(pfd);
6651                 err = PTR_ERR(link);
6652                 pr_warn("program '%s': failed to attach to tracepoint '%s/%s': %s\n",
6653                         bpf_program__title(prog, false),
6654                         tp_category, tp_name,
6655                         libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
6656                 return link;
6657         }
6658         return link;
6659 }
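
/* Usage sketch (illustrative only, not compiled): attach directly to a
 * tracepoint by category and name, bypassing section-name parsing.
 */
#if 0
static struct bpf_link *example_attach_tp(struct bpf_program *prog)
{
        return bpf_program__attach_tracepoint(prog, "sched", "sched_switch");
}
#endif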
6660
6661 static struct bpf_link *attach_tp(const struct bpf_sec_def *sec,
6662                                   struct bpf_program *prog)
6663 {
6664         char *sec_name, *tp_cat, *tp_name;
6665         struct bpf_link *link;
6666
6667         sec_name = strdup(bpf_program__title(prog, false));
6668         if (!sec_name)
6669                 return ERR_PTR(-ENOMEM);
6670
6671         /* extract "tp/<category>/<name>" */
6672         tp_cat = sec_name + sec->len;
6673         tp_name = strchr(tp_cat, '/');
6674         if (!tp_name) {
6675                 link = ERR_PTR(-EINVAL);
6676                 goto out;
6677         }
6678         *tp_name = '\0';
6679         tp_name++;
6680
6681         link = bpf_program__attach_tracepoint(prog, tp_cat, tp_name);
6682 out:
6683         free(sec_name);
6684         return link;
6685 }
6686
6687 static int bpf_link__detach_fd(struct bpf_link *link)
6688 {
6689         struct bpf_link_fd *l = (void *)link;
6690
6691         return close(l->fd);
6692 }
6693
6694 struct bpf_link *bpf_program__attach_raw_tracepoint(struct bpf_program *prog,
6695                                                     const char *tp_name)
6696 {
6697         char errmsg[STRERR_BUFSIZE];
6698         struct bpf_link_fd *link;
6699         int prog_fd, pfd;
6700
6701         prog_fd = bpf_program__fd(prog);
6702         if (prog_fd < 0) {
6703                 pr_warn("program '%s': can't attach before loaded\n",
6704                         bpf_program__title(prog, false));
6705                 return ERR_PTR(-EINVAL);
6706         }
6707
6708         link = calloc(1, sizeof(*link));
6709         if (!link)
6710                 return ERR_PTR(-ENOMEM);
6711         link->link.detach = &bpf_link__detach_fd;
6712
6713         pfd = bpf_raw_tracepoint_open(tp_name, prog_fd);
6714         if (pfd < 0) {
6715                 pfd = -errno;
6716                 free(link);
6717                 pr_warn("program '%s': failed to attach to raw tracepoint '%s': %s\n",
6718                         bpf_program__title(prog, false), tp_name,
6719                         libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
6720                 return ERR_PTR(pfd);
6721         }
6722         link->fd = pfd;
6723         return (struct bpf_link *)link;
6724 }
6725
6726 static struct bpf_link *attach_raw_tp(const struct bpf_sec_def *sec,
6727                                       struct bpf_program *prog)
6728 {
6729         const char *tp_name = bpf_program__title(prog, false) + sec->len;
6730
6731         return bpf_program__attach_raw_tracepoint(prog, tp_name);
6732 }
6733
6734 struct bpf_link *bpf_program__attach_trace(struct bpf_program *prog)
6735 {
6736         char errmsg[STRERR_BUFSIZE];
6737         struct bpf_link_fd *link;
6738         int prog_fd, pfd;
6739
6740         prog_fd = bpf_program__fd(prog);
6741         if (prog_fd < 0) {
6742                 pr_warn("program '%s': can't attach before loaded\n",
6743                         bpf_program__title(prog, false));
6744                 return ERR_PTR(-EINVAL);
6745         }
6746
6747         link = calloc(1, sizeof(*link));
6748         if (!link)
6749                 return ERR_PTR(-ENOMEM);
6750         link->link.detach = &bpf_link__detach_fd;
6751
6752         pfd = bpf_raw_tracepoint_open(NULL, prog_fd);
6753         if (pfd < 0) {
6754                 pfd = -errno;
6755                 free(link);
6756                 pr_warn("program '%s': failed to attach to trace: %s\n",
6757                         bpf_program__title(prog, false),
6758                         libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
6759                 return ERR_PTR(pfd);
6760         }
6761         link->fd = pfd;
6762         return (struct bpf_link *)link;
6763 }
6764
6765 static struct bpf_link *attach_trace(const struct bpf_sec_def *sec,
6766                                      struct bpf_program *prog)
6767 {
6768         return bpf_program__attach_trace(prog);
6769 }
6770
6771 struct bpf_link *bpf_program__attach(struct bpf_program *prog)
6772 {
6773         const struct bpf_sec_def *sec_def;
6774
6775         sec_def = find_sec_def(bpf_program__title(prog, false));
6776         if (!sec_def || !sec_def->attach_fn)
6777                 return ERR_PTR(-ESRCH);
6778
6779         return sec_def->attach_fn(sec_def, prog);
6780 }
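
/* Usage sketch (illustrative only, not compiled): auto-attach every program
 * in an object based on its section name; bpf_program__attach() fails with
 * -ESRCH for sections whose definition has no attach_fn.
 */
#if 0
static int example_attach_all(struct bpf_object *obj)
{
        struct bpf_program *prog;
        struct bpf_link *link;

        bpf_object__for_each_program(prog, obj) {
                link = bpf_program__attach(prog);
                /* earlier links are leaked on error, for brevity */
                if (IS_ERR(link))
                        return PTR_ERR(link);
        }
        return 0;
}
#endif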
6781
6782 enum bpf_perf_event_ret
6783 bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size,
6784                            void **copy_mem, size_t *copy_size,
6785                            bpf_perf_event_print_t fn, void *private_data)
6786 {
6787         struct perf_event_mmap_page *header = mmap_mem;
6788         __u64 data_head = ring_buffer_read_head(header);
6789         __u64 data_tail = header->data_tail;
6790         void *base = ((__u8 *)header) + page_size;
6791         int ret = LIBBPF_PERF_EVENT_CONT;
6792         struct perf_event_header *ehdr;
6793         size_t ehdr_size;
6794
6795         while (data_head != data_tail) {
6796                 ehdr = base + (data_tail & (mmap_size - 1));
6797                 ehdr_size = ehdr->size;
6798
6799                 if (((void *)ehdr) + ehdr_size > base + mmap_size) {
6800                         void *copy_start = ehdr;
6801                         size_t len_first = base + mmap_size - copy_start;
6802                         size_t len_secnd = ehdr_size - len_first;
6803
6804                         if (*copy_size < ehdr_size) {
6805                                 free(*copy_mem);
6806                                 *copy_mem = malloc(ehdr_size);
6807                                 if (!*copy_mem) {
6808                                         *copy_size = 0;
6809                                         ret = LIBBPF_PERF_EVENT_ERROR;
6810                                         break;
6811                                 }
6812                                 *copy_size = ehdr_size;
6813                         }
6814
6815                         memcpy(*copy_mem, copy_start, len_first);
6816                         memcpy(*copy_mem + len_first, base, len_secnd);
6817                         ehdr = *copy_mem;
6818                 }
6819
6820                 ret = fn(ehdr, private_data);
6821                 data_tail += ehdr_size;
6822                 if (ret != LIBBPF_PERF_EVENT_CONT)
6823                         break;
6824         }
6825
6826         ring_buffer_write_tail(header, data_tail);
6827         return ret;
6828 }
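
/* Usage sketch (illustrative only, not compiled): a minimal
 * bpf_perf_event_print_t callback for bpf_perf_event_read_simple(), counting
 * records via the private_data pointer.
 */
#if 0
static enum bpf_perf_event_ret
example_print_fn(struct perf_event_header *hdr, void *private_data)
{
        int *cnt = private_data;

        (*cnt)++;
        return LIBBPF_PERF_EVENT_CONT;
}
#endif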
6829
6830 struct perf_buffer;
6831
6832 struct perf_buffer_params {
6833         struct perf_event_attr *attr;
6834         /* if event_cb is specified, it takes precedence */
6835         perf_buffer_event_fn event_cb;
6836         /* sample_cb and lost_cb are higher-level common-case callbacks */
6837         perf_buffer_sample_fn sample_cb;
6838         perf_buffer_lost_fn lost_cb;
6839         void *ctx;
6840         int cpu_cnt;
6841         int *cpus;
6842         int *map_keys;
6843 };
6844
6845 struct perf_cpu_buf {
6846         struct perf_buffer *pb;
6847         void *base; /* mmap()'ed memory */
6848         void *buf; /* for reconstructing segmented data */
6849         size_t buf_size;
6850         int fd;
6851         int cpu;
6852         int map_key;
6853 };
6854
6855 struct perf_buffer {
6856         perf_buffer_event_fn event_cb;
6857         perf_buffer_sample_fn sample_cb;
6858         perf_buffer_lost_fn lost_cb;
6859         void *ctx; /* passed into callbacks */
6860
6861         size_t page_size;
6862         size_t mmap_size;
6863         struct perf_cpu_buf **cpu_bufs;
6864         struct epoll_event *events;
6865         int cpu_cnt; /* number of allocated CPU buffers */
6866         int epoll_fd; /* epoll instance FD */
6867         int map_fd; /* BPF_MAP_TYPE_PERF_EVENT_ARRAY BPF map FD */
6868 };
6869
6870 static void perf_buffer__free_cpu_buf(struct perf_buffer *pb,
6871                                       struct perf_cpu_buf *cpu_buf)
6872 {
6873         if (!cpu_buf)
6874                 return;
6875         if (cpu_buf->base &&
6876             munmap(cpu_buf->base, pb->mmap_size + pb->page_size))
6877                 pr_warn("failed to munmap cpu_buf #%d\n", cpu_buf->cpu);
6878         if (cpu_buf->fd >= 0) {
6879                 ioctl(cpu_buf->fd, PERF_EVENT_IOC_DISABLE, 0);
6880                 close(cpu_buf->fd);
6881         }
6882         free(cpu_buf->buf);
6883         free(cpu_buf);
6884 }
6885
6886 void perf_buffer__free(struct perf_buffer *pb)
6887 {
6888         int i;
6889
6890         if (!pb)
6891                 return;
6892         if (pb->cpu_bufs) {
6893                 for (i = 0; i < pb->cpu_cnt && pb->cpu_bufs[i]; i++) {
6894                         struct perf_cpu_buf *cpu_buf = pb->cpu_bufs[i];
6895
6896                         bpf_map_delete_elem(pb->map_fd, &cpu_buf->map_key);
6897                         perf_buffer__free_cpu_buf(pb, cpu_buf);
6898                 }
6899                 free(pb->cpu_bufs);
6900         }
6901         if (pb->epoll_fd >= 0)
6902                 close(pb->epoll_fd);
6903         free(pb->events);
6904         free(pb);
6905 }
6906
6907 static struct perf_cpu_buf *
6908 perf_buffer__open_cpu_buf(struct perf_buffer *pb, struct perf_event_attr *attr,
6909                           int cpu, int map_key)
6910 {
6911         struct perf_cpu_buf *cpu_buf;
6912         char msg[STRERR_BUFSIZE];
6913         int err;
6914
6915         cpu_buf = calloc(1, sizeof(*cpu_buf));
6916         if (!cpu_buf)
6917                 return ERR_PTR(-ENOMEM);
6918
6919         cpu_buf->pb = pb;
6920         cpu_buf->cpu = cpu;
6921         cpu_buf->map_key = map_key;
6922
6923         cpu_buf->fd = syscall(__NR_perf_event_open, attr, -1 /* pid */, cpu,
6924                               -1, PERF_FLAG_FD_CLOEXEC);
6925         if (cpu_buf->fd < 0) {
6926                 err = -errno;
6927                 pr_warn("failed to open perf buffer event on cpu #%d: %s\n",
6928                         cpu, libbpf_strerror_r(err, msg, sizeof(msg)));
6929                 goto error;
6930         }
6931
6932         cpu_buf->base = mmap(NULL, pb->mmap_size + pb->page_size,
6933                              PROT_READ | PROT_WRITE, MAP_SHARED,
6934                              cpu_buf->fd, 0);
6935         if (cpu_buf->base == MAP_FAILED) {
6936                 cpu_buf->base = NULL;
6937                 err = -errno;
6938                 pr_warn("failed to mmap perf buffer on cpu #%d: %s\n",
6939                         cpu, libbpf_strerror_r(err, msg, sizeof(msg)));
6940                 goto error;
6941         }
6942
6943         if (ioctl(cpu_buf->fd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
6944                 err = -errno;
6945                 pr_warn("failed to enable perf buffer event on cpu #%d: %s\n",
6946                         cpu, libbpf_strerror_r(err, msg, sizeof(msg)));
6947                 goto error;
6948         }
6949
6950         return cpu_buf;
6951
6952 error:
6953         perf_buffer__free_cpu_buf(pb, cpu_buf);
6954         return (struct perf_cpu_buf *)ERR_PTR(err);
6955 }
6956
6957 static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt,
6958                                               struct perf_buffer_params *p);
6959
6960 struct perf_buffer *perf_buffer__new(int map_fd, size_t page_cnt,
6961                                      const struct perf_buffer_opts *opts)
6962 {
6963         struct perf_buffer_params p = {};
6964         struct perf_event_attr attr = { 0, };
6965
6966         attr.config = PERF_COUNT_SW_BPF_OUTPUT;
6967         attr.type = PERF_TYPE_SOFTWARE;
6968         attr.sample_type = PERF_SAMPLE_RAW;
6969         attr.sample_period = 1;
6970         attr.wakeup_events = 1;
6971
6972         p.attr = &attr;
6973         p.sample_cb = opts ? opts->sample_cb : NULL;
6974         p.lost_cb = opts ? opts->lost_cb : NULL;
6975         p.ctx = opts ? opts->ctx : NULL;
6976
6977         return __perf_buffer__new(map_fd, page_cnt, &p);
6978 }
6979
6980 struct perf_buffer *
6981 perf_buffer__new_raw(int map_fd, size_t page_cnt,
6982                      const struct perf_buffer_raw_opts *opts)
6983 {
6984         struct perf_buffer_params p = {};
6985
6986         p.attr = opts->attr;
6987         p.event_cb = opts->event_cb;
6988         p.ctx = opts->ctx;
6989         p.cpu_cnt = opts->cpu_cnt;
6990         p.cpus = opts->cpus;
6991         p.map_keys = opts->map_keys;
6992
6993         return __perf_buffer__new(map_fd, page_cnt, &p);
6994 }
6995
6996 static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt,
6997                                               struct perf_buffer_params *p)
6998 {
6999         const char *online_cpus_file = "/sys/devices/system/cpu/online";
7000         struct bpf_map_info map = {};
7001         char msg[STRERR_BUFSIZE];
7002         struct perf_buffer *pb;
7003         bool *online = NULL;
7004         __u32 map_info_len;
7005         int err, i, j, n;
7006
7007         if (page_cnt & (page_cnt - 1)) {
7008                 pr_warn("page count should be a power of two, but is %zu\n",
7009                         page_cnt);
7010                 return ERR_PTR(-EINVAL);
7011         }
7012
7013         map_info_len = sizeof(map);
7014         err = bpf_obj_get_info_by_fd(map_fd, &map, &map_info_len);
7015         if (err) {
7016                 err = -errno;
7017                 pr_warn("failed to get map info for map FD %d: %s\n",
7018                         map_fd, libbpf_strerror_r(err, msg, sizeof(msg)));
7019                 return ERR_PTR(err);
7020         }
7021
7022         if (map.type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) {
7023                 pr_warn("map '%s' should be BPF_MAP_TYPE_PERF_EVENT_ARRAY\n",
7024                         map.name);
7025                 return ERR_PTR(-EINVAL);
7026         }
7027
7028         pb = calloc(1, sizeof(*pb));
7029         if (!pb)
7030                 return ERR_PTR(-ENOMEM);
7031
7032         pb->event_cb = p->event_cb;
7033         pb->sample_cb = p->sample_cb;
7034         pb->lost_cb = p->lost_cb;
7035         pb->ctx = p->ctx;
7036
7037         pb->page_size = getpagesize();
7038         pb->mmap_size = pb->page_size * page_cnt;
7039         pb->map_fd = map_fd;
7040
7041         pb->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
7042         if (pb->epoll_fd < 0) {
7043                 err = -errno;
7044                 pr_warn("failed to create epoll instance: %s\n",
7045                         libbpf_strerror_r(err, msg, sizeof(msg)));
7046                 goto error;
7047         }
7048
7049         if (p->cpu_cnt > 0) {
7050                 pb->cpu_cnt = p->cpu_cnt;
7051         } else {
7052                 pb->cpu_cnt = libbpf_num_possible_cpus();
7053                 if (pb->cpu_cnt < 0) {
7054                         err = pb->cpu_cnt;
7055                         goto error;
7056                 }
7057                 if (map.max_entries < pb->cpu_cnt)
7058                         pb->cpu_cnt = map.max_entries;
7059         }
7060
7061         pb->events = calloc(pb->cpu_cnt, sizeof(*pb->events));
7062         if (!pb->events) {
7063                 err = -ENOMEM;
7064                 pr_warn("failed to allocate events: out of memory\n");
7065                 goto error;
7066         }
7067         pb->cpu_bufs = calloc(pb->cpu_cnt, sizeof(*pb->cpu_bufs));
7068         if (!pb->cpu_bufs) {
7069                 err = -ENOMEM;
7070                 pr_warn("failed to allocate buffers: out of memory\n");
7071                 goto error;
7072         }
7073
7074         err = parse_cpu_mask_file(online_cpus_file, &online, &n);
7075         if (err) {
7076                 pr_warn("failed to get online CPU mask: %d\n", err);
7077                 goto error;
7078         }
7079
7080         for (i = 0, j = 0; i < pb->cpu_cnt; i++) {
7081                 struct perf_cpu_buf *cpu_buf;
7082                 int cpu, map_key;
7083
7084                 cpu = p->cpu_cnt > 0 ? p->cpus[i] : i;
7085                 map_key = p->cpu_cnt > 0 ? p->map_keys[i] : i;
7086
7087                 /* in case the user didn't explicitly request particular CPUs
7088                  * to be attached to, skip offline/not-present CPUs
7089                  */
7090                 if (p->cpu_cnt <= 0 && (cpu >= n || !online[cpu]))
7091                         continue;
7092
7093                 cpu_buf = perf_buffer__open_cpu_buf(pb, p->attr, cpu, map_key);
7094                 if (IS_ERR(cpu_buf)) {
7095                         err = PTR_ERR(cpu_buf);
7096                         goto error;
7097                 }
7098
7099                 pb->cpu_bufs[j] = cpu_buf;
7100
7101                 err = bpf_map_update_elem(pb->map_fd, &map_key,
7102                                           &cpu_buf->fd, 0);
7103                 if (err) {
7104                         err = -errno;
7105                         pr_warn("failed to set cpu #%d, key %d -> perf FD %d: %s\n",
7106                                 cpu, map_key, cpu_buf->fd,
7107                                 libbpf_strerror_r(err, msg, sizeof(msg)));
7108                         goto error;
7109                 }
7110
7111                 pb->events[j].events = EPOLLIN;
7112                 pb->events[j].data.ptr = cpu_buf;
7113                 if (epoll_ctl(pb->epoll_fd, EPOLL_CTL_ADD, cpu_buf->fd,
7114                               &pb->events[j]) < 0) {
7115                         err = -errno;
7116                         pr_warn("failed to epoll_ctl cpu #%d perf FD %d: %s\n",
7117                                 cpu, cpu_buf->fd,
7118                                 libbpf_strerror_r(err, msg, sizeof(msg)));
7119                         goto error;
7120                 }
7121                 j++;
7122         }
7123         pb->cpu_cnt = j;
7124         free(online);
7125
7126         return pb;
7127
7128 error:
7129         free(online);
7130         if (pb)
7131                 perf_buffer__free(pb);
7132         return ERR_PTR(err);
7133 }
7134
7135 struct perf_sample_raw {
7136         struct perf_event_header header;
7137         uint32_t size;
7138         char data[0];
7139 };
7140
7141 struct perf_sample_lost {
7142         struct perf_event_header header;
7143         uint64_t id;
7144         uint64_t lost;
7145         uint64_t sample_id;
7146 };
7147
7148 static enum bpf_perf_event_ret
7149 perf_buffer__process_record(struct perf_event_header *e, void *ctx)
7150 {
7151         struct perf_cpu_buf *cpu_buf = ctx;
7152         struct perf_buffer *pb = cpu_buf->pb;
7153         void *data = e;
7154
7155         /* user wants full control over parsing perf event */
7156         if (pb->event_cb)
7157                 return pb->event_cb(pb->ctx, cpu_buf->cpu, e);
7158
7159         switch (e->type) {
7160         case PERF_RECORD_SAMPLE: {
7161                 struct perf_sample_raw *s = data;
7162
7163                 if (pb->sample_cb)
7164                         pb->sample_cb(pb->ctx, cpu_buf->cpu, s->data, s->size);
7165                 break;
7166         }
7167         case PERF_RECORD_LOST: {
7168                 struct perf_sample_lost *s = data;
7169
7170                 if (pb->lost_cb)
7171                         pb->lost_cb(pb->ctx, cpu_buf->cpu, s->lost);
7172                 break;
7173         }
7174         default:
7175                 pr_warn("unknown perf sample type %d\n", e->type);
7176                 return LIBBPF_PERF_EVENT_ERROR;
7177         }
7178         return LIBBPF_PERF_EVENT_CONT;
7179 }
7180
7181 static int perf_buffer__process_records(struct perf_buffer *pb,
7182                                         struct perf_cpu_buf *cpu_buf)
7183 {
7184         enum bpf_perf_event_ret ret;
7185
7186         ret = bpf_perf_event_read_simple(cpu_buf->base, pb->mmap_size,
7187                                          pb->page_size, &cpu_buf->buf,
7188                                          &cpu_buf->buf_size,
7189                                          perf_buffer__process_record, cpu_buf);
7190         if (ret != LIBBPF_PERF_EVENT_CONT)
7191                 return ret;
7192         return 0;
7193 }
7194
7195 int perf_buffer__poll(struct perf_buffer *pb, int timeout_ms)
7196 {
7197         int i, cnt, err;
7198
7199         cnt = epoll_wait(pb->epoll_fd, pb->events, pb->cpu_cnt, timeout_ms);
7200         for (i = 0; i < cnt; i++) {
7201                 struct perf_cpu_buf *cpu_buf = pb->events[i].data.ptr;
7202
7203                 err = perf_buffer__process_records(pb, cpu_buf);
7204                 if (err) {
7205                         pr_warn("error while processing records: %d\n", err);
7206                         return err;
7207                 }
7208         }
7209         return cnt < 0 ? -errno : cnt;
7210 }
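
/* Usage sketch (illustrative only, not compiled): the common perf_buffer
 * consumer loop. "events" is a hypothetical BPF_MAP_TYPE_PERF_EVENT_ARRAY
 * map name and handle_sample() a user-supplied perf_buffer_sample_fn.
 */
#if 0
static void handle_sample(void *ctx, int cpu, void *data, __u32 size)
{
        /* consume one raw sample produced by bpf_perf_event_output() */
}

static int example_perf_buffer(struct bpf_object *obj)
{
        struct perf_buffer_opts pb_opts = { .sample_cb = handle_sample };
        struct perf_buffer *pb;
        int map_fd, err;

        map_fd = bpf_object__find_map_fd_by_name(obj, "events");
        if (map_fd < 0)
                return map_fd;
        pb = perf_buffer__new(map_fd, 8 /* pages per CPU, power of two */,
                              &pb_opts);
        if (IS_ERR(pb))
                return PTR_ERR(pb);
        while ((err = perf_buffer__poll(pb, 100 /* ms */)) >= 0)
                ; /* handle_sample() is invoked from within poll */
        perf_buffer__free(pb);
        return err;
}
#endif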
7211
7212 struct bpf_prog_info_array_desc {
7213         int     array_offset;   /* e.g. offset of jited_prog_insns */
7214         int     count_offset;   /* e.g. offset of jited_prog_len */
7215         int     size_offset;    /* > 0: offset of rec size,
7216                                  * < 0: fixed size of -size_offset
7217                                  */
7218 };
7219
7220 static struct bpf_prog_info_array_desc bpf_prog_info_array_desc[] = {
7221         [BPF_PROG_INFO_JITED_INSNS] = {
7222                 offsetof(struct bpf_prog_info, jited_prog_insns),
7223                 offsetof(struct bpf_prog_info, jited_prog_len),
7224                 -1,
7225         },
7226         [BPF_PROG_INFO_XLATED_INSNS] = {
7227                 offsetof(struct bpf_prog_info, xlated_prog_insns),
7228                 offsetof(struct bpf_prog_info, xlated_prog_len),
7229                 -1,
7230         },
7231         [BPF_PROG_INFO_MAP_IDS] = {
7232                 offsetof(struct bpf_prog_info, map_ids),
7233                 offsetof(struct bpf_prog_info, nr_map_ids),
7234                 -(int)sizeof(__u32),
7235         },
7236         [BPF_PROG_INFO_JITED_KSYMS] = {
7237                 offsetof(struct bpf_prog_info, jited_ksyms),
7238                 offsetof(struct bpf_prog_info, nr_jited_ksyms),
7239                 -(int)sizeof(__u64),
7240         },
7241         [BPF_PROG_INFO_JITED_FUNC_LENS] = {
7242                 offsetof(struct bpf_prog_info, jited_func_lens),
7243                 offsetof(struct bpf_prog_info, nr_jited_func_lens),
7244                 -(int)sizeof(__u32),
7245         },
7246         [BPF_PROG_INFO_FUNC_INFO] = {
7247                 offsetof(struct bpf_prog_info, func_info),
7248                 offsetof(struct bpf_prog_info, nr_func_info),
7249                 offsetof(struct bpf_prog_info, func_info_rec_size),
7250         },
7251         [BPF_PROG_INFO_LINE_INFO] = {
7252                 offsetof(struct bpf_prog_info, line_info),
7253                 offsetof(struct bpf_prog_info, nr_line_info),
7254                 offsetof(struct bpf_prog_info, line_info_rec_size),
7255         },
7256         [BPF_PROG_INFO_JITED_LINE_INFO] = {
7257                 offsetof(struct bpf_prog_info, jited_line_info),
7258                 offsetof(struct bpf_prog_info, nr_jited_line_info),
7259                 offsetof(struct bpf_prog_info, jited_line_info_rec_size),
7260         },
7261         [BPF_PROG_INFO_PROG_TAGS] = {
7262                 offsetof(struct bpf_prog_info, prog_tags),
7263                 offsetof(struct bpf_prog_info, nr_prog_tags),
7264                 -(int)sizeof(__u8) * BPF_TAG_SIZE,
7265         },
7266
7267 };
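
/* Example of the encoding above: BPF_PROG_INFO_MAP_IDS entries are plain
 * __u32 map IDs with no rec-size field in bpf_prog_info, so the fixed
 * 4-byte record size is encoded as -(int)sizeof(__u32). FUNC_INFO records,
 * by contrast, have a kernel-reported size, so size_offset points at the
 * func_info_rec_size member.
 */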
7268
7269 static __u32 bpf_prog_info_read_offset_u32(struct bpf_prog_info *info,
7270                                            int offset)
7271 {
7272         __u32 *array = (__u32 *)info;
7273
7274         if (offset >= 0)
7275                 return array[offset / sizeof(__u32)];
7276         return -(int)offset;
7277 }
7278
7279 static __u64 bpf_prog_info_read_offset_u64(struct bpf_prog_info *info,
7280                                            int offset)
7281 {
7282         __u64 *array = (__u64 *)info;
7283
7284         if (offset >= 0)
7285                 return array[offset / sizeof(__u64)];
7286         return -(int)offset;
7287 }
7288
7289 static void bpf_prog_info_set_offset_u32(struct bpf_prog_info *info, int offset,
7290                                          __u32 val)
7291 {
7292         __u32 *array = (__u32 *)info;
7293
7294         if (offset >= 0)
7295                 array[offset / sizeof(__u32)] = val;
7296 }
7297
7298 static void bpf_prog_info_set_offset_u64(struct bpf_prog_info *info, int offset,
7299                                          __u64 val)
7300 {
7301         __u64 *array = (__u64 *)info;
7302
7303         if (offset >= 0)
7304                 array[offset / sizeof(__u64)] = val;
7305 }
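
/* The four helpers above treat struct bpf_prog_info as a flat array of
 * __u32 (or __u64) cells indexed by byte offset; this is valid because
 * every field referenced through bpf_prog_info_array_desc is a naturally
 * aligned __u32/__u64 member. A negative "offset" is not an offset at all
 * but a negated fixed record size, which the read helpers simply return.
 */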
7306
7307 struct bpf_prog_info_linear *
7308 bpf_program__get_prog_info_linear(int fd, __u64 arrays)
7309 {
7310         struct bpf_prog_info_linear *info_linear;
7311         struct bpf_prog_info info = {};
7312         __u32 info_len = sizeof(info);
7313         __u32 data_len = 0;
7314         int i, err;
7315         void *ptr;
7316
7317         if (arrays >> BPF_PROG_INFO_LAST_ARRAY)
7318                 return ERR_PTR(-EINVAL);
7319
7320         /* step 1: get array dimensions */
7321         err = bpf_obj_get_info_by_fd(fd, &info, &info_len);
7322         if (err) {
7323                 pr_debug("can't get prog info: %s\n", strerror(errno));
7324                 return ERR_PTR(-EFAULT);
7325         }
7326
7327         /* step 2: calculate total size of all arrays */
7328         for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) {
7329                 bool include_array = (arrays & (1UL << i)) > 0;
7330                 struct bpf_prog_info_array_desc *desc;
7331                 __u32 count, size;
7332
7333                 desc = bpf_prog_info_array_desc + i;
7334
7335                 /* kernel is too old to support this field */
7336                 if (info_len < desc->array_offset + sizeof(__u32) ||
7337                     info_len < desc->count_offset + sizeof(__u32) ||
7338                     (desc->size_offset > 0 && info_len < desc->size_offset))
7339                         include_array = false;
7340
7341                 if (!include_array) {
7342                         arrays &= ~(1UL << i);  /* clear the bit */
7343                         continue;
7344                 }
7345
7346                 count = bpf_prog_info_read_offset_u32(&info, desc->count_offset);
7347                 size  = bpf_prog_info_read_offset_u32(&info, desc->size_offset);
7348
7349                 data_len += count * size;
7350         }
7351
7352         /* step 3: allocate contiguous memory */
7353         data_len = roundup(data_len, sizeof(__u64));
7354         info_linear = malloc(sizeof(struct bpf_prog_info_linear) + data_len);
7355         if (!info_linear)
7356                 return ERR_PTR(-ENOMEM);
7357
7358         /* step 4: fill info_linear->info, pointing array fields into info_linear->data */
7359         info_linear->arrays = arrays;
7360         memset(&info_linear->info, 0, sizeof(info));
7361         ptr = info_linear->data;
7362
7363         for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) {
7364                 struct bpf_prog_info_array_desc *desc;
7365                 __u32 count, size;
7366
7367                 if ((arrays & (1UL << i)) == 0)
7368                         continue;
7369
7370                 desc  = bpf_prog_info_array_desc + i;
7371                 count = bpf_prog_info_read_offset_u32(&info, desc->count_offset);
7372                 size  = bpf_prog_info_read_offset_u32(&info, desc->size_offset);
7373                 bpf_prog_info_set_offset_u32(&info_linear->info,
7374                                              desc->count_offset, count);
7375                 bpf_prog_info_set_offset_u32(&info_linear->info,
7376                                              desc->size_offset, size);
7377                 bpf_prog_info_set_offset_u64(&info_linear->info,
7378                                              desc->array_offset,
7379                                              ptr_to_u64(ptr));
7380                 ptr += count * size;
7381         }
7382
7383         /* step 5: call the syscall again to fetch the requested arrays */
7384         err = bpf_obj_get_info_by_fd(fd, &info_linear->info, &info_len);
7385         if (err) {
7386                 pr_debug("can't get prog info: %s\n", strerror(errno));
7387                 free(info_linear);
7388                 return ERR_PTR(-EFAULT);
7389         }
7390
7391         /* step 6: verify the data */
7392         for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) {
7393                 struct bpf_prog_info_array_desc *desc;
7394                 __u32 v1, v2;
7395
7396                 if ((arrays & (1UL << i)) == 0)
7397                         continue;
7398
7399                 desc = bpf_prog_info_array_desc + i;
7400                 v1 = bpf_prog_info_read_offset_u32(&info, desc->count_offset);
7401                 v2 = bpf_prog_info_read_offset_u32(&info_linear->info,
7402                                                    desc->count_offset);
7403                 if (v1 != v2)
7404                         pr_warn("%s: mismatch in element count\n", __func__);
7405
7406                 v1 = bpf_prog_info_read_offset_u32(&info, desc->size_offset);
7407                 v2 = bpf_prog_info_read_offset_u32(&info_linear->info,
7408                                                    desc->size_offset);
7409                 if (v1 != v2)
7410                         pr_warn("%s: mismatch in rec size\n", __func__);
7411         }
7412
7413         /* step 7: update info_len and data_len */
7414         info_linear->info_len = sizeof(struct bpf_prog_info);
7415         info_linear->data_len = data_len;
7416
7417         return info_linear;
7418 }
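
/* Minimal usage sketch (error handling elided; prog_fd is assumed to be
 * a valid BPF program fd):
 *
 *   __u64 arrays = (1UL << BPF_PROG_INFO_XLATED_INSNS) |
 *                  (1UL << BPF_PROG_INFO_MAP_IDS);
 *   struct bpf_prog_info_linear *il;
 *
 *   il = bpf_program__get_prog_info_linear(prog_fd, arrays);
 *   if (!IS_ERR(il)) {
 *           // array fields like il->info.map_ids point into il->data
 *           free(il);
 *   }
 */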
7419
7420 void bpf_program__bpil_addr_to_offs(struct bpf_prog_info_linear *info_linear)
7421 {
7422         int i;
7423
7424         for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) {
7425                 struct bpf_prog_info_array_desc *desc;
7426                 __u64 addr, offs;
7427
7428                 if ((info_linear->arrays & (1UL << i)) == 0)
7429                         continue;
7430
7431                 desc = bpf_prog_info_array_desc + i;
7432                 addr = bpf_prog_info_read_offset_u64(&info_linear->info,
7433                                                      desc->array_offset);
7434                 offs = addr - ptr_to_u64(info_linear->data);
7435                 bpf_prog_info_set_offset_u64(&info_linear->info,
7436                                              desc->array_offset, offs);
7437         }
7438 }
7439
7440 void bpf_program__bpil_offs_to_addr(struct bpf_prog_info_linear *info_linear)
7441 {
7442         int i;
7443
7444         for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) {
7445                 struct bpf_prog_info_array_desc *desc;
7446                 __u64 addr, offs;
7447
7448                 if ((info_linear->arrays & (1UL << i)) == 0)
7449                         continue;
7450
7451                 desc = bpf_prog_info_array_desc + i;
7452                 offs = bpf_prog_info_read_offset_u64(&info_linear->info,
7453                                                      desc->array_offset);
7454                 addr = offs + ptr_to_u64(info_linear->data);
7455                 bpf_prog_info_set_offset_u64(&info_linear->info,
7456                                              desc->array_offset, addr);
7457         }
7458 }
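
/* The two helpers above make a bpf_prog_info_linear blob relocatable:
 * addr_to_offs rewrites the absolute array pointers as offsets relative
 * to info_linear->data, and offs_to_addr restores them once the blob has
 * been loaded back in, possibly at a different address (perf uses this to
 * carry program info inside perf.data). Round-trip sketch:
 *
 *   bpf_program__bpil_addr_to_offs(il);
 *   write(fd, il, sizeof(*il) + il->data_len);
 *   ...
 *   bpf_program__bpil_offs_to_addr(il); // after reading the blob back
 */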
7459
7460 int parse_cpu_mask_str(const char *s, bool **mask, int *mask_sz)
7461 {
7462         int err = 0, n, len, start, end = -1;
7463         bool *tmp;
7464
7465         *mask = NULL;
7466         *mask_sz = 0;
7467
7468         /* Each substring separated by ',' has the format \d+-\d+ or \d+; ranges are expected in ascending, non-overlapping order (as in kernel-provided cpumask files), which the code below doesn't validate */
7469         while (*s) {
7470                 if (*s == ',' || *s == '\n') {
7471                         s++;
7472                         continue;
7473                 }
7474                 n = sscanf(s, "%d%n-%d%n", &start, &len, &end, &len);
7475                 if (n <= 0 || n > 2) {
7476                         pr_warn("Failed to get CPU range %s: %d\n", s, n);
7477                         err = -EINVAL;
7478                         goto cleanup;
7479                 } else if (n == 1) {
7480                         end = start;
7481                 }
7482                 if (start < 0 || start > end) {
7483                         pr_warn("Invalid CPU range [%d,%d] in %s\n",
7484                                 start, end, s);
7485                         err = -EINVAL;
7486                         goto cleanup;
7487                 }
7488                 tmp = realloc(*mask, end + 1);
7489                 if (!tmp) {
7490                         err = -ENOMEM;
7491                         goto cleanup;
7492                 }
7493                 *mask = tmp;
7494                 memset(tmp + *mask_sz, 0, start - *mask_sz);
7495                 memset(tmp + start, 1, end - start + 1);
7496                 *mask_sz = end + 1;
7497                 s += len;
7498         }
7499         if (!*mask_sz) {
7500                 pr_warn("Empty CPU range\n");
7501                 return -EINVAL;
7502         }
7503         return 0;
7504 cleanup:
7505         free(*mask);
7506         *mask = NULL;
7507         return err;
7508 }
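
/* Example: parse_cpu_mask_str("0-2,4\n", &mask, &n) yields n == 5 and
 * mask == {true, true, true, false, true}. On success the caller owns
 * the mask and must free() it.
 */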
7509
7510 int parse_cpu_mask_file(const char *fcpu, bool **mask, int *mask_sz)
7511 {
7512         int fd, err = 0, len;
7513         char buf[128];
7514
7515         fd = open(fcpu, O_RDONLY);
7516         if (fd < 0) {
7517                 err = -errno;
7518                 pr_warn("Failed to open cpu mask file %s: %d\n", fcpu, err);
7519                 return err;
7520         }
7521         len = read(fd, buf, sizeof(buf));
7522         close(fd);
7523         if (len <= 0) {
7524                 err = len ? -errno : -EINVAL;
7525                 pr_warn("Failed to read cpu mask from %s: %d\n", fcpu, err);
7526                 return err;
7527         }
7528         if (len >= sizeof(buf)) {
7529                 pr_warn("CPU mask is too big in file %s\n", fcpu);
7530                 return -E2BIG;
7531         }
7532         buf[len] = '\0';
7533
7534         return parse_cpu_mask_str(buf, mask, mask_sz);
7535 }
7536
7537 int libbpf_num_possible_cpus(void)
7538 {
7539         static const char *fcpu = "/sys/devices/system/cpu/possible";
7540         static int cpus;
7541         int err, n, i, tmp_cpus;
7542         bool *mask;
7543
7544         tmp_cpus = READ_ONCE(cpus);
7545         if (tmp_cpus > 0)
7546                 return tmp_cpus;
7547
7548         err = parse_cpu_mask_file(fcpu, &mask, &n);
7549         if (err)
7550                 return err;
7551
7552         tmp_cpus = 0;
7553         for (i = 0; i < n; i++) {
7554                 if (mask[i])
7555                         tmp_cpus++;
7556         }
7557         free(mask);
7558
7559         WRITE_ONCE(cpus, tmp_cpus);
7560         return tmp_cpus;
7561 }
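
/* Example: with /sys/devices/system/cpu/possible containing "0-7", this
 * returns 8. Possible CPUs can outnumber online CPUs, and this count is
 * the right size for per-CPU map value buffers. The unsynchronized
 * 'cpus' cache is benign: concurrent callers may both parse the file,
 * but they store the same value.
 */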
7562
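/* The bpf_object__*_skeleton() functions below back the code emitted by
 * "bpftool gen skeleton". A condensed sketch of the generated wrappers,
 * with hypothetical names for a skeleton called my_obj:
 *
 *   skel = my_obj__open();       // bpf_object__open_skeleton()
 *   err  = my_obj__load(skel);   // bpf_object__load_skeleton()
 *   err  = my_obj__attach(skel); // bpf_object__attach_skeleton()
 *   ...
 *   my_obj__destroy(skel);       // detach + bpf_object__destroy_skeleton()
 */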
7563 int bpf_object__open_skeleton(struct bpf_object_skeleton *s,
7564                               const struct bpf_object_open_opts *opts)
7565 {
7566         DECLARE_LIBBPF_OPTS(bpf_object_open_opts, skel_opts,
7567                 .object_name = s->name,
7568         );
7569         struct bpf_object *obj;
7570         int i;
7571
7572         /* Attempt to preserve opts->object_name, unless explicitly
7573          * overridden by the user. Overriding the object name for skeletons
7574          * is discouraged, as it breaks global data maps: their map names
7575          * are prefixed with the object name, and bpftool bakes that name
7576          * into the skeleton when it is generated.
7577          */
7578         if (opts) {
7579                 memcpy(&skel_opts, opts, sizeof(*opts));
7580                 if (!opts->object_name)
7581                         skel_opts.object_name = s->name;
7582         }
7583
7584         obj = bpf_object__open_mem(s->data, s->data_sz, &skel_opts);
7585         if (IS_ERR(obj)) {
7586                 pr_warn("failed to initialize skeleton BPF object '%s': %ld\n",
7587                         s->name, PTR_ERR(obj));
7588                 return PTR_ERR(obj);
7589         }
7590
7591         *s->obj = obj;
7592
7593         for (i = 0; i < s->map_cnt; i++) {
7594                 struct bpf_map **map = s->maps[i].map;
7595                 const char *name = s->maps[i].name;
7596                 void **mmaped = s->maps[i].mmaped;
7597
7598                 *map = bpf_object__find_map_by_name(obj, name);
7599                 if (!*map) {
7600                         pr_warn("failed to find skeleton map '%s'\n", name);
7601                         return -ESRCH;
7602                 }
7603
7604                 /* externs shouldn't be pre-initialized from user code */
7605                 if (mmaped && (*map)->libbpf_type != LIBBPF_MAP_KCONFIG)
7606                         *mmaped = (*map)->mmaped;
7607         }
7608
7609         for (i = 0; i < s->prog_cnt; i++) {
7610                 struct bpf_program **prog = s->progs[i].prog;
7611                 const char *name = s->progs[i].name;
7612
7613                 *prog = bpf_object__find_program_by_name(obj, name);
7614                 if (!*prog) {
7615                         pr_warn("failed to find skeleton program '%s'\n", name);
7616                         return -ESRCH;
7617                 }
7618         }
7619
7620         return 0;
7621 }
7622
7623 int bpf_object__load_skeleton(struct bpf_object_skeleton *s)
7624 {
7625         int i, err;
7626
7627         err = bpf_object__load(*s->obj);
7628         if (err) {
7629                 pr_warn("failed to load BPF skeleton '%s': %d\n", s->name, err);
7630                 return err;
7631         }
7632
7633         for (i = 0; i < s->map_cnt; i++) {
7634                 struct bpf_map *map = *s->maps[i].map;
7635                 size_t mmap_sz = bpf_map_mmap_sz(map);
7636                 int prot, map_fd = bpf_map__fd(map);
7637                 void **mmaped = s->maps[i].mmaped;
7638
7639                 if (!mmaped)
7640                         continue;
7641
7642                 if (!(map->def.map_flags & BPF_F_MMAPABLE)) {
7643                         *mmaped = NULL;
7644                         continue;
7645                 }
7646
7647                 if (map->def.map_flags & BPF_F_RDONLY_PROG)
7648                         prot = PROT_READ;
7649                 else
7650                         prot = PROT_READ | PROT_WRITE;
7651
7652                 /* Remap the anonymous mmap()-ed "map initialization image"
7653                  * as BPF map-backed mmap()-ed memory, preserving the same
7654                  * memory address. This makes the kernel change the process'
7655                  * page table to point at a different piece of kernel memory,
7656                  * but from the userspace point of view the address (and its
7657                  * contents, which are identical at this point) stays the
7658                  * same. The mapping is released by bpf_object__close() as
7659                  * part of the normal cleanup procedure, so skeleton cleanup
7660                  * doesn't need to worry about it.
7661                  */
7662                 *mmaped = mmap(map->mmaped, mmap_sz, prot,
7663                                 MAP_SHARED | MAP_FIXED, map_fd, 0);
7664                 if (*mmaped == MAP_FAILED) {
7665                         err = -errno;
7666                         *mmaped = NULL;
7667                         pr_warn("failed to re-mmap() map '%s': %d\n",
7668                                  bpf_map__name(map), err);
7669                         return err;
7670                 }
7671         }
7672
7673         return 0;
7674 }
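
/* After a successful load, each skeleton 'mmaped' pointer (e.g. skel->bss
 * or skel->data in generated code) refers to live, BPF map-backed memory,
 * so userspace reads and writes through it are immediately visible to the
 * BPF program, and vice versa.
 */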
7675
7676 int bpf_object__attach_skeleton(struct bpf_object_skeleton *s)
7677 {
7678         int i;
7679
7680         for (i = 0; i < s->prog_cnt; i++) {
7681                 struct bpf_program *prog = *s->progs[i].prog;
7682                 struct bpf_link **link = s->progs[i].link;
7683                 const struct bpf_sec_def *sec_def;
7684                 const char *sec_name = bpf_program__title(prog, false);
7685
7686                 sec_def = find_sec_def(sec_name);
7687                 if (!sec_def || !sec_def->attach_fn)
7688                         continue;
7689
7690                 *link = sec_def->attach_fn(sec_def, prog);
7691                 if (IS_ERR(*link)) {
7692                         pr_warn("failed to auto-attach program '%s': %ld\n",
7693                                 bpf_program__name(prog), PTR_ERR(*link));
7694                         return PTR_ERR(*link);
7695                 }
7696         }
7697
7698         return 0;
7699 }
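
/* Programs whose section name has no auto-attach handler are silently
 * skipped above: their skeleton 'link' slot stays NULL and they must be
 * attached manually with an appropriate bpf_program__attach_*() variant.
 */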
7700
7701 void bpf_object__detach_skeleton(struct bpf_object_skeleton *s)
7702 {
7703         int i;
7704
7705         for (i = 0; i < s->prog_cnt; i++) {
7706                 struct bpf_link **link = s->progs[i].link;
7707
7708                 if (!IS_ERR_OR_NULL(*link))
7709                         bpf_link__destroy(*link);
7710                 *link = NULL;
7711         }
7712 }
7713
7714 void bpf_object__destroy_skeleton(struct bpf_object_skeleton *s)
7715 {
7716         if (s->progs)
7717                 bpf_object__detach_skeleton(s);
7718         if (s->obj)
7719                 bpf_object__close(*s->obj);
7720         free(s->maps);
7721         free(s->progs);
7722         free(s);
7723 }