// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)

/*
 * Common eBPF ELF object loading operations.
 *
 * Copyright (C) 2013-2015 Alexei Starovoitov <ast@kernel.org>
 * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com>
 * Copyright (C) 2015 Huawei Inc.
 * Copyright (C) 2017 Nicira, Inc.
 * Copyright (C) 2019 Isovalent, Inc.
 */

#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif
#include <stdlib.h>
#include <stdio.h>
#include <stdarg.h>
#include <libgen.h>
#include <inttypes.h>
#include <limits.h>
#include <string.h>
#include <unistd.h>
#include <endian.h>
#include <fcntl.h>
#include <errno.h>
#include <ctype.h>
#include <asm/unistd.h>
#include <linux/err.h>
#include <linux/kernel.h>
#include <linux/bpf.h>
#include <linux/btf.h>
#include <linux/filter.h>
#include <linux/list.h>
#include <linux/limits.h>
#include <linux/perf_event.h>
#include <linux/ring_buffer.h>
#include <linux/version.h>
#include <sys/epoll.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/vfs.h>
#include <sys/utsname.h>
#include <sys/resource.h>
#include <libelf.h>
#include <gelf.h>
#include <zlib.h>

#include "libbpf.h"
#include "bpf.h"
#include "btf.h"
#include "str_error.h"
#include "libbpf_internal.h"
#include "hashmap.h"

#ifndef EM_BPF
#define EM_BPF 247
#endif

#ifndef BPF_FS_MAGIC
#define BPF_FS_MAGIC            0xcafe4a11
#endif

#define BPF_INSN_SZ (sizeof(struct bpf_insn))

/* vfprintf() in __base_pr() uses a nonliteral format string. It may break
 * compilation if the user enables the corresponding warning. Disable it
 * explicitly.
 */
#pragma GCC diagnostic ignored "-Wformat-nonliteral"

#define __printf(a, b)  __attribute__((format(printf, a, b)))

static struct bpf_map *bpf_object__add_map(struct bpf_object *obj);
static const struct btf_type *
skip_mods_and_typedefs(const struct btf *btf, __u32 id, __u32 *res_id);

static int __base_pr(enum libbpf_print_level level, const char *format,
                     va_list args)
{
        if (level == LIBBPF_DEBUG)
                return 0;

        return vfprintf(stderr, format, args);
}

static libbpf_print_fn_t __libbpf_pr = __base_pr;

libbpf_print_fn_t libbpf_set_print(libbpf_print_fn_t fn)
{
        libbpf_print_fn_t old_print_fn = __libbpf_pr;

        __libbpf_pr = fn;
        return old_print_fn;
}
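
/* Usage sketch (editor's illustrative example, not part of libbpf itself):
 * an application can route libbpf's logging through its own callback and
 * restore the previous one later; my_print() below is a hypothetical
 * caller-side function:
 *
 *      static int my_print(enum libbpf_print_level level,
 *                          const char *format, va_list args)
 *      {
 *              if (level == LIBBPF_DEBUG)
 *                      return 0;
 *              return vfprintf(stderr, format, args);
 *      }
 *
 *      libbpf_print_fn_t prev = libbpf_set_print(my_print);
 *      ...
 *      libbpf_set_print(prev);    restores the previous callback
 */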

__printf(2, 3)
void libbpf_print(enum libbpf_print_level level, const char *format, ...)
{
        va_list args;

        if (!__libbpf_pr)
                return;

        va_start(args, format);
        __libbpf_pr(level, format, args);
        va_end(args);
}

static void pr_perm_msg(int err)
{
        struct rlimit limit;
        char buf[100];

        if (err != -EPERM || geteuid() != 0)
                return;

        err = getrlimit(RLIMIT_MEMLOCK, &limit);
        if (err)
                return;

        if (limit.rlim_cur == RLIM_INFINITY)
                return;

        if (limit.rlim_cur < 1024)
                snprintf(buf, sizeof(buf), "%zu bytes", (size_t)limit.rlim_cur);
        else if (limit.rlim_cur < 1024*1024)
                snprintf(buf, sizeof(buf), "%.1f KiB", (double)limit.rlim_cur / 1024);
        else
                snprintf(buf, sizeof(buf), "%.1f MiB", (double)limit.rlim_cur / (1024*1024));

        pr_warn("permission error while running as root; try raising 'ulimit -l'? current value: %s\n",
                buf);
}
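
/* Illustrative example: with the RLIMIT_MEMLOCK soft limit at 65536 bytes,
 * the hint above reads "current value: 64.0 KiB"; at 512 bytes it reads
 * "current value: 512 bytes".
 */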

#define STRERR_BUFSIZE  128

/* Copied from tools/perf/util/util.h */
#ifndef zfree
# define zfree(ptr) ({ free(*ptr); *ptr = NULL; })
#endif

#ifndef zclose
# define zclose(fd) ({                  \
        int ___err = 0;                 \
        if ((fd) >= 0)                  \
                ___err = close((fd));   \
        fd = -1;                        \
        ___err; })
#endif
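
/* Semantics sketch (illustrative): zclose() closes a valid fd exactly once
 * and resets the variable to -1, so cleanup paths may run it repeatedly:
 *
 *      int fd = open("some_file", O_RDONLY);   (hypothetical fd)
 *      ...
 *      err = zclose(fd);   closes fd, sets fd = -1, returns close() result
 *      err = zclose(fd);   no-op: fd is already -1, returns 0
 */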

static inline __u64 ptr_to_u64(const void *ptr)
{
        return (__u64) (unsigned long) ptr;
}

enum kern_feature_id {
        /* v4.14: kernel support for program & map names. */
        FEAT_PROG_NAME,
        /* v5.2: kernel support for global data sections. */
        FEAT_GLOBAL_DATA,
        /* BTF support */
        FEAT_BTF,
        /* BTF_KIND_FUNC and BTF_KIND_FUNC_PROTO support */
        FEAT_BTF_FUNC,
        /* BTF_KIND_VAR and BTF_KIND_DATASEC support */
        FEAT_BTF_DATASEC,
        /* BTF_FUNC_GLOBAL is supported */
        FEAT_BTF_GLOBAL_FUNC,
        /* BPF_F_MMAPABLE is supported for arrays */
        FEAT_ARRAY_MMAP,
        /* kernel support for expected_attach_type in BPF_PROG_LOAD */
        FEAT_EXP_ATTACH_TYPE,
        /* bpf_probe_read_{kernel,user}[_str] helpers */
        FEAT_PROBE_READ_KERN,
        /* BPF_PROG_BIND_MAP is supported */
        FEAT_PROG_BIND_MAP,
        __FEAT_CNT,
};

static bool kernel_supports(enum kern_feature_id feat_id);

enum reloc_type {
        RELO_LD64,
        RELO_CALL,
        RELO_DATA,
        RELO_EXTERN,
};

struct reloc_desc {
        enum reloc_type type;
        int insn_idx;
        int map_idx;
        int sym_off;
        bool processed;
};

struct bpf_sec_def;

typedef struct bpf_link *(*attach_fn_t)(const struct bpf_sec_def *sec,
                                        struct bpf_program *prog);

struct bpf_sec_def {
        const char *sec;
        size_t len;
        enum bpf_prog_type prog_type;
        enum bpf_attach_type expected_attach_type;
        bool is_exp_attach_type_optional;
        bool is_attachable;
        bool is_attach_btf;
        bool is_sleepable;
        attach_fn_t attach_fn;
};
/*
 * bpf_prog would be a better name, but it is already used in
 * linux/filter.h.
 */
struct bpf_program {
        const struct bpf_sec_def *sec_def;
        char *sec_name;
        size_t sec_idx;
        /* this program's instruction offset (in number of instructions)
         * within its containing ELF section
         */
        size_t sec_insn_off;
        /* number of original instructions in the ELF section belonging to
         * this program, not taking into account subprogram instructions
         * possibly appended later during relocation
         */
        size_t sec_insn_cnt;
        /* Offset (in number of instructions) of the start of instructions
         * belonging to this BPF program within its containing main BPF
         * program. For the entry-point (main) BPF program, this is always
         * zero. For a sub-program, this gets reset before each of the main
         * BPF programs is processed and relocated, and is used to determine
         * whether the sub-program was already appended to the main program,
         * and if so, at which instruction offset.
         */
        size_t sub_insn_off;

        char *name;
        /* sec_name with / replaced by _; makes recursive pinning
         * in bpf_object__pin_programs easier
         */
        char *pin_name;

        /* instructions that belong to this BPF program; insns[0] is located
         * at the sec_insn_off instruction within its ELF section, so when
         * mapping an ELF file instruction index to a local instruction,
         * one needs to subtract sec_insn_off; and vice versa.
         */
        struct bpf_insn *insns;
        /* actual number of instructions in this BPF program's image; for
         * entry-point BPF programs this includes the size of the main
         * program itself plus all the used sub-programs, appended at the
         * end
         */
        size_t insns_cnt;

        struct reloc_desc *reloc_desc;
        int nr_reloc;
        int log_level;

        struct {
                int nr;
                int *fds;
        } instances;
        bpf_program_prep_t preprocessor;

        struct bpf_object *obj;
        void *priv;
        bpf_program_clear_priv_t clear_priv;

        bool load;
        enum bpf_prog_type type;
        enum bpf_attach_type expected_attach_type;
        int prog_ifindex;
        __u32 attach_btf_id;
        __u32 attach_prog_fd;
        void *func_info;
        __u32 func_info_rec_size;
        __u32 func_info_cnt;

        void *line_info;
        __u32 line_info_rec_size;
        __u32 line_info_cnt;
        __u32 prog_flags;
};

struct bpf_struct_ops {
        const char *tname;
        const struct btf_type *type;
        struct bpf_program **progs;
        __u32 *kern_func_off;
        /* e.g. struct tcp_congestion_ops in bpf_prog's btf format */
        void *data;
        /* e.g. struct bpf_struct_ops_tcp_congestion_ops in
         *      btf_vmlinux's format.
         * struct bpf_struct_ops_tcp_congestion_ops {
         *      [... some other kernel fields ...]
         *      struct tcp_congestion_ops data;
         * }
         * kern_vdata's size == sizeof(struct bpf_struct_ops_tcp_congestion_ops).
         * bpf_map__init_kern_struct_ops() will populate the "kern_vdata"
         * from "data".
         */
        void *kern_vdata;
        __u32 type_id;
};

#define DATA_SEC ".data"
#define BSS_SEC ".bss"
#define RODATA_SEC ".rodata"
#define KCONFIG_SEC ".kconfig"
#define KSYMS_SEC ".ksyms"
#define STRUCT_OPS_SEC ".struct_ops"

enum libbpf_map_type {
        LIBBPF_MAP_UNSPEC,
        LIBBPF_MAP_DATA,
        LIBBPF_MAP_BSS,
        LIBBPF_MAP_RODATA,
        LIBBPF_MAP_KCONFIG,
};

static const char * const libbpf_type_to_btf_name[] = {
        [LIBBPF_MAP_DATA]       = DATA_SEC,
        [LIBBPF_MAP_BSS]        = BSS_SEC,
        [LIBBPF_MAP_RODATA]     = RODATA_SEC,
        [LIBBPF_MAP_KCONFIG]    = KCONFIG_SEC,
};

struct bpf_map {
        char *name;
        int fd;
        int sec_idx;
        size_t sec_offset;
        int map_ifindex;
        int inner_map_fd;
        struct bpf_map_def def;
        __u32 numa_node;
        __u32 btf_var_idx;
        __u32 btf_key_type_id;
        __u32 btf_value_type_id;
        __u32 btf_vmlinux_value_type_id;
        void *priv;
        bpf_map_clear_priv_t clear_priv;
        enum libbpf_map_type libbpf_type;
        void *mmaped;
        struct bpf_struct_ops *st_ops;
        struct bpf_map *inner_map;
        void **init_slots;
        int init_slots_sz;
        char *pin_path;
        bool pinned;
        bool reused;
};

enum extern_type {
        EXT_UNKNOWN,
        EXT_KCFG,
        EXT_KSYM,
};

enum kcfg_type {
        KCFG_UNKNOWN,
        KCFG_CHAR,
        KCFG_BOOL,
        KCFG_INT,
        KCFG_TRISTATE,
        KCFG_CHAR_ARR,
};

struct extern_desc {
        enum extern_type type;
        int sym_idx;
        int btf_id;
        int sec_btf_id;
        const char *name;
        bool is_set;
        bool is_weak;
        union {
                struct {
                        enum kcfg_type type;
                        int sz;
                        int align;
                        int data_off;
                        bool is_signed;
                } kcfg;
                struct {
                        unsigned long long addr;
                } ksym;
        };
};

static LIST_HEAD(bpf_objects_list);

struct bpf_object {
        char name[BPF_OBJ_NAME_LEN];
        char license[64];
        __u32 kern_version;

        struct bpf_program *programs;
        size_t nr_programs;
        struct bpf_map *maps;
        size_t nr_maps;
        size_t maps_cap;

        char *kconfig;
        struct extern_desc *externs;
        int nr_extern;
        int kconfig_map_idx;
        int rodata_map_idx;

        bool loaded;
        bool has_subcalls;

        /*
         * Information used when doing ELF-related work. Only valid while
         * efile.fd is valid.
         */
        struct {
                int fd;
                const void *obj_buf;
                size_t obj_buf_sz;
                Elf *elf;
                GElf_Ehdr ehdr;
                Elf_Data *symbols;
                Elf_Data *data;
                Elf_Data *rodata;
                Elf_Data *bss;
                Elf_Data *st_ops_data;
                size_t shstrndx; /* section index for section name strings */
                size_t strtabidx;
                struct {
                        GElf_Shdr shdr;
                        Elf_Data *data;
                } *reloc_sects;
                int nr_reloc_sects;
                int maps_shndx;
                int btf_maps_shndx;
                __u32 btf_maps_sec_btf_id;
                int text_shndx;
                int symbols_shndx;
                int data_shndx;
                int rodata_shndx;
                int bss_shndx;
                int st_ops_shndx;
        } efile;
        /*
         * All loaded bpf_objects are linked into a list, which is
         * hidden from the caller. bpf_objects__<func> handlers deal
         * with all objects.
         */
        struct list_head list;

        struct btf *btf;
        /* Parse and load BTF vmlinux if any of the programs in the object need
         * it at load time.
         */
        struct btf *btf_vmlinux;
        struct btf_ext *btf_ext;

        void *priv;
        bpf_object_clear_priv_t clear_priv;

        char path[];
};
#define obj_elf_valid(o)        ((o)->efile.elf)

static const char *elf_sym_str(const struct bpf_object *obj, size_t off);
static const char *elf_sec_str(const struct bpf_object *obj, size_t off);
static Elf_Scn *elf_sec_by_idx(const struct bpf_object *obj, size_t idx);
static Elf_Scn *elf_sec_by_name(const struct bpf_object *obj, const char *name);
static int elf_sec_hdr(const struct bpf_object *obj, Elf_Scn *scn, GElf_Shdr *hdr);
static const char *elf_sec_name(const struct bpf_object *obj, Elf_Scn *scn);
static Elf_Data *elf_sec_data(const struct bpf_object *obj, Elf_Scn *scn);
static int elf_sym_by_sec_off(const struct bpf_object *obj, size_t sec_idx,
                              size_t off, __u32 sym_type, GElf_Sym *sym);

void bpf_program__unload(struct bpf_program *prog)
{
        int i;

        if (!prog)
                return;

        /*
         * If the object is opened but the program was never loaded,
         * it is possible that prog->instances.nr == -1.
         */
        if (prog->instances.nr > 0) {
                for (i = 0; i < prog->instances.nr; i++)
                        zclose(prog->instances.fds[i]);
        } else if (prog->instances.nr != -1) {
                pr_warn("Internal error: instances.nr is %d\n",
                        prog->instances.nr);
        }

        prog->instances.nr = -1;
        zfree(&prog->instances.fds);

        zfree(&prog->func_info);
        zfree(&prog->line_info);
}

static void bpf_program__exit(struct bpf_program *prog)
{
        if (!prog)
                return;

        if (prog->clear_priv)
                prog->clear_priv(prog, prog->priv);

        prog->priv = NULL;
        prog->clear_priv = NULL;

        bpf_program__unload(prog);
        zfree(&prog->name);
        zfree(&prog->sec_name);
        zfree(&prog->pin_name);
        zfree(&prog->insns);
        zfree(&prog->reloc_desc);

        prog->nr_reloc = 0;
        prog->insns_cnt = 0;
        prog->sec_idx = -1;
}

static char *__bpf_program__pin_name(struct bpf_program *prog)
{
        char *name, *p;

        name = p = strdup(prog->sec_name);
        while ((p = strchr(p, '/')))
                *p = '_';

        return name;
}
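
/* E.g., a program in ELF section "cgroup/skb/egress" gets the pin name
 * "cgroup_skb_egress" (editor's illustrative example), which is usable as
 * a single path component when pinning under /sys/fs/bpf.
 */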

static bool insn_is_subprog_call(const struct bpf_insn *insn)
{
        return BPF_CLASS(insn->code) == BPF_JMP &&
               BPF_OP(insn->code) == BPF_CALL &&
               BPF_SRC(insn->code) == BPF_K &&
               insn->src_reg == BPF_PSEUDO_CALL &&
               insn->dst_reg == 0 &&
               insn->off == 0;
}
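
/* For reference (illustrative summary): a subprogram call emitted by the
 * compiler is a BPF_JMP|BPF_CALL instruction with src_reg == BPF_PSEUDO_CALL
 * and imm holding the instruction-relative offset of the callee, whereas a
 * helper call has src_reg == 0 and imm holding the helper's ID.
 */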

static int
bpf_object__init_prog(struct bpf_object *obj, struct bpf_program *prog,
                      const char *name, size_t sec_idx, const char *sec_name,
                      size_t sec_off, void *insn_data, size_t insn_data_sz)
{
        int i;

        if (insn_data_sz == 0 || insn_data_sz % BPF_INSN_SZ || sec_off % BPF_INSN_SZ) {
                pr_warn("sec '%s': corrupted program '%s', offset %zu, size %zu\n",
                        sec_name, name, sec_off, insn_data_sz);
                return -EINVAL;
        }

        memset(prog, 0, sizeof(*prog));
        prog->obj = obj;

        prog->sec_idx = sec_idx;
        prog->sec_insn_off = sec_off / BPF_INSN_SZ;
        prog->sec_insn_cnt = insn_data_sz / BPF_INSN_SZ;
        /* insns_cnt can later be increased by appending used subprograms */
        prog->insns_cnt = prog->sec_insn_cnt;

        prog->type = BPF_PROG_TYPE_UNSPEC;
        prog->load = true;

        prog->instances.fds = NULL;
        prog->instances.nr = -1;

        prog->sec_name = strdup(sec_name);
        if (!prog->sec_name)
                goto errout;

        prog->name = strdup(name);
        if (!prog->name)
                goto errout;

        prog->pin_name = __bpf_program__pin_name(prog);
        if (!prog->pin_name)
                goto errout;

        prog->insns = malloc(insn_data_sz);
        if (!prog->insns)
                goto errout;
        memcpy(prog->insns, insn_data, insn_data_sz);

        for (i = 0; i < prog->insns_cnt; i++) {
                if (insn_is_subprog_call(&prog->insns[i])) {
                        obj->has_subcalls = true;
                        break;
                }
        }

        return 0;
errout:
        pr_warn("sec '%s': failed to allocate memory for prog '%s'\n", sec_name, name);
        bpf_program__exit(prog);
        return -ENOMEM;
}

static int
bpf_object__add_programs(struct bpf_object *obj, Elf_Data *sec_data,
                         const char *sec_name, int sec_idx)
{
        struct bpf_program *prog, *progs;
        void *data = sec_data->d_buf;
        size_t sec_sz = sec_data->d_size, sec_off, prog_sz;
        int nr_progs, err;
        const char *name;
        GElf_Sym sym;

        progs = obj->programs;
        nr_progs = obj->nr_programs;
        sec_off = 0;

        while (sec_off < sec_sz) {
                if (elf_sym_by_sec_off(obj, sec_idx, sec_off, STT_FUNC, &sym)) {
                        pr_warn("sec '%s': failed to find program symbol at offset %zu\n",
                                sec_name, sec_off);
                        return -LIBBPF_ERRNO__FORMAT;
                }

                prog_sz = sym.st_size;

                name = elf_sym_str(obj, sym.st_name);
                if (!name) {
                        pr_warn("sec '%s': failed to get symbol name for offset %zu\n",
                                sec_name, sec_off);
                        return -LIBBPF_ERRNO__FORMAT;
                }

                if (sec_off + prog_sz > sec_sz) {
                        pr_warn("sec '%s': program at offset %zu crosses section boundary\n",
                                sec_name, sec_off);
                        return -LIBBPF_ERRNO__FORMAT;
                }

                pr_debug("sec '%s': found program '%s' at insn offset %zu (%zu bytes), code size %zu insns (%zu bytes)\n",
                         sec_name, name, sec_off / BPF_INSN_SZ, sec_off, prog_sz / BPF_INSN_SZ, prog_sz);

                progs = libbpf_reallocarray(progs, nr_progs + 1, sizeof(*progs));
                if (!progs) {
                        /*
                         * In this case the original obj->programs
                         * is still valid, so no special treatment is
                         * needed in bpf_object__close().
                         */
                        pr_warn("sec '%s': failed to alloc memory for new program '%s'\n",
                                sec_name, name);
                        return -ENOMEM;
                }
                obj->programs = progs;

                prog = &progs[nr_progs];

                err = bpf_object__init_prog(obj, prog, name, sec_idx, sec_name,
                                            sec_off, data + sec_off, prog_sz);
                if (err)
                        return err;

                nr_progs++;
                obj->nr_programs = nr_progs;

                sec_off += prog_sz;
        }

        return 0;
}

static __u32 get_kernel_version(void)
{
        __u32 major, minor, patch;
        struct utsname info;

        uname(&info);
        if (sscanf(info.release, "%u.%u.%u", &major, &minor, &patch) != 3)
                return 0;
        return KERNEL_VERSION(major, minor, patch);
}
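
/* Worked example (assuming the classic KERNEL_VERSION() encoding): a
 * release string of "5.10.42" yields KERNEL_VERSION(5, 10, 42) ==
 * (5 << 16) + (10 << 8) + 42 == 330282.
 */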

static const struct btf_member *
find_member_by_offset(const struct btf_type *t, __u32 bit_offset)
{
        struct btf_member *m;
        int i;

        for (i = 0, m = btf_members(t); i < btf_vlen(t); i++, m++) {
                if (btf_member_bit_offset(t, i) == bit_offset)
                        return m;
        }

        return NULL;
}

static const struct btf_member *
find_member_by_name(const struct btf *btf, const struct btf_type *t,
                    const char *name)
{
        struct btf_member *m;
        int i;

        for (i = 0, m = btf_members(t); i < btf_vlen(t); i++, m++) {
                if (!strcmp(btf__name_by_offset(btf, m->name_off), name))
                        return m;
        }

        return NULL;
}

#define STRUCT_OPS_VALUE_PREFIX "bpf_struct_ops_"
static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix,
                                   const char *name, __u32 kind);

static int
find_struct_ops_kern_types(const struct btf *btf, const char *tname,
                           const struct btf_type **type, __u32 *type_id,
                           const struct btf_type **vtype, __u32 *vtype_id,
                           const struct btf_member **data_member)
{
        const struct btf_type *kern_type, *kern_vtype;
        const struct btf_member *kern_data_member;
        __s32 kern_vtype_id, kern_type_id;
        __u32 i;

        kern_type_id = btf__find_by_name_kind(btf, tname, BTF_KIND_STRUCT);
        if (kern_type_id < 0) {
                pr_warn("struct_ops init_kern: struct %s is not found in kernel BTF\n",
                        tname);
                return kern_type_id;
        }
        kern_type = btf__type_by_id(btf, kern_type_id);

        /* Find the corresponding "map_value" type that will be used
         * in map_update(BPF_MAP_TYPE_STRUCT_OPS).  For example,
         * find "struct bpf_struct_ops_tcp_congestion_ops" from the
         * btf_vmlinux.
         */
        kern_vtype_id = find_btf_by_prefix_kind(btf, STRUCT_OPS_VALUE_PREFIX,
                                                tname, BTF_KIND_STRUCT);
        if (kern_vtype_id < 0) {
                pr_warn("struct_ops init_kern: struct %s%s is not found in kernel BTF\n",
                        STRUCT_OPS_VALUE_PREFIX, tname);
                return kern_vtype_id;
        }
        kern_vtype = btf__type_by_id(btf, kern_vtype_id);

        /* Find "struct tcp_congestion_ops" from
         * struct bpf_struct_ops_tcp_congestion_ops {
         *      [ ... ]
         *      struct tcp_congestion_ops data;
         * }
         */
        kern_data_member = btf_members(kern_vtype);
        for (i = 0; i < btf_vlen(kern_vtype); i++, kern_data_member++) {
                if (kern_data_member->type == kern_type_id)
                        break;
        }
        if (i == btf_vlen(kern_vtype)) {
                pr_warn("struct_ops init_kern: struct %s data is not found in struct %s%s\n",
                        tname, STRUCT_OPS_VALUE_PREFIX, tname);
                return -EINVAL;
        }

        *type = kern_type;
        *type_id = kern_type_id;
        *vtype = kern_vtype;
        *vtype_id = kern_vtype_id;
        *data_member = kern_data_member;

        return 0;
}

static bool bpf_map__is_struct_ops(const struct bpf_map *map)
{
        return map->def.type == BPF_MAP_TYPE_STRUCT_OPS;
}

/* Init the map's fields that depend on kern_btf */
static int bpf_map__init_kern_struct_ops(struct bpf_map *map,
                                         const struct btf *btf,
                                         const struct btf *kern_btf)
{
        const struct btf_member *member, *kern_member, *kern_data_member;
        const struct btf_type *type, *kern_type, *kern_vtype;
        __u32 i, kern_type_id, kern_vtype_id, kern_data_off;
        struct bpf_struct_ops *st_ops;
        void *data, *kern_data;
        const char *tname;
        int err;

        st_ops = map->st_ops;
        type = st_ops->type;
        tname = st_ops->tname;
        err = find_struct_ops_kern_types(kern_btf, tname,
                                         &kern_type, &kern_type_id,
                                         &kern_vtype, &kern_vtype_id,
                                         &kern_data_member);
        if (err)
                return err;

        pr_debug("struct_ops init_kern %s: type_id:%u kern_type_id:%u kern_vtype_id:%u\n",
                 map->name, st_ops->type_id, kern_type_id, kern_vtype_id);

        map->def.value_size = kern_vtype->size;
        map->btf_vmlinux_value_type_id = kern_vtype_id;

        st_ops->kern_vdata = calloc(1, kern_vtype->size);
        if (!st_ops->kern_vdata)
                return -ENOMEM;

        data = st_ops->data;
        kern_data_off = kern_data_member->offset / 8;
        kern_data = st_ops->kern_vdata + kern_data_off;

        member = btf_members(type);
        for (i = 0; i < btf_vlen(type); i++, member++) {
                const struct btf_type *mtype, *kern_mtype;
                __u32 mtype_id, kern_mtype_id;
                void *mdata, *kern_mdata;
                __s64 msize, kern_msize;
                __u32 moff, kern_moff;
                __u32 kern_member_idx;
                const char *mname;

                mname = btf__name_by_offset(btf, member->name_off);
                kern_member = find_member_by_name(kern_btf, kern_type, mname);
                if (!kern_member) {
                        pr_warn("struct_ops init_kern %s: Cannot find member %s in kernel BTF\n",
                                map->name, mname);
                        return -ENOTSUP;
                }

                kern_member_idx = kern_member - btf_members(kern_type);
                if (btf_member_bitfield_size(type, i) ||
                    btf_member_bitfield_size(kern_type, kern_member_idx)) {
                        pr_warn("struct_ops init_kern %s: bitfield %s is not supported\n",
                                map->name, mname);
                        return -ENOTSUP;
                }

                moff = member->offset / 8;
                kern_moff = kern_member->offset / 8;

                mdata = data + moff;
                kern_mdata = kern_data + kern_moff;

                mtype = skip_mods_and_typedefs(btf, member->type, &mtype_id);
                kern_mtype = skip_mods_and_typedefs(kern_btf, kern_member->type,
                                                    &kern_mtype_id);
                if (BTF_INFO_KIND(mtype->info) !=
                    BTF_INFO_KIND(kern_mtype->info)) {
                        pr_warn("struct_ops init_kern %s: Unmatched member type %s %u != %u(kernel)\n",
                                map->name, mname, BTF_INFO_KIND(mtype->info),
                                BTF_INFO_KIND(kern_mtype->info));
                        return -ENOTSUP;
                }

                if (btf_is_ptr(mtype)) {
                        struct bpf_program *prog;

                        mtype = skip_mods_and_typedefs(btf, mtype->type, &mtype_id);
                        kern_mtype = skip_mods_and_typedefs(kern_btf,
                                                            kern_mtype->type,
                                                            &kern_mtype_id);
                        if (!btf_is_func_proto(mtype) ||
                            !btf_is_func_proto(kern_mtype)) {
                                pr_warn("struct_ops init_kern %s: non func ptr %s is not supported\n",
                                        map->name, mname);
                                return -ENOTSUP;
                        }

                        prog = st_ops->progs[i];
                        if (!prog) {
                                pr_debug("struct_ops init_kern %s: func ptr %s is not set\n",
                                         map->name, mname);
                                continue;
                        }

                        prog->attach_btf_id = kern_type_id;
                        prog->expected_attach_type = kern_member_idx;

                        st_ops->kern_func_off[i] = kern_data_off + kern_moff;

                        pr_debug("struct_ops init_kern %s: func ptr %s is set to prog %s from data(+%u) to kern_data(+%u)\n",
                                 map->name, mname, prog->name, moff,
                                 kern_moff);

                        continue;
                }

                msize = btf__resolve_size(btf, mtype_id);
                kern_msize = btf__resolve_size(kern_btf, kern_mtype_id);
                if (msize < 0 || kern_msize < 0 || msize != kern_msize) {
                        pr_warn("struct_ops init_kern %s: Error in size of member %s: %zd != %zd(kernel)\n",
                                map->name, mname, (ssize_t)msize,
                                (ssize_t)kern_msize);
                        return -ENOTSUP;
                }

                pr_debug("struct_ops init_kern %s: copy %s %u bytes from data(+%u) to kern_data(+%u)\n",
                         map->name, mname, (unsigned int)msize,
                         moff, kern_moff);
                memcpy(kern_mdata, mdata, msize);
        }

        return 0;
}

static int bpf_object__init_kern_struct_ops_maps(struct bpf_object *obj)
{
        struct bpf_map *map;
        size_t i;
        int err;

        for (i = 0; i < obj->nr_maps; i++) {
                map = &obj->maps[i];

                if (!bpf_map__is_struct_ops(map))
                        continue;

                err = bpf_map__init_kern_struct_ops(map, obj->btf,
                                                    obj->btf_vmlinux);
                if (err)
                        return err;
        }

        return 0;
}

static int bpf_object__init_struct_ops_maps(struct bpf_object *obj)
{
        const struct btf_type *type, *datasec;
        const struct btf_var_secinfo *vsi;
        struct bpf_struct_ops *st_ops;
        const char *tname, *var_name;
        __s32 type_id, datasec_id;
        const struct btf *btf;
        struct bpf_map *map;
        __u32 i;

        if (obj->efile.st_ops_shndx == -1)
                return 0;

        btf = obj->btf;
        datasec_id = btf__find_by_name_kind(btf, STRUCT_OPS_SEC,
                                            BTF_KIND_DATASEC);
        if (datasec_id < 0) {
                pr_warn("struct_ops init: DATASEC %s not found\n",
                        STRUCT_OPS_SEC);
                return -EINVAL;
        }

        datasec = btf__type_by_id(btf, datasec_id);
        vsi = btf_var_secinfos(datasec);
        for (i = 0; i < btf_vlen(datasec); i++, vsi++) {
                type = btf__type_by_id(obj->btf, vsi->type);
                var_name = btf__name_by_offset(obj->btf, type->name_off);

                type_id = btf__resolve_type(obj->btf, vsi->type);
                if (type_id < 0) {
                        pr_warn("struct_ops init: Cannot resolve var type_id %u in DATASEC %s\n",
                                vsi->type, STRUCT_OPS_SEC);
                        return -EINVAL;
                }

                type = btf__type_by_id(obj->btf, type_id);
                tname = btf__name_by_offset(obj->btf, type->name_off);
                if (!tname[0]) {
                        pr_warn("struct_ops init: anonymous type is not supported\n");
                        return -ENOTSUP;
                }
                if (!btf_is_struct(type)) {
                        pr_warn("struct_ops init: %s is not a struct\n", tname);
                        return -EINVAL;
                }

                map = bpf_object__add_map(obj);
                if (IS_ERR(map))
                        return PTR_ERR(map);

                map->sec_idx = obj->efile.st_ops_shndx;
                map->sec_offset = vsi->offset;
                map->name = strdup(var_name);
                if (!map->name)
                        return -ENOMEM;

                map->def.type = BPF_MAP_TYPE_STRUCT_OPS;
                map->def.key_size = sizeof(int);
                map->def.value_size = type->size;
                map->def.max_entries = 1;

                map->st_ops = calloc(1, sizeof(*map->st_ops));
                if (!map->st_ops)
                        return -ENOMEM;
                st_ops = map->st_ops;
                st_ops->data = malloc(type->size);
                st_ops->progs = calloc(btf_vlen(type), sizeof(*st_ops->progs));
                st_ops->kern_func_off = malloc(btf_vlen(type) *
                                               sizeof(*st_ops->kern_func_off));
                if (!st_ops->data || !st_ops->progs || !st_ops->kern_func_off)
                        return -ENOMEM;

                if (vsi->offset + type->size > obj->efile.st_ops_data->d_size) {
                        pr_warn("struct_ops init: var %s is beyond the end of DATASEC %s\n",
                                var_name, STRUCT_OPS_SEC);
                        return -EINVAL;
                }

                memcpy(st_ops->data,
                       obj->efile.st_ops_data->d_buf + vsi->offset,
                       type->size);
                st_ops->tname = tname;
                st_ops->type = type;
                st_ops->type_id = type_id;

                pr_debug("struct_ops init: struct %s(type_id=%u) %s found at offset %u\n",
                         tname, type_id, var_name, vsi->offset);
        }

        return 0;
}

static struct bpf_object *bpf_object__new(const char *path,
                                          const void *obj_buf,
                                          size_t obj_buf_sz,
                                          const char *obj_name)
{
        struct bpf_object *obj;
        char *end;

        obj = calloc(1, sizeof(struct bpf_object) + strlen(path) + 1);
        if (!obj) {
                pr_warn("alloc memory failed for %s\n", path);
                return ERR_PTR(-ENOMEM);
        }

        strcpy(obj->path, path);
        if (obj_name) {
                strncpy(obj->name, obj_name, sizeof(obj->name) - 1);
                obj->name[sizeof(obj->name) - 1] = 0;
        } else {
                /* Use the GNU version of basename(), which doesn't modify its argument. */
                strncpy(obj->name, basename((void *)path),
                        sizeof(obj->name) - 1);
                end = strchr(obj->name, '.');
                if (end)
                        *end = 0;
        }

        obj->efile.fd = -1;
        /*
         * The caller of this function should also call
         * bpf_object__elf_finish() after data collection to return
         * obj_buf to the user. If not, we would have to duplicate the
         * buffer to avoid the user freeing it before ELF processing
         * is finished.
         */
        obj->efile.obj_buf = obj_buf;
        obj->efile.obj_buf_sz = obj_buf_sz;
        obj->efile.maps_shndx = -1;
        obj->efile.btf_maps_shndx = -1;
        obj->efile.data_shndx = -1;
        obj->efile.rodata_shndx = -1;
        obj->efile.bss_shndx = -1;
        obj->efile.st_ops_shndx = -1;
        obj->kconfig_map_idx = -1;
        obj->rodata_map_idx = -1;

        obj->kern_version = get_kernel_version();
        obj->loaded = false;

        INIT_LIST_HEAD(&obj->list);
        list_add(&obj->list, &bpf_objects_list);
        return obj;
}

static void bpf_object__elf_finish(struct bpf_object *obj)
{
        if (!obj_elf_valid(obj))
                return;

        if (obj->efile.elf) {
                elf_end(obj->efile.elf);
                obj->efile.elf = NULL;
        }
        obj->efile.symbols = NULL;
        obj->efile.data = NULL;
        obj->efile.rodata = NULL;
        obj->efile.bss = NULL;
        obj->efile.st_ops_data = NULL;

        zfree(&obj->efile.reloc_sects);
        obj->efile.nr_reloc_sects = 0;
        zclose(obj->efile.fd);
        obj->efile.obj_buf = NULL;
        obj->efile.obj_buf_sz = 0;
}

/* if libelf is old and doesn't support mmap(), fall back to read() */
#ifndef ELF_C_READ_MMAP
#define ELF_C_READ_MMAP ELF_C_READ
#endif

static int bpf_object__elf_init(struct bpf_object *obj)
{
        int err = 0;
        GElf_Ehdr *ep;

        if (obj_elf_valid(obj)) {
                pr_warn("elf: init internal error\n");
                return -LIBBPF_ERRNO__LIBELF;
        }

        if (obj->efile.obj_buf_sz > 0) {
                /*
                 * obj_buf should have been validated by
                 * bpf_object__open_buffer().
                 */
                obj->efile.elf = elf_memory((char *)obj->efile.obj_buf,
                                            obj->efile.obj_buf_sz);
        } else {
                obj->efile.fd = open(obj->path, O_RDONLY);
                if (obj->efile.fd < 0) {
                        char errmsg[STRERR_BUFSIZE], *cp;

                        err = -errno;
                        cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
                        pr_warn("elf: failed to open %s: %s\n", obj->path, cp);
                        return err;
                }

                obj->efile.elf = elf_begin(obj->efile.fd, ELF_C_READ_MMAP, NULL);
        }

        if (!obj->efile.elf) {
                pr_warn("elf: failed to open %s as ELF file: %s\n", obj->path, elf_errmsg(-1));
                err = -LIBBPF_ERRNO__LIBELF;
                goto errout;
        }

        if (!gelf_getehdr(obj->efile.elf, &obj->efile.ehdr)) {
                pr_warn("elf: failed to get ELF header from %s: %s\n", obj->path, elf_errmsg(-1));
                err = -LIBBPF_ERRNO__FORMAT;
                goto errout;
        }
        ep = &obj->efile.ehdr;

        if (elf_getshdrstrndx(obj->efile.elf, &obj->efile.shstrndx)) {
                pr_warn("elf: failed to get section names section index for %s: %s\n",
                        obj->path, elf_errmsg(-1));
                err = -LIBBPF_ERRNO__FORMAT;
                goto errout;
        }

        /* Elf is corrupted/truncated, avoid calling elf_strptr. */
        if (!elf_rawdata(elf_getscn(obj->efile.elf, obj->efile.shstrndx), NULL)) {
                pr_warn("elf: failed to get section names strings from %s: %s\n",
                        obj->path, elf_errmsg(-1));
                return -LIBBPF_ERRNO__FORMAT;
        }

        /* Old LLVM versions set e_machine to EM_NONE */
        if (ep->e_type != ET_REL ||
            (ep->e_machine && ep->e_machine != EM_BPF)) {
                pr_warn("elf: %s is not a valid eBPF object file\n", obj->path);
                err = -LIBBPF_ERRNO__FORMAT;
                goto errout;
        }

        return 0;
errout:
        bpf_object__elf_finish(obj);
        return err;
}

static int bpf_object__check_endianness(struct bpf_object *obj)
{
#if __BYTE_ORDER == __LITTLE_ENDIAN
        if (obj->efile.ehdr.e_ident[EI_DATA] == ELFDATA2LSB)
                return 0;
#elif __BYTE_ORDER == __BIG_ENDIAN
        if (obj->efile.ehdr.e_ident[EI_DATA] == ELFDATA2MSB)
                return 0;
#else
# error "Unrecognized __BYTE_ORDER"
#endif
        pr_warn("elf: endianness mismatch in %s.\n", obj->path);
        return -LIBBPF_ERRNO__ENDIAN;
}

static int
bpf_object__init_license(struct bpf_object *obj, void *data, size_t size)
{
        memcpy(obj->license, data, min(size, sizeof(obj->license) - 1));
        pr_debug("license of %s is %s\n", obj->path, obj->license);
        return 0;
}

static int
bpf_object__init_kversion(struct bpf_object *obj, void *data, size_t size)
{
        __u32 kver;

        if (size != sizeof(kver)) {
                pr_warn("invalid kver section in %s\n", obj->path);
                return -LIBBPF_ERRNO__FORMAT;
        }
        memcpy(&kver, data, sizeof(kver));
        obj->kern_version = kver;
        pr_debug("kernel version of %s is %x\n", obj->path, obj->kern_version);
        return 0;
}

static bool bpf_map_type__is_map_in_map(enum bpf_map_type type)
{
        if (type == BPF_MAP_TYPE_ARRAY_OF_MAPS ||
            type == BPF_MAP_TYPE_HASH_OF_MAPS)
                return true;
        return false;
}

int bpf_object__section_size(const struct bpf_object *obj, const char *name,
                             __u32 *size)
{
        int ret = -ENOENT;

        *size = 0;
        if (!name) {
                return -EINVAL;
        } else if (!strcmp(name, DATA_SEC)) {
                if (obj->efile.data)
                        *size = obj->efile.data->d_size;
        } else if (!strcmp(name, BSS_SEC)) {
                if (obj->efile.bss)
                        *size = obj->efile.bss->d_size;
        } else if (!strcmp(name, RODATA_SEC)) {
                if (obj->efile.rodata)
                        *size = obj->efile.rodata->d_size;
        } else if (!strcmp(name, STRUCT_OPS_SEC)) {
                if (obj->efile.st_ops_data)
                        *size = obj->efile.st_ops_data->d_size;
        } else {
                Elf_Scn *scn = elf_sec_by_name(obj, name);
                Elf_Data *data = elf_sec_data(obj, scn);

                if (data) {
                        ret = 0; /* found it */
                        *size = data->d_size;
                }
        }

        return *size ? 0 : ret;
}

int bpf_object__variable_offset(const struct bpf_object *obj, const char *name,
                                __u32 *off)
{
        Elf_Data *symbols = obj->efile.symbols;
        const char *sname;
        size_t si;

        if (!name || !off)
                return -EINVAL;

        for (si = 0; si < symbols->d_size / sizeof(GElf_Sym); si++) {
                GElf_Sym sym;

                if (!gelf_getsym(symbols, si, &sym))
                        continue;
                if (GELF_ST_BIND(sym.st_info) != STB_GLOBAL ||
                    GELF_ST_TYPE(sym.st_info) != STT_OBJECT)
                        continue;

                sname = elf_sym_str(obj, sym.st_name);
                if (!sname) {
                        pr_warn("failed to get sym name string for var %s\n",
                                name);
                        return -EIO;
                }
                if (strcmp(name, sname) == 0) {
                        *off = sym.st_value;
                        return 0;
                }
        }

        return -ENOENT;
}

static struct bpf_map *bpf_object__add_map(struct bpf_object *obj)
{
        struct bpf_map *new_maps;
        size_t new_cap;
        int i;

        if (obj->nr_maps < obj->maps_cap)
                return &obj->maps[obj->nr_maps++];

        new_cap = max((size_t)4, obj->maps_cap * 3 / 2);
        new_maps = libbpf_reallocarray(obj->maps, new_cap, sizeof(*obj->maps));
        if (!new_maps) {
                pr_warn("alloc maps for object failed\n");
                return ERR_PTR(-ENOMEM);
        }

        obj->maps_cap = new_cap;
        obj->maps = new_maps;

        /* zero out new maps */
        memset(obj->maps + obj->nr_maps, 0,
               (obj->maps_cap - obj->nr_maps) * sizeof(*obj->maps));
        /*
         * Fill all fds with -1 so we won't close an incorrect fd (fd=0 is
         * stdin) on failure (zclose() won't close a negative fd).
         */
        for (i = obj->nr_maps; i < obj->maps_cap; i++) {
                obj->maps[i].fd = -1;
                obj->maps[i].inner_map_fd = -1;
        }

        return &obj->maps[obj->nr_maps++];
}

static size_t bpf_map_mmap_sz(const struct bpf_map *map)
{
        long page_sz = sysconf(_SC_PAGE_SIZE);
        size_t map_sz;

        map_sz = (size_t)roundup(map->def.value_size, 8) * map->def.max_entries;
        map_sz = roundup(map_sz, page_sz);
        return map_sz;
}
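
/* Worked example (assuming 4 KiB pages): value_size = 7 and max_entries = 3
 * give roundup(7, 8) * 3 == 24 bytes, which rounds up to a single
 * 4096-byte page.
 */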

static char *internal_map_name(struct bpf_object *obj,
                               enum libbpf_map_type type)
{
        char map_name[BPF_OBJ_NAME_LEN], *p;
        const char *sfx = libbpf_type_to_btf_name[type];
        int sfx_len = max((size_t)7, strlen(sfx));
        int pfx_len = min((size_t)BPF_OBJ_NAME_LEN - sfx_len - 1,
                          strlen(obj->name));

        snprintf(map_name, sizeof(map_name), "%.*s%.*s", pfx_len, obj->name,
                 sfx_len, libbpf_type_to_btf_name[type]);

        /* sanitise map name to characters allowed by kernel */
        for (p = map_name; *p && p < map_name + sizeof(map_name); p++)
                if (!isalnum(*p) && *p != '_' && *p != '.')
                        *p = '_';

        return strdup(map_name);
}
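
/* Example (illustrative): an object named "my_obj" with type
 * LIBBPF_MAP_RODATA yields the map name "my_obj.rodata"; a longer object
 * name has its prefix truncated so the result, including the NUL
 * terminator, still fits into BPF_OBJ_NAME_LEN bytes.
 */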

static int
bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type,
                              int sec_idx, void *data, size_t data_sz)
{
        struct bpf_map_def *def;
        struct bpf_map *map;
        int err;

        map = bpf_object__add_map(obj);
        if (IS_ERR(map))
                return PTR_ERR(map);

        map->libbpf_type = type;
        map->sec_idx = sec_idx;
        map->sec_offset = 0;
        map->name = internal_map_name(obj, type);
        if (!map->name) {
                pr_warn("failed to alloc map name\n");
                return -ENOMEM;
        }

        def = &map->def;
        def->type = BPF_MAP_TYPE_ARRAY;
        def->key_size = sizeof(int);
        def->value_size = data_sz;
        def->max_entries = 1;
        def->map_flags = type == LIBBPF_MAP_RODATA || type == LIBBPF_MAP_KCONFIG
                         ? BPF_F_RDONLY_PROG : 0;
        def->map_flags |= BPF_F_MMAPABLE;

        pr_debug("map '%s' (global data): at sec_idx %d, offset %zu, flags %x.\n",
                 map->name, map->sec_idx, map->sec_offset, def->map_flags);

        map->mmaped = mmap(NULL, bpf_map_mmap_sz(map), PROT_READ | PROT_WRITE,
                           MAP_SHARED | MAP_ANONYMOUS, -1, 0);
        if (map->mmaped == MAP_FAILED) {
                err = -errno;
                map->mmaped = NULL;
                pr_warn("failed to alloc map '%s' content buffer: %d\n",
                        map->name, err);
                zfree(&map->name);
                return err;
        }

        if (data)
                memcpy(map->mmaped, data, data_sz);

        pr_debug("map %td is \"%s\"\n", map - obj->maps, map->name);
        return 0;
}

static int bpf_object__init_global_data_maps(struct bpf_object *obj)
{
        int err;

        /*
         * Populate obj->maps with libbpf internal maps.
         */
        if (obj->efile.data_shndx >= 0) {
                err = bpf_object__init_internal_map(obj, LIBBPF_MAP_DATA,
                                                    obj->efile.data_shndx,
                                                    obj->efile.data->d_buf,
                                                    obj->efile.data->d_size);
                if (err)
                        return err;
        }
        if (obj->efile.rodata_shndx >= 0) {
                err = bpf_object__init_internal_map(obj, LIBBPF_MAP_RODATA,
                                                    obj->efile.rodata_shndx,
                                                    obj->efile.rodata->d_buf,
                                                    obj->efile.rodata->d_size);
                if (err)
                        return err;

                obj->rodata_map_idx = obj->nr_maps - 1;
        }
        if (obj->efile.bss_shndx >= 0) {
                err = bpf_object__init_internal_map(obj, LIBBPF_MAP_BSS,
                                                    obj->efile.bss_shndx,
                                                    NULL,
                                                    obj->efile.bss->d_size);
                if (err)
                        return err;
        }
        return 0;
}


static struct extern_desc *find_extern_by_name(const struct bpf_object *obj,
                                               const void *name)
{
        int i;

        for (i = 0; i < obj->nr_extern; i++) {
                if (strcmp(obj->externs[i].name, name) == 0)
                        return &obj->externs[i];
        }
        return NULL;
}

static int set_kcfg_value_tri(struct extern_desc *ext, void *ext_val,
                              char value)
{
        switch (ext->kcfg.type) {
        case KCFG_BOOL:
                if (value == 'm') {
                        pr_warn("extern (kcfg) %s=%c should be tristate or char\n",
                                ext->name, value);
                        return -EINVAL;
                }
                *(bool *)ext_val = value == 'y' ? true : false;
                break;
        case KCFG_TRISTATE:
                if (value == 'y')
                        *(enum libbpf_tristate *)ext_val = TRI_YES;
                else if (value == 'm')
                        *(enum libbpf_tristate *)ext_val = TRI_MODULE;
                else /* value == 'n' */
                        *(enum libbpf_tristate *)ext_val = TRI_NO;
                break;
        case KCFG_CHAR:
                *(char *)ext_val = value;
                break;
        case KCFG_UNKNOWN:
        case KCFG_INT:
        case KCFG_CHAR_ARR:
        default:
                pr_warn("extern (kcfg) %s=%c should be bool, tristate, or char\n",
                        ext->name, value);
                return -EINVAL;
        }
        ext->is_set = true;
        return 0;
}

static int set_kcfg_value_str(struct extern_desc *ext, char *ext_val,
                              const char *value)
{
        size_t len;

        if (ext->kcfg.type != KCFG_CHAR_ARR) {
                pr_warn("extern (kcfg) %s=%s should be char array\n", ext->name, value);
                return -EINVAL;
        }

        len = strlen(value);
        if (value[len - 1] != '"') {
                pr_warn("extern (kcfg) '%s': invalid string config '%s'\n",
                        ext->name, value);
                return -EINVAL;
        }

        /* strip quotes */
        len -= 2;
        if (len >= ext->kcfg.sz) {
                pr_warn("extern (kcfg) '%s': long string config %s (%zu bytes) truncated to %d bytes\n",
                        ext->name, value, len, ext->kcfg.sz - 1);
                len = ext->kcfg.sz - 1;
        }
        memcpy(ext_val, value + 1, len);
        ext_val[len] = '\0';
        ext->is_set = true;
        return 0;
}
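
/* Example (illustrative): for CONFIG_FOO="bar" the value arrives here as
 * the string "\"bar\""; the surrounding quotes are stripped and "bar" plus
 * a NUL terminator is copied into the extern's char array, truncated if
 * the array is smaller.
 */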

static int parse_u64(const char *value, __u64 *res)
{
        char *value_end;
        int err;

        errno = 0;
        *res = strtoull(value, &value_end, 0);
        if (errno) {
                err = -errno;
                pr_warn("failed to parse '%s' as integer: %d\n", value, err);
                return err;
        }
        if (*value_end) {
                pr_warn("failed to parse '%s' as integer completely\n", value);
                return -EINVAL;
        }
        return 0;
}

static bool is_kcfg_value_in_range(const struct extern_desc *ext, __u64 v)
{
        int bit_sz = ext->kcfg.sz * 8;

        if (ext->kcfg.sz == 8)
                return true;

1553         /* Validate that the value stored in u64 fits in an integer of
1554          * `ext->kcfg.sz` bytes without any loss of information. If the
1555          * target integer is signed, we rely on the following limits of a
1556          * signed integer type of Y bits and the subsequent transformation:
1557          *
1558          *     -2^(Y-1) <= X           <= 2^(Y-1) - 1
1559          *            0 <= X + 2^(Y-1) <= 2^Y - 1
1560          *            0 <= X + 2^(Y-1) <  2^Y
1561          *
1562          * For an unsigned target integer, we check that the upper (64 - Y)
1563          * bits are all zero.
1564          */
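        /* For example, with a signed 1-byte target (bit_sz = 8):
         * v = 0xffffffffffffff80 (-128) maps to -128 + 128 = 0 and is
         * accepted, v = 127 maps to 255 < 256 and is accepted, while
         * v = 128 maps to 256 and is rejected, as is
         * v = 0xffffffffffffff7f (-129).
         */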
1565         if (ext->kcfg.is_signed)
1566                 return v + (1ULL << (bit_sz - 1)) < (1ULL << bit_sz);
1567         else
1568                 return (v >> bit_sz) == 0;
1569 }
1570
1571 static int set_kcfg_value_num(struct extern_desc *ext, void *ext_val,
1572                               __u64 value)
1573 {
1574         if (ext->kcfg.type != KCFG_INT && ext->kcfg.type != KCFG_CHAR) {
1575                 pr_warn("extern (kcfg) %s=%llu should be integer\n",
1576                         ext->name, (unsigned long long)value);
1577                 return -EINVAL;
1578         }
1579         if (!is_kcfg_value_in_range(ext, value)) {
1580                 pr_warn("extern (kcfg) %s=%llu value doesn't fit in %d bytes\n",
1581                         ext->name, (unsigned long long)value, ext->kcfg.sz);
1582                 return -ERANGE;
1583         }
1584         switch (ext->kcfg.sz) {
1585                 case 1: *(__u8 *)ext_val = value; break;
1586                 case 2: *(__u16 *)ext_val = value; break;
1587                 case 4: *(__u32 *)ext_val = value; break;
1588                 case 8: *(__u64 *)ext_val = value; break;
1589                 default:
1590                         return -EINVAL;
1591         }
1592         ext->is_set = true;
1593         return 0;
1594 }
1595
1596 static int bpf_object__process_kconfig_line(struct bpf_object *obj,
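/* Parse one Kconfig line and, if it names a known extern, set that extern's
 * value. Lines are expected in the usual Kconfig output format, e.g.:
 *
 *     CONFIG_BPF_SYSCALL=y
 *     CONFIG_MODULES=m
 *     CONFIG_HZ=250
 *     CONFIG_DEFAULT_HOSTNAME="(none)"
 */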
1597                                             char *buf, void *data)
1598 {
1599         struct extern_desc *ext;
1600         char *sep, *value;
1601         int len, err = 0;
1602         void *ext_val;
1603         __u64 num;
1604
1605         if (strncmp(buf, "CONFIG_", 7))
1606                 return 0;
1607
1608         sep = strchr(buf, '=');
1609         if (!sep) {
1610                 pr_warn("failed to parse '%s': no separator\n", buf);
1611                 return -EINVAL;
1612         }
1613
1614         /* Trim trailing '\n' */
1615         len = strlen(buf);
1616         if (buf[len - 1] == '\n')
1617                 buf[len - 1] = '\0';
1618         /* Split on '=' and ensure that a value is present. */
1619         *sep = '\0';
1620         if (!sep[1]) {
1621                 *sep = '=';
1622                 pr_warn("failed to parse '%s': no value\n", buf);
1623                 return -EINVAL;
1624         }
1625
1626         ext = find_extern_by_name(obj, buf);
1627         if (!ext || ext->is_set)
1628                 return 0;
1629
1630         ext_val = data + ext->kcfg.data_off;
1631         value = sep + 1;
1632
1633         switch (*value) {
1634         case 'y': case 'n': case 'm':
1635                 err = set_kcfg_value_tri(ext, ext_val, *value);
1636                 break;
1637         case '"':
1638                 err = set_kcfg_value_str(ext, ext_val, value);
1639                 break;
1640         default:
1641                 /* assume integer */
1642                 err = parse_u64(value, &num);
1643                 if (err) {
1644                         pr_warn("extern (kcfg) %s=%s should be integer\n",
1645                                 ext->name, value);
1646                         return err;
1647                 }
1648                 err = set_kcfg_value_num(ext, ext_val, num);
1649                 break;
1650         }
1651         if (err)
1652                 return err;
1653         pr_debug("extern (kcfg) %s=%s\n", ext->name, value);
1654         return 0;
1655 }
1656
1657 static int bpf_object__read_kconfig_file(struct bpf_object *obj, void *data)
1658 {
1659         char buf[PATH_MAX];
1660         struct utsname uts;
1661         int len, err = 0;
1662         gzFile file;
1663
1664         uname(&uts);
1665         len = snprintf(buf, PATH_MAX, "/boot/config-%s", uts.release);
1666         if (len < 0)
1667                 return -EINVAL;
1668         else if (len >= PATH_MAX)
1669                 return -ENAMETOOLONG;
1670
1671         /* gzopen also accepts uncompressed files. */
1672         file = gzopen(buf, "r");
1673         if (!file)
1674                 file = gzopen("/proc/config.gz", "r");
1675
1676         if (!file) {
1677                 pr_warn("failed to open system Kconfig\n");
1678                 return -ENOENT;
1679         }
1680
1681         while (gzgets(file, buf, sizeof(buf))) {
1682                 err = bpf_object__process_kconfig_line(obj, buf, data);
1683                 if (err) {
1684                         pr_warn("error parsing system Kconfig line '%s': %d\n",
1685                                 buf, err);
1686                         goto out;
1687                 }
1688         }
1689
1690 out:
1691         gzclose(file);
1692         return err;
1693 }
1694
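/* Same line-by-line parsing as bpf_object__read_kconfig_file(), but over a
 * caller-provided newline-separated string (e.g. "CONFIG_BPF_SYSCALL=y\n"),
 * such as the one passed in via bpf_object_open_opts.kconfig.
 */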
1695 static int bpf_object__read_kconfig_mem(struct bpf_object *obj,
1696                                         const char *config, void *data)
1697 {
1698         char buf[PATH_MAX];
1699         int err = 0;
1700         FILE *file;
1701
1702         file = fmemopen((void *)config, strlen(config), "r");
1703         if (!file) {
1704                 err = -errno;
1705                 pr_warn("failed to open in-memory Kconfig: %d\n", err);
1706                 return err;
1707         }
1708
1709         while (fgets(buf, sizeof(buf), file)) {
1710                 err = bpf_object__process_kconfig_line(obj, buf, data);
1711                 if (err) {
1712                         pr_warn("error parsing in-memory Kconfig line '%s': %d\n",
1713                                 buf, err);
1714                         break;
1715                 }
1716         }
1717
1718         fclose(file);
1719         return err;
1720 }
1721
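/* Size the internal .kconfig map to cover all kcfg externs. This relies on
 * kcfg externs having been laid out with increasing data_off, so the last
 * EXT_KCFG entry determines the total value size (an assumption about the
 * extern collection pass, not something enforced locally).
 */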
1722 static int bpf_object__init_kconfig_map(struct bpf_object *obj)
1723 {
1724         struct extern_desc *last_ext = NULL, *ext;
1725         size_t map_sz;
1726         int i, err;
1727
1728         for (i = 0; i < obj->nr_extern; i++) {
1729                 ext = &obj->externs[i];
1730                 if (ext->type == EXT_KCFG)
1731                         last_ext = ext;
1732         }
1733
1734         if (!last_ext)
1735                 return 0;
1736
1737         map_sz = last_ext->kcfg.data_off + last_ext->kcfg.sz;
1738         err = bpf_object__init_internal_map(obj, LIBBPF_MAP_KCONFIG,
1739                                             obj->efile.symbols_shndx,
1740                                             NULL, map_sz);
1741         if (err)
1742                 return err;
1743
1744         obj->kconfig_map_idx = obj->nr_maps - 1;
1745
1746         return 0;
1747 }
1748
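/* Parse legacy fixed-layout map definitions from the "maps" ELF section.
 * A typical legacy definition looks like this (a sketch):
 *
 *     struct bpf_map_def SEC("maps") my_map = {
 *             .type = BPF_MAP_TYPE_HASH,
 *             .key_size = sizeof(int),
 *             .value_size = sizeof(long),
 *             .max_entries = 1024,
 *     };
 */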
1749 static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict)
1750 {
1751         Elf_Data *symbols = obj->efile.symbols;
1752         int i, map_def_sz = 0, nr_maps = 0, nr_syms;
1753         Elf_Data *data = NULL;
1754         Elf_Scn *scn;
1755
1756         if (obj->efile.maps_shndx < 0)
1757                 return 0;
1758
1759         if (!symbols)
1760                 return -EINVAL;
1761
1763         scn = elf_sec_by_idx(obj, obj->efile.maps_shndx);
1764         data = elf_sec_data(obj, scn);
1765         if (!scn || !data) {
1766                 pr_warn("elf: failed to get legacy map definitions for %s\n",
1767                         obj->path);
1768                 return -EINVAL;
1769         }
1770
1771         /*
1772          * Count number of maps. Each map has a name.
1773          * Array of maps is not supported: only the first element is
1774          * considered.
1775          *
1776          * TODO: Detect array of map and report error.
1777          */
1778         nr_syms = symbols->d_size / sizeof(GElf_Sym);
1779         for (i = 0; i < nr_syms; i++) {
1780                 GElf_Sym sym;
1781
1782                 if (!gelf_getsym(symbols, i, &sym))
1783                         continue;
1784                 if (sym.st_shndx != obj->efile.maps_shndx)
1785                         continue;
1786                 nr_maps++;
1787         }
1788         /* Assume equally sized map definitions */
1789         pr_debug("elf: found %d legacy map definitions (%zu bytes) in %s\n",
1790                  nr_maps, data->d_size, obj->path);
1791
1792         if (!data->d_size || nr_maps == 0 || (data->d_size % nr_maps) != 0) {
1793                 pr_warn("elf: unable to determine legacy map definition size in %s\n",
1794                         obj->path);
1795                 return -EINVAL;
1796         }
1797         map_def_sz = data->d_size / nr_maps;
1798
1799         /* Fill obj->maps using data in "maps" section.  */
1800         for (i = 0; i < nr_syms; i++) {
1801                 GElf_Sym sym;
1802                 const char *map_name;
1803                 struct bpf_map_def *def;
1804                 struct bpf_map *map;
1805
1806                 if (!gelf_getsym(symbols, i, &sym))
1807                         continue;
1808                 if (sym.st_shndx != obj->efile.maps_shndx)
1809                         continue;
1810
1811                 map = bpf_object__add_map(obj);
1812                 if (IS_ERR(map))
1813                         return PTR_ERR(map);
1814
1815                 map_name = elf_sym_str(obj, sym.st_name);
1816                 if (!map_name) {
1817                         pr_warn("failed to get map #%d name sym string for obj %s\n",
1818                                 i, obj->path);
1819                         return -LIBBPF_ERRNO__FORMAT;
1820                 }
1821
1822                 map->libbpf_type = LIBBPF_MAP_UNSPEC;
1823                 map->sec_idx = sym.st_shndx;
1824                 map->sec_offset = sym.st_value;
1825                 pr_debug("map '%s' (legacy): at sec_idx %d, offset %zu.\n",
1826                          map_name, map->sec_idx, map->sec_offset);
1827                 if (sym.st_value + map_def_sz > data->d_size) {
1828                         pr_warn("corrupted maps section in %s: last map \"%s\" too small\n",
1829                                 obj->path, map_name);
1830                         return -EINVAL;
1831                 }
1832
1833                 map->name = strdup(map_name);
1834                 if (!map->name) {
1835                         pr_warn("failed to alloc map name\n");
1836                         return -ENOMEM;
1837                 }
1838                 pr_debug("map %d is \"%s\"\n", i, map->name);
1839                 def = (struct bpf_map_def *)(data->d_buf + sym.st_value);
1840                 /*
1841                  * If the definition of the map in the object file fits in
1842                  * bpf_map_def, copy it.  Any extra fields in our version
1843                  * of bpf_map_def stay zero, because bpf_object__add_map()
1844                  * returns a zero-initialized entry.
1845                  */
1846                 if (map_def_sz <= sizeof(struct bpf_map_def)) {
1847                         memcpy(&map->def, def, map_def_sz);
1848                 } else {
1849                         /*
1850                          * Here the map structure being read is bigger than what
1851                          * we expect, truncate if the excess bits are all zero.
1852                          * If they are not zero, reject this map as
1853                          * incompatible.
1854                          */
1855                         char *b;
1856
1857                         for (b = ((char *)def) + sizeof(struct bpf_map_def);
1858                              b < ((char *)def) + map_def_sz; b++) {
1859                                 if (*b != 0) {
1860                                         pr_warn("maps section in %s: \"%s\" has unrecognized, non-zero options\n",
1861                                                 obj->path, map_name);
1862                                         if (strict)
1863                                                 return -EINVAL;
1864                                 }
1865                         }
1866                         memcpy(&map->def, def, sizeof(struct bpf_map_def));
1867                 }
1868         }
1869         return 0;
1870 }
1871
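/* Peel off typedefs and type modifiers (const/volatile/restrict) until a
 * concrete type is reached, optionally reporting that type's ID via res_id.
 * E.g., for a `const volatile __u32` variable this lands on the INT type.
 */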
1872 static const struct btf_type *
1873 skip_mods_and_typedefs(const struct btf *btf, __u32 id, __u32 *res_id)
1874 {
1875         const struct btf_type *t = btf__type_by_id(btf, id);
1876
1877         if (res_id)
1878                 *res_id = id;
1879
1880         while (btf_is_mod(t) || btf_is_typedef(t)) {
1881                 if (res_id)
1882                         *res_id = t->type;
1883                 t = btf__type_by_id(btf, t->type);
1884         }
1885
1886         return t;
1887 }
1888
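/* Resolve a (possibly typedef'd) pointer type to the FUNC_PROTO it points
 * to, or return NULL if `id` is not a pointer-to-function type.
 */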
1889 static const struct btf_type *
1890 resolve_func_ptr(const struct btf *btf, __u32 id, __u32 *res_id)
1891 {
1892         const struct btf_type *t;
1893
1894         t = skip_mods_and_typedefs(btf, id, NULL);
1895         if (!btf_is_ptr(t))
1896                 return NULL;
1897
1898         t = skip_mods_and_typedefs(btf, t->type, res_id);
1899
1900         return btf_is_func_proto(t) ? t : NULL;
1901 }
1902
1903 static const char *btf_kind_str(const struct btf_type *t)
1904 {
1905         switch (btf_kind(t)) {
1906         case BTF_KIND_UNKN: return "void";
1907         case BTF_KIND_INT: return "int";
1908         case BTF_KIND_PTR: return "ptr";
1909         case BTF_KIND_ARRAY: return "array";
1910         case BTF_KIND_STRUCT: return "struct";
1911         case BTF_KIND_UNION: return "union";
1912         case BTF_KIND_ENUM: return "enum";
1913         case BTF_KIND_FWD: return "fwd";
1914         case BTF_KIND_TYPEDEF: return "typedef";
1915         case BTF_KIND_VOLATILE: return "volatile";
1916         case BTF_KIND_CONST: return "const";
1917         case BTF_KIND_RESTRICT: return "restrict";
1918         case BTF_KIND_FUNC: return "func";
1919         case BTF_KIND_FUNC_PROTO: return "func_proto";
1920         case BTF_KIND_VAR: return "var";
1921         case BTF_KIND_DATASEC: return "datasec";
1922         default: return "unknown";
1923         }
1924 }
1925
1926 /*
1927  * Fetch integer attribute of BTF map definition. Such attributes are
1928  * represented using a pointer to an array, whose dimensionality encodes
1929  * the specified integer value. E.g., int (*type)[BPF_MAP_TYPE_ARRAY];
1930  * encodes `type => BPF_MAP_TYPE_ARRAY` key/value pair completely using BTF
1931  * type definition, while using only sizeof(void *) space in ELF data section.
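 *
 * The __uint() convenience macro from bpf_helpers.h expands to exactly such
 * a field, so a BTF-defined map along these lines (a sketch) is what this
 * helper ends up parsing:
 *
 *     struct {
 *             __uint(type, BPF_MAP_TYPE_ARRAY);  // int (*type)[BPF_MAP_TYPE_ARRAY];
 *             __uint(max_entries, 256);          // int (*max_entries)[256];
 *     } my_map SEC(".maps");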
1932  */
1933 static bool get_map_field_int(const char *map_name, const struct btf *btf,
1934                               const struct btf_member *m, __u32 *res)
1935 {
1936         const struct btf_type *t = skip_mods_and_typedefs(btf, m->type, NULL);
1937         const char *name = btf__name_by_offset(btf, m->name_off);
1938         const struct btf_array *arr_info;
1939         const struct btf_type *arr_t;
1940
1941         if (!btf_is_ptr(t)) {
1942                 pr_warn("map '%s': attr '%s': expected PTR, got %s.\n",
1943                         map_name, name, btf_kind_str(t));
1944                 return false;
1945         }
1946
1947         arr_t = btf__type_by_id(btf, t->type);
1948         if (!arr_t) {
1949                 pr_warn("map '%s': attr '%s': type [%u] not found.\n",
1950                         map_name, name, t->type);
1951                 return false;
1952         }
1953         if (!btf_is_array(arr_t)) {
1954                 pr_warn("map '%s': attr '%s': expected ARRAY, got %s.\n",
1955                         map_name, name, btf_kind_str(arr_t));
1956                 return false;
1957         }
1958         arr_info = btf_array(arr_t);
1959         *res = arr_info->nelems;
1960         return true;
1961 }
1962
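/* Construct the default pin path for a map as "<pin_root>/<map_name>",
 * falling back to the canonical bpffs mount point /sys/fs/bpf when no
 * root path was supplied.
 */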
1963 static int build_map_pin_path(struct bpf_map *map, const char *path)
1964 {
1965         char buf[PATH_MAX];
1966         int len;
1967
1968         if (!path)
1969                 path = "/sys/fs/bpf";
1970
1971         len = snprintf(buf, PATH_MAX, "%s/%s", path, bpf_map__name(map));
1972         if (len < 0)
1973                 return -EINVAL;
1974         else if (len >= PATH_MAX)
1975                 return -ENAMETOOLONG;
1976
1977         return bpf_map__set_pin_path(map, buf);
1978 }
1979
1981 static int parse_btf_map_def(struct bpf_object *obj,
1982                              struct bpf_map *map,
1983                              const struct btf_type *def,
1984                              bool strict, bool is_inner,
1985                              const char *pin_root_path)
1986 {
1987         const struct btf_type *t;
1988         const struct btf_member *m;
1989         int vlen, i;
1990
1991         vlen = btf_vlen(def);
1992         m = btf_members(def);
1993         for (i = 0; i < vlen; i++, m++) {
1994                 const char *name = btf__name_by_offset(obj->btf, m->name_off);
1995
1996                 if (!name) {
1997                         pr_warn("map '%s': invalid field #%d.\n", map->name, i);
1998                         return -EINVAL;
1999                 }
2000                 if (strcmp(name, "type") == 0) {
2001                         if (!get_map_field_int(map->name, obj->btf, m,
2002                                                &map->def.type))
2003                                 return -EINVAL;
2004                         pr_debug("map '%s': found type = %u.\n",
2005                                  map->name, map->def.type);
2006                 } else if (strcmp(name, "max_entries") == 0) {
2007                         if (!get_map_field_int(map->name, obj->btf, m,
2008                                                &map->def.max_entries))
2009                                 return -EINVAL;
2010                         pr_debug("map '%s': found max_entries = %u.\n",
2011                                  map->name, map->def.max_entries);
2012                 } else if (strcmp(name, "map_flags") == 0) {
2013                         if (!get_map_field_int(map->name, obj->btf, m,
2014                                                &map->def.map_flags))
2015                                 return -EINVAL;
2016                         pr_debug("map '%s': found map_flags = %u.\n",
2017                                  map->name, map->def.map_flags);
2018                 } else if (strcmp(name, "numa_node") == 0) {
2019                         if (!get_map_field_int(map->name, obj->btf, m, &map->numa_node))
2020                                 return -EINVAL;
2021                         pr_debug("map '%s': found numa_node = %u.\n", map->name, map->numa_node);
2022                 } else if (strcmp(name, "key_size") == 0) {
2023                         __u32 sz;
2024
2025                         if (!get_map_field_int(map->name, obj->btf, m, &sz))
2026                                 return -EINVAL;
2027                         pr_debug("map '%s': found key_size = %u.\n",
2028                                  map->name, sz);
2029                         if (map->def.key_size && map->def.key_size != sz) {
2030                                 pr_warn("map '%s': conflicting key size %u != %u.\n",
2031                                         map->name, map->def.key_size, sz);
2032                                 return -EINVAL;
2033                         }
2034                         map->def.key_size = sz;
2035                 } else if (strcmp(name, "key") == 0) {
2036                         __s64 sz;
2037
2038                         t = btf__type_by_id(obj->btf, m->type);
2039                         if (!t) {
2040                                 pr_warn("map '%s': key type [%d] not found.\n",
2041                                         map->name, m->type);
2042                                 return -EINVAL;
2043                         }
2044                         if (!btf_is_ptr(t)) {
2045                                 pr_warn("map '%s': key spec is not PTR: %s.\n",
2046                                         map->name, btf_kind_str(t));
2047                                 return -EINVAL;
2048                         }
2049                         sz = btf__resolve_size(obj->btf, t->type);
2050                         if (sz < 0) {
2051                                 pr_warn("map '%s': can't determine key size for type [%u]: %zd.\n",
2052                                         map->name, t->type, (ssize_t)sz);
2053                                 return sz;
2054                         }
2055                         pr_debug("map '%s': found key [%u], sz = %zd.\n",
2056                                  map->name, t->type, (ssize_t)sz);
2057                         if (map->def.key_size && map->def.key_size != sz) {
2058                                 pr_warn("map '%s': conflicting key size %u != %zd.\n",
2059                                         map->name, map->def.key_size, (ssize_t)sz);
2060                                 return -EINVAL;
2061                         }
2062                         map->def.key_size = sz;
2063                         map->btf_key_type_id = t->type;
2064                 } else if (strcmp(name, "value_size") == 0) {
2065                         __u32 sz;
2066
2067                         if (!get_map_field_int(map->name, obj->btf, m, &sz))
2068                                 return -EINVAL;
2069                         pr_debug("map '%s': found value_size = %u.\n",
2070                                  map->name, sz);
2071                         if (map->def.value_size && map->def.value_size != sz) {
2072                                 pr_warn("map '%s': conflicting value size %u != %u.\n",
2073                                         map->name, map->def.value_size, sz);
2074                                 return -EINVAL;
2075                         }
2076                         map->def.value_size = sz;
2077                 } else if (strcmp(name, "value") == 0) {
2078                         __s64 sz;
2079
2080                         t = btf__type_by_id(obj->btf, m->type);
2081                         if (!t) {
2082                                 pr_warn("map '%s': value type [%d] not found.\n",
2083                                         map->name, m->type);
2084                                 return -EINVAL;
2085                         }
2086                         if (!btf_is_ptr(t)) {
2087                                 pr_warn("map '%s': value spec is not PTR: %s.\n",
2088                                         map->name, btf_kind_str(t));
2089                                 return -EINVAL;
2090                         }
2091                         sz = btf__resolve_size(obj->btf, t->type);
2092                         if (sz < 0) {
2093                                 pr_warn("map '%s': can't determine value size for type [%u]: %zd.\n",
2094                                         map->name, t->type, (ssize_t)sz);
2095                                 return sz;
2096                         }
2097                         pr_debug("map '%s': found value [%u], sz = %zd.\n",
2098                                  map->name, t->type, (ssize_t)sz);
2099                         if (map->def.value_size && map->def.value_size != sz) {
2100                                 pr_warn("map '%s': conflicting value size %u != %zd.\n",
2101                                         map->name, map->def.value_size, (ssize_t)sz);
2102                                 return -EINVAL;
2103                         }
2104                         map->def.value_size = sz;
2105                         map->btf_value_type_id = t->type;
2106                 } else if (strcmp(name, "values") == 0) {
2108                         int err;
2109
2110                         if (is_inner) {
2111                                 pr_warn("map '%s': multi-level inner maps not supported.\n",
2112                                         map->name);
2113                                 return -ENOTSUP;
2114                         }
2115                         if (i != vlen - 1) {
2116                                 pr_warn("map '%s': '%s' member should be last.\n",
2117                                         map->name, name);
2118                                 return -EINVAL;
2119                         }
2120                         if (!bpf_map_type__is_map_in_map(map->def.type)) {
2121                                 pr_warn("map '%s': should be map-in-map.\n",
2122                                         map->name);
2123                                 return -ENOTSUP;
2124                         }
2125                         if (map->def.value_size && map->def.value_size != 4) {
2126                                 pr_warn("map '%s': conflicting value size %u != 4.\n",
2127                                         map->name, map->def.value_size);
2128                                 return -EINVAL;
2129                         }
2130                         map->def.value_size = 4;
2131                         t = btf__type_by_id(obj->btf, m->type);
2132                         if (!t) {
2133                                 pr_warn("map '%s': map-in-map inner type [%d] not found.\n",
2134                                         map->name, m->type);
2135                                 return -EINVAL;
2136                         }
2137                         if (!btf_is_array(t) || btf_array(t)->nelems) {
2138                                 pr_warn("map '%s': map-in-map inner spec is not a zero-sized array.\n",
2139                                         map->name);
2140                                 return -EINVAL;
2141                         }
2142                         t = skip_mods_and_typedefs(obj->btf, btf_array(t)->type,
2143                                                    NULL);
2144                         if (!btf_is_ptr(t)) {
2145                                 pr_warn("map '%s': map-in-map inner def is of unexpected kind %s.\n",
2146                                         map->name, btf_kind_str(t));
2147                                 return -EINVAL;
2148                         }
2149                         t = skip_mods_and_typedefs(obj->btf, t->type, NULL);
2150                         if (!btf_is_struct(t)) {
2151                                 pr_warn("map '%s': map-in-map inner def is of unexpected kind %s.\n",
2152                                         map->name, btf_kind_str(t));
2153                                 return -EINVAL;
2154                         }
2155
2156                         map->inner_map = calloc(1, sizeof(*map->inner_map));
2157                         if (!map->inner_map)
2158                                 return -ENOMEM;
2159                         map->inner_map->sec_idx = obj->efile.btf_maps_shndx;
2160                         map->inner_map->name = malloc(strlen(map->name) +
2161                                                       sizeof(".inner") + 1);
2162                         if (!map->inner_map->name)
2163                                 return -ENOMEM;
2164                         sprintf(map->inner_map->name, "%s.inner", map->name);
2165
2166                         err = parse_btf_map_def(obj, map->inner_map, t, strict,
2167                                                 true /* is_inner */, NULL);
2168                         if (err)
2169                                 return err;
2170                 } else if (strcmp(name, "pinning") == 0) {
2171                         __u32 val;
2172                         int err;
2173
2174                         if (is_inner) {
2175                                 pr_warn("map '%s': inner def can't be pinned.\n",
2176                                         map->name);
2177                                 return -EINVAL;
2178                         }
2179                         if (!get_map_field_int(map->name, obj->btf, m, &val))
2180                                 return -EINVAL;
2181                         pr_debug("map '%s': found pinning = %u.\n",
2182                                  map->name, val);
2183
2184                         if (val != LIBBPF_PIN_NONE &&
2185                             val != LIBBPF_PIN_BY_NAME) {
2186                                 pr_warn("map '%s': invalid pinning value %u.\n",
2187                                         map->name, val);
2188                                 return -EINVAL;
2189                         }
2190                         if (val == LIBBPF_PIN_BY_NAME) {
2191                                 err = build_map_pin_path(map, pin_root_path);
2192                                 if (err) {
2193                                         pr_warn("map '%s': couldn't build pin path.\n",
2194                                                 map->name);
2195                                         return err;
2196                                 }
2197                         }
2198                 } else {
2199                         if (strict) {
2200                                 pr_warn("map '%s': unknown field '%s'.\n",
2201                                         map->name, name);
2202                                 return -ENOTSUP;
2203                         }
2204                         pr_debug("map '%s': ignoring unknown field '%s'.\n",
2205                                  map->name, name);
2206                 }
2207         }
2208
2209         if (map->def.type == BPF_MAP_TYPE_UNSPEC) {
2210                 pr_warn("map '%s': map type isn't specified.\n", map->name);
2211                 return -EINVAL;
2212         }
2213
2214         return 0;
2215 }
2216
2217 static int bpf_object__init_user_btf_map(struct bpf_object *obj,
2218                                          const struct btf_type *sec,
2219                                          int var_idx, int sec_idx,
2220                                          const Elf_Data *data, bool strict,
2221                                          const char *pin_root_path)
2222 {
2223         const struct btf_type *var, *def;
2224         const struct btf_var_secinfo *vi;
2225         const struct btf_var *var_extra;
2226         const char *map_name;
2227         struct bpf_map *map;
2228
2229         vi = btf_var_secinfos(sec) + var_idx;
2230         var = btf__type_by_id(obj->btf, vi->type);
2231         var_extra = btf_var(var);
2232         map_name = btf__name_by_offset(obj->btf, var->name_off);
2233
2234         if (map_name == NULL || map_name[0] == '\0') {
2235                 pr_warn("map #%d: empty name.\n", var_idx);
2236                 return -EINVAL;
2237         }
2238         if ((__u64)vi->offset + vi->size > data->d_size) {
2239                 pr_warn("map '%s' BTF data is corrupted.\n", map_name);
2240                 return -EINVAL;
2241         }
2242         if (!btf_is_var(var)) {
2243                 pr_warn("map '%s': unexpected var kind %s.\n",
2244                         map_name, btf_kind_str(var));
2245                 return -EINVAL;
2246         }
2247         if (var_extra->linkage != BTF_VAR_GLOBAL_ALLOCATED &&
2248             var_extra->linkage != BTF_VAR_STATIC) {
2249                 pr_warn("map '%s': unsupported var linkage %u.\n",
2250                         map_name, var_extra->linkage);
2251                 return -EOPNOTSUPP;
2252         }
2253
2254         def = skip_mods_and_typedefs(obj->btf, var->type, NULL);
2255         if (!btf_is_struct(def)) {
2256                 pr_warn("map '%s': unexpected def kind %s.\n",
2257                         map_name, btf_kind_str(def));
2258                 return -EINVAL;
2259         }
2260         if (def->size > vi->size) {
2261                 pr_warn("map '%s': invalid def size.\n", map_name);
2262                 return -EINVAL;
2263         }
2264
2265         map = bpf_object__add_map(obj);
2266         if (IS_ERR(map))
2267                 return PTR_ERR(map);
2268         map->name = strdup(map_name);
2269         if (!map->name) {
2270                 pr_warn("map '%s': failed to alloc map name.\n", map_name);
2271                 return -ENOMEM;
2272         }
2273         map->libbpf_type = LIBBPF_MAP_UNSPEC;
2274         map->def.type = BPF_MAP_TYPE_UNSPEC;
2275         map->sec_idx = sec_idx;
2276         map->sec_offset = vi->offset;
2277         map->btf_var_idx = var_idx;
2278         pr_debug("map '%s': at sec_idx %d, offset %zu.\n",
2279                  map_name, map->sec_idx, map->sec_offset);
2280
2281         return parse_btf_map_def(obj, map, def, strict, false, pin_root_path);
2282 }
2283
2284 static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict,
2285                                           const char *pin_root_path)
2286 {
2287         const struct btf_type *sec = NULL;
2288         int nr_types, i, vlen, err;
2289         const struct btf_type *t;
2290         const char *name;
2291         Elf_Data *data;
2292         Elf_Scn *scn;
2293
2294         if (obj->efile.btf_maps_shndx < 0)
2295                 return 0;
2296
2297         scn = elf_sec_by_idx(obj, obj->efile.btf_maps_shndx);
2298         data = elf_sec_data(obj, scn);
2299         if (!scn || !data) {
2300                 pr_warn("elf: failed to get %s map definitions for %s\n",
2301                         MAPS_ELF_SEC, obj->path);
2302                 return -EINVAL;
2303         }
2304
2305         nr_types = btf__get_nr_types(obj->btf);
2306         for (i = 1; i <= nr_types; i++) {
2307                 t = btf__type_by_id(obj->btf, i);
2308                 if (!btf_is_datasec(t))
2309                         continue;
2310                 name = btf__name_by_offset(obj->btf, t->name_off);
2311                 if (strcmp(name, MAPS_ELF_SEC) == 0) {
2312                         sec = t;
2313                         obj->efile.btf_maps_sec_btf_id = i;
2314                         break;
2315                 }
2316         }
2317
2318         if (!sec) {
2319                 pr_warn("DATASEC '%s' not found.\n", MAPS_ELF_SEC);
2320                 return -ENOENT;
2321         }
2322
2323         vlen = btf_vlen(sec);
2324         for (i = 0; i < vlen; i++) {
2325                 err = bpf_object__init_user_btf_map(obj, sec, i,
2326                                                     obj->efile.btf_maps_shndx,
2327                                                     data, strict,
2328                                                     pin_root_path);
2329                 if (err)
2330                         return err;
2331         }
2332
2333         return 0;
2334 }
2335
2336 static int bpf_object__init_maps(struct bpf_object *obj,
2337                                  const struct bpf_object_open_opts *opts)
2338 {
2339         const char *pin_root_path;
2340         bool strict;
2341         int err;
2342
2343         strict = !OPTS_GET(opts, relaxed_maps, false);
2344         pin_root_path = OPTS_GET(opts, pin_root_path, NULL);
2345
2346         err = bpf_object__init_user_maps(obj, strict);
2347         err = err ?: bpf_object__init_user_btf_maps(obj, strict, pin_root_path);
2348         err = err ?: bpf_object__init_global_data_maps(obj);
2349         err = err ?: bpf_object__init_kconfig_map(obj);
2350         err = err ?: bpf_object__init_struct_ops_maps(obj);
2351         if (err)
2352                 return err;
2353
2354         return 0;
2355 }
2356
2357 static bool section_have_execinstr(struct bpf_object *obj, int idx)
2358 {
2359         GElf_Shdr sh;
2360
2361         if (elf_sec_hdr(obj, elf_sec_by_idx(obj, idx), &sh))
2362                 return false;
2363
2364         return sh.sh_flags & SHF_EXECINSTR;
2365 }
2366
2367 static bool btf_needs_sanitization(struct bpf_object *obj)
2368 {
2369         bool has_func_global = kernel_supports(FEAT_BTF_GLOBAL_FUNC);
2370         bool has_datasec = kernel_supports(FEAT_BTF_DATASEC);
2371         bool has_func = kernel_supports(FEAT_BTF_FUNC);
2372
2373         return !has_func || !has_datasec || !has_func_global;
2374 }
2375
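/* Downgrade BTF kinds the running kernel doesn't know about into older,
 * semantically closest kinds so the rest of the BTF still loads:
 * VAR becomes INT, DATASEC becomes STRUCT, FUNC_PROTO becomes ENUM,
 * FUNC becomes TYPEDEF, and global FUNC linkage is dropped to static.
 */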
2376 static void bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf)
2377 {
2378         bool has_func_global = kernel_supports(FEAT_BTF_GLOBAL_FUNC);
2379         bool has_datasec = kernel_supports(FEAT_BTF_DATASEC);
2380         bool has_func = kernel_supports(FEAT_BTF_FUNC);
2381         struct btf_type *t;
2382         int i, j, vlen;
2383
2384         for (i = 1; i <= btf__get_nr_types(btf); i++) {
2385                 t = (struct btf_type *)btf__type_by_id(btf, i);
2386
2387                 if (!has_datasec && btf_is_var(t)) {
2388                         /* replace VAR with INT */
2389                         t->info = BTF_INFO_ENC(BTF_KIND_INT, 0, 0);
2390                         /*
2391                          * using size = 1 is the safest choice, 4 will be too
2392                          * big and cause kernel BTF validation failure if
2393                          * original variable took less than 4 bytes
2394                          */
2395                         t->size = 1;
2396                         *(int *)(t + 1) = BTF_INT_ENC(0, 0, 8);
2397                 } else if (!has_datasec && btf_is_datasec(t)) {
2398                         /* replace DATASEC with STRUCT */
2399                         const struct btf_var_secinfo *v = btf_var_secinfos(t);
2400                         struct btf_member *m = btf_members(t);
2401                         struct btf_type *vt;
2402                         char *name;
2403
2404                         name = (char *)btf__name_by_offset(btf, t->name_off);
2405                         while (*name) {
2406                                 if (*name == '.')
2407                                         *name = '_';
2408                                 name++;
2409                         }
2410
2411                         vlen = btf_vlen(t);
2412                         t->info = BTF_INFO_ENC(BTF_KIND_STRUCT, 0, vlen);
2413                         for (j = 0; j < vlen; j++, v++, m++) {
2414                                 /* order of field assignments is important */
2415                                 m->offset = v->offset * 8;
2416                                 m->type = v->type;
2417                                 /* preserve variable name as member name */
2418                                 vt = (void *)btf__type_by_id(btf, v->type);
2419                                 m->name_off = vt->name_off;
2420                         }
2421                 } else if (!has_func && btf_is_func_proto(t)) {
2422                         /* replace FUNC_PROTO with ENUM */
2423                         vlen = btf_vlen(t);
2424                         t->info = BTF_INFO_ENC(BTF_KIND_ENUM, 0, vlen);
2425                         t->size = sizeof(__u32); /* kernel enforced */
2426                 } else if (!has_func && btf_is_func(t)) {
2427                         /* replace FUNC with TYPEDEF */
2428                         t->info = BTF_INFO_ENC(BTF_KIND_TYPEDEF, 0, 0);
2429                 } else if (!has_func_global && btf_is_func(t)) {
2430                         /* replace BTF_FUNC_GLOBAL with BTF_FUNC_STATIC */
2431                         t->info = BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0);
2432                 }
2433         }
2434 }
2435
2436 static bool libbpf_needs_btf(const struct bpf_object *obj)
2437 {
2438         return obj->efile.btf_maps_shndx >= 0 ||
2439                obj->efile.st_ops_shndx >= 0 ||
2440                obj->nr_extern > 0;
2441 }
2442
2443 static bool kernel_needs_btf(const struct bpf_object *obj)
2444 {
2445         return obj->efile.st_ops_shndx >= 0;
2446 }
2447
2448 static int bpf_object__init_btf(struct bpf_object *obj,
2449                                 Elf_Data *btf_data,
2450                                 Elf_Data *btf_ext_data)
2451 {
2452         int err = -ENOENT;
2453
2454         if (btf_data) {
2455                 obj->btf = btf__new(btf_data->d_buf, btf_data->d_size);
2456                 if (IS_ERR(obj->btf)) {
2457                         err = PTR_ERR(obj->btf);
2458                         obj->btf = NULL;
2459                         pr_warn("Error loading ELF section %s: %d.\n",
2460                                 BTF_ELF_SEC, err);
2461                         goto out;
2462                 }
2463                 /* enforce 8-byte pointers for BPF-targeted BTFs */
2464                 btf__set_pointer_size(obj->btf, 8);
2465                 err = 0;
2466         }
2467         if (btf_ext_data) {
2468                 if (!obj->btf) {
2469                         pr_debug("Ignoring ELF section %s because the ELF section %s it depends on is missing.\n",
2470                                  BTF_EXT_ELF_SEC, BTF_ELF_SEC);
2471                         goto out;
2472                 }
2473                 obj->btf_ext = btf_ext__new(btf_ext_data->d_buf,
2474                                             btf_ext_data->d_size);
2475                 if (IS_ERR(obj->btf_ext)) {
2476                         pr_warn("Error loading ELF section %s: %ld. Ignoring it and continuing.\n",
2477                                 BTF_EXT_ELF_SEC, PTR_ERR(obj->btf_ext));
2478                         obj->btf_ext = NULL;
2479                         goto out;
2480                 }
2481         }
2482 out:
2483         if (err && libbpf_needs_btf(obj)) {
2484                 pr_warn("BTF is required, but is missing or corrupted.\n");
2485                 return err;
2486         }
2487         return 0;
2488 }
2489
2490 static int bpf_object__finalize_btf(struct bpf_object *obj)
2491 {
2492         int err;
2493
2494         if (!obj->btf)
2495                 return 0;
2496
2497         err = btf__finalize_data(obj, obj->btf);
2498         if (err) {
2499                 pr_warn("Error finalizing %s: %d.\n", BTF_ELF_SEC, err);
2500                 return err;
2501         }
2502
2503         return 0;
2504 }
2505
2506 static inline bool libbpf_prog_needs_vmlinux_btf(struct bpf_program *prog)
2507 {
2508         if (prog->type == BPF_PROG_TYPE_STRUCT_OPS ||
2509             prog->type == BPF_PROG_TYPE_LSM)
2510                 return true;
2511
2512         /* BPF_PROG_TYPE_TRACING programs which do not attach to other programs
2513          * also need vmlinux BTF
2514          */
2515         if (prog->type == BPF_PROG_TYPE_TRACING && !prog->attach_prog_fd)
2516                 return true;
2517
2518         return false;
2519 }
2520
2521 static int bpf_object__load_vmlinux_btf(struct bpf_object *obj)
2522 {
2523         bool need_vmlinux_btf = false;
2524         struct bpf_program *prog;
2525         int err;
2526
2527         /* CO-RE relocations need kernel BTF */
2528         if (obj->btf_ext && obj->btf_ext->core_relo_info.len)
2529                 need_vmlinux_btf = true;
2530
2531         bpf_object__for_each_program(prog, obj) {
2532                 if (!prog->load)
2533                         continue;
2534                 if (libbpf_prog_needs_vmlinux_btf(prog)) {
2535                         need_vmlinux_btf = true;
2536                         break;
2537                 }
2538         }
2539
2540         if (!need_vmlinux_btf)
2541                 return 0;
2542
2543         obj->btf_vmlinux = libbpf_find_kernel_btf();
2544         if (IS_ERR(obj->btf_vmlinux)) {
2545                 err = PTR_ERR(obj->btf_vmlinux);
2546                 pr_warn("Error loading vmlinux BTF: %d\n", err);
2547                 obj->btf_vmlinux = NULL;
2548                 return err;
2549         }
2550         return 0;
2551 }
2552
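/* Load obj's BTF into the kernel, sanitizing a throwaway copy first if the
 * kernel lacks some BTF features; on success the copy's FD is transplanted
 * onto obj->btf, leaving the original (still needed for CO-RE) untouched.
 */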
2553 static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj)
2554 {
2555         struct btf *kern_btf = obj->btf;
2556         bool btf_mandatory, sanitize;
2557         int err = 0;
2558
2559         if (!obj->btf)
2560                 return 0;
2561
2562         if (!kernel_supports(FEAT_BTF)) {
2563                 if (kernel_needs_btf(obj)) {
2564                         err = -EOPNOTSUPP;
2565                         goto report;
2566                 }
2567                 pr_debug("Kernel doesn't support BTF, skipping uploading it.\n");
2568                 return 0;
2569         }
2570
2571         sanitize = btf_needs_sanitization(obj);
2572         if (sanitize) {
2573                 const void *raw_data;
2574                 __u32 sz;
2575
2576                 /* clone BTF to sanitize a copy and leave the original intact */
2577                 raw_data = btf__get_raw_data(obj->btf, &sz);
2578                 kern_btf = btf__new(raw_data, sz);
2579                 if (IS_ERR(kern_btf))
2580                         return PTR_ERR(kern_btf);
2581
2582                 /* enforce 8-byte pointers for BPF-targeted BTFs */
2583                 btf__set_pointer_size(obj->btf, 8);
2584                 bpf_object__sanitize_btf(obj, kern_btf);
2585         }
2586
2587         err = btf__load(kern_btf);
2588         if (sanitize) {
2589                 if (!err) {
2590                         /* move fd to libbpf's BTF */
2591                         btf__set_fd(obj->btf, btf__fd(kern_btf));
2592                         btf__set_fd(kern_btf, -1);
2593                 }
2594                 btf__free(kern_btf);
2595         }
2596 report:
2597         if (err) {
2598                 btf_mandatory = kernel_needs_btf(obj);
2599                 pr_warn("Error loading .BTF into kernel: %d. %s\n", err,
2600                         btf_mandatory ? "BTF is mandatory, can't proceed."
2601                                       : "BTF is optional, ignoring.");
2602                 if (!btf_mandatory)
2603                         err = 0;
2604         }
2605         return err;
2606 }
2607
2608 static const char *elf_sym_str(const struct bpf_object *obj, size_t off)
2609 {
2610         const char *name;
2611
2612         name = elf_strptr(obj->efile.elf, obj->efile.strtabidx, off);
2613         if (!name) {
2614                 pr_warn("elf: failed to get section name string at offset %zu from %s: %s\n",
2615                         off, obj->path, elf_errmsg(-1));
2616                 return NULL;
2617         }
2618
2619         return name;
2620 }
2621
2622 static const char *elf_sec_str(const struct bpf_object *obj, size_t off)
2623 {
2624         const char *name;
2625
2626         name = elf_strptr(obj->efile.elf, obj->efile.shstrndx, off);
2627         if (!name) {
2628                 pr_warn("elf: failed to get section name string at offset %zu from %s: %s\n",
2629                         off, obj->path, elf_errmsg(-1));
2630                 return NULL;
2631         }
2632
2633         return name;
2634 }
2635
2636 static Elf_Scn *elf_sec_by_idx(const struct bpf_object *obj, size_t idx)
2637 {
2638         Elf_Scn *scn;
2639
2640         scn = elf_getscn(obj->efile.elf, idx);
2641         if (!scn) {
2642                 pr_warn("elf: failed to get section(%zu) from %s: %s\n",
2643                         idx, obj->path, elf_errmsg(-1));
2644                 return NULL;
2645         }
2646         return scn;
2647 }
2648
2649 static Elf_Scn *elf_sec_by_name(const struct bpf_object *obj, const char *name)
2650 {
2651         Elf_Scn *scn = NULL;
2652         Elf *elf = obj->efile.elf;
2653         const char *sec_name;
2654
2655         while ((scn = elf_nextscn(elf, scn)) != NULL) {
2656                 sec_name = elf_sec_name(obj, scn);
2657                 if (!sec_name)
2658                         return NULL;
2659
2660                 if (strcmp(sec_name, name) != 0)
2661                         continue;
2662
2663                 return scn;
2664         }
2665         return NULL;
2666 }
2667
2668 static int elf_sec_hdr(const struct bpf_object *obj, Elf_Scn *scn, GElf_Shdr *hdr)
2669 {
2670         if (!scn)
2671                 return -EINVAL;
2672
2673         if (gelf_getshdr(scn, hdr) != hdr) {
2674                 pr_warn("elf: failed to get section(%zu) header from %s: %s\n",
2675                         elf_ndxscn(scn), obj->path, elf_errmsg(-1));
2676                 return -EINVAL;
2677         }
2678
2679         return 0;
2680 }
2681
2682 static const char *elf_sec_name(const struct bpf_object *obj, Elf_Scn *scn)
2683 {
2684         const char *name;
2685         GElf_Shdr sh;
2686
2687         if (!scn)
2688                 return NULL;
2689
2690         if (elf_sec_hdr(obj, scn, &sh))
2691                 return NULL;
2692
2693         name = elf_sec_str(obj, sh.sh_name);
2694         if (!name) {
2695                 pr_warn("elf: failed to get section(%zu) name from %s: %s\n",
2696                         elf_ndxscn(scn), obj->path, elf_errmsg(-1));
2697                 return NULL;
2698         }
2699
2700         return name;
2701 }
2702
2703 static Elf_Data *elf_sec_data(const struct bpf_object *obj, Elf_Scn *scn)
2704 {
2705         Elf_Data *data;
2706
2707         if (!scn)
2708                 return NULL;
2709
2710         data = elf_getdata(scn, 0);
2711         if (!data) {
2712                 pr_warn("elf: failed to get section(%zu) %s data from %s: %s\n",
2713                         elf_ndxscn(scn), elf_sec_name(obj, scn) ?: "<?>",
2714                         obj->path, elf_errmsg(-1));
2715                 return NULL;
2716         }
2717
2718         return data;
2719 }
2720
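/* Linearly scan the symbol table for a symbol of the requested type that
 * sits exactly at (sec_idx, off); O(n) per call, which is presumably fine
 * for the occasional lookups it's used for.
 */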
2721 static int elf_sym_by_sec_off(const struct bpf_object *obj, size_t sec_idx,
2722                               size_t off, __u32 sym_type, GElf_Sym *sym)
2723 {
2724         Elf_Data *symbols = obj->efile.symbols;
2725         size_t n = symbols->d_size / sizeof(GElf_Sym);
2726         int i;
2727
2728         for (i = 0; i < n; i++) {
2729                 if (!gelf_getsym(symbols, i, sym))
2730                         continue;
2731                 if (sym->st_shndx != sec_idx || sym->st_value != off)
2732                         continue;
2733                 if (GELF_ST_TYPE(sym->st_info) != sym_type)
2734                         continue;
2735                 return 0;
2736         }
2737
2738         return -ENOENT;
2739 }
2740
2741 static bool is_sec_name_dwarf(const char *name)
2742 {
2743         /* approximation, but the actual list is too long */
2744         return strncmp(name, ".debug_", sizeof(".debug_") - 1) == 0;
2745 }
2746
2747 static bool ignore_elf_section(GElf_Shdr *hdr, const char *name)
2748 {
2749         /* no special handling of .strtab */
2750         if (hdr->sh_type == SHT_STRTAB)
2751                 return true;
2752
2753         /* ignore .llvm_addrsig section as well */
2754         if (hdr->sh_type == 0x6FFF4C03 /* SHT_LLVM_ADDRSIG */)
2755                 return true;
2756
2757         /* no subprograms will lead to an empty .text section, ignore it */
2758         if (hdr->sh_type == SHT_PROGBITS && hdr->sh_size == 0 &&
2759             strcmp(name, ".text") == 0)
2760                 return true;
2761
2762         /* DWARF sections */
2763         if (is_sec_name_dwarf(name))
2764                 return true;
2765
2766         if (strncmp(name, ".rel", sizeof(".rel") - 1) == 0) {
2767                 name += sizeof(".rel") - 1;
2768                 /* DWARF section relocations */
2769                 if (is_sec_name_dwarf(name))
2770                         return true;
2771
2772                 /* .BTF and .BTF.ext don't need relocations */
2773                 if (strcmp(name, BTF_ELF_SEC) == 0 ||
2774                     strcmp(name, BTF_EXT_ELF_SEC) == 0)
2775                         return true;
2776         }
2777
2778         return false;
2779 }
2780
2781 static int cmp_progs(const void *_a, const void *_b)
2782 {
2783         const struct bpf_program *a = _a;
2784         const struct bpf_program *b = _b;
2785
2786         if (a->sec_idx != b->sec_idx)
2787                 return a->sec_idx < b->sec_idx ? -1 : 1;
2788
2789         /* sec_insn_off is unique within a section, so this fully orders programs */
2790         return a->sec_insn_off < b->sec_insn_off ? -1 : 1;
2791 }
2792
2793 static int bpf_object__elf_collect(struct bpf_object *obj)
2794 {
2795         Elf *elf = obj->efile.elf;
2796         Elf_Data *btf_ext_data = NULL;
2797         Elf_Data *btf_data = NULL;
2798         int idx = 0, err = 0;
2799         const char *name;
2800         Elf_Data *data;
2801         Elf_Scn *scn;
2802         GElf_Shdr sh;
2803
2804         /* a bunch of ELF parsing functionality depends on processing symbols,
2805          * so do the first pass and find the symbol table
2806          */
2807         scn = NULL;
2808         while ((scn = elf_nextscn(elf, scn)) != NULL) {
2809                 if (elf_sec_hdr(obj, scn, &sh))
2810                         return -LIBBPF_ERRNO__FORMAT;
2811
2812                 if (sh.sh_type == SHT_SYMTAB) {
2813                         if (obj->efile.symbols) {
2814                                 pr_warn("elf: multiple symbol tables in %s\n", obj->path);
2815                                 return -LIBBPF_ERRNO__FORMAT;
2816                         }
2817
2818                         data = elf_sec_data(obj, scn);
2819                         if (!data)
2820                                 return -LIBBPF_ERRNO__FORMAT;
2821
2822                         obj->efile.symbols = data;
2823                         obj->efile.symbols_shndx = elf_ndxscn(scn);
2824                         obj->efile.strtabidx = sh.sh_link;
2825                 }
2826         }
2827
2828         scn = NULL;
2829         while ((scn = elf_nextscn(elf, scn)) != NULL) {
2830                 idx++;
2831
2832                 if (elf_sec_hdr(obj, scn, &sh))
2833                         return -LIBBPF_ERRNO__FORMAT;
2834
2835                 name = elf_sec_str(obj, sh.sh_name);
2836                 if (!name)
2837                         return -LIBBPF_ERRNO__FORMAT;
2838
2839                 if (ignore_elf_section(&sh, name))
2840                         continue;
2841
2842                 data = elf_sec_data(obj, scn);
2843                 if (!data)
2844                         return -LIBBPF_ERRNO__FORMAT;
2845
2846                 pr_debug("elf: section(%d) %s, size %lu, link %d, flags %lx, type=%d\n",
2847                          idx, name, (unsigned long)data->d_size,
2848                          (int)sh.sh_link, (unsigned long)sh.sh_flags,
2849                          (int)sh.sh_type);
2850
2851                 if (strcmp(name, "license") == 0) {
2852                         err = bpf_object__init_license(obj, data->d_buf, data->d_size);
2853                         if (err)
2854                                 return err;
2855                 } else if (strcmp(name, "version") == 0) {
2856                         err = bpf_object__init_kversion(obj, data->d_buf, data->d_size);
2857                         if (err)
2858                                 return err;
2859                 } else if (strcmp(name, "maps") == 0) {
2860                         obj->efile.maps_shndx = idx;
2861                 } else if (strcmp(name, MAPS_ELF_SEC) == 0) {
2862                         obj->efile.btf_maps_shndx = idx;
2863                 } else if (strcmp(name, BTF_ELF_SEC) == 0) {
2864                         btf_data = data;
2865                 } else if (strcmp(name, BTF_EXT_ELF_SEC) == 0) {
2866                         btf_ext_data = data;
2867                 } else if (sh.sh_type == SHT_SYMTAB) {
2868                         /* already processed during the first pass above */
2869                 } else if (sh.sh_type == SHT_PROGBITS && data->d_size > 0) {
2870                         if (sh.sh_flags & SHF_EXECINSTR) {
2871                                 if (strcmp(name, ".text") == 0)
2872                                         obj->efile.text_shndx = idx;
2873                                 err = bpf_object__add_programs(obj, data, name, idx);
2874                                 if (err)
2875                                         return err;
2876                         } else if (strcmp(name, DATA_SEC) == 0) {
2877                                 obj->efile.data = data;
2878                                 obj->efile.data_shndx = idx;
2879                         } else if (strcmp(name, RODATA_SEC) == 0) {
2880                                 obj->efile.rodata = data;
2881                                 obj->efile.rodata_shndx = idx;
2882                         } else if (strcmp(name, STRUCT_OPS_SEC) == 0) {
2883                                 obj->efile.st_ops_data = data;
2884                                 obj->efile.st_ops_shndx = idx;
2885                         } else {
2886                                 pr_info("elf: skipping unrecognized data section(%d) %s\n",
2887                                         idx, name);
2888                         }
2889                 } else if (sh.sh_type == SHT_REL) {
2890                         int nr_sects = obj->efile.nr_reloc_sects;
2891                         void *sects = obj->efile.reloc_sects;
2892                         int sec = sh.sh_info; /* points to other section */
2893
2894                         /* Only process relocations for sections with executable instructions */
2895                         if (!section_have_execinstr(obj, sec) &&
2896                             strcmp(name, ".rel" STRUCT_OPS_SEC) &&
2897                             strcmp(name, ".rel" MAPS_ELF_SEC)) {
2898                                 pr_info("elf: skipping relo section(%d) %s for section(%d) %s\n",
2899                                         idx, name, sec,
2900                                         elf_sec_name(obj, elf_sec_by_idx(obj, sec)) ?: "<?>");
2901                                 continue;
2902                         }
2903
2904                         sects = libbpf_reallocarray(sects, nr_sects + 1,
2905                                                     sizeof(*obj->efile.reloc_sects));
2906                         if (!sects)
2907                                 return -ENOMEM;
2908
2909                         obj->efile.reloc_sects = sects;
2910                         obj->efile.nr_reloc_sects++;
2911
2912                         obj->efile.reloc_sects[nr_sects].shdr = sh;
2913                         obj->efile.reloc_sects[nr_sects].data = data;
2914                 } else if (sh.sh_type == SHT_NOBITS && strcmp(name, BSS_SEC) == 0) {
2915                         obj->efile.bss = data;
2916                         obj->efile.bss_shndx = idx;
2917                 } else {
2918                         pr_info("elf: skipping section(%d) %s (size %zu)\n", idx, name,
2919                                 (size_t)sh.sh_size);
2920                 }
2921         }
2922
2923         if (!obj->efile.strtabidx || obj->efile.strtabidx > idx) {
2924                 pr_warn("elf: symbol strings section missing or invalid in %s\n", obj->path);
2925                 return -LIBBPF_ERRNO__FORMAT;
2926         }
2927
2928         /* sort BPF programs by section name and in-section instruction offset
2929          * for faster search */
2930         qsort(obj->programs, obj->nr_programs, sizeof(*obj->programs), cmp_progs);
2931
2932         return bpf_object__init_btf(obj, btf_data, btf_ext_data);
2933 }
2934
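/* For reference, the libelf walk used above boils down to the following
 * standalone sketch (hypothetical file name, error handling trimmed); it is
 * purely illustrative and not part of libbpf:
 *
 *      #include <fcntl.h>
 *      #include <stdio.h>
 *      #include <unistd.h>
 *      #include <gelf.h>
 *
 *      int dump_section_names(const char *path)
 *      {
 *              int fd = open(path, O_RDONLY);
 *              Elf_Scn *scn = NULL;
 *              size_t shstrndx;
 *              GElf_Shdr sh;
 *              Elf *elf;
 *
 *              elf_version(EV_CURRENT);
 *              elf = elf_begin(fd, ELF_C_READ, NULL);
 *              elf_getshdrstrndx(elf, &shstrndx);
 *              while ((scn = elf_nextscn(elf, scn)) != NULL) {
 *                      if (!gelf_getshdr(scn, &sh))
 *                              break;
 *                      printf("%s\n", elf_strptr(elf, shstrndx, sh.sh_name));
 *              }
 *              elf_end(elf);
 *              close(fd);
 *              return 0;
 *      }
 */
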
2935 static bool sym_is_extern(const GElf_Sym *sym)
2936 {
2937         int bind = GELF_ST_BIND(sym->st_info);
2938         /* externs are symbols w/ type=NOTYPE, bind=GLOBAL|WEAK, section=UND */
2939         return sym->st_shndx == SHN_UNDEF &&
2940                (bind == STB_GLOBAL || bind == STB_WEAK) &&
2941                GELF_ST_TYPE(sym->st_info) == STT_NOTYPE;
2942 }
2943
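/* For illustration, externs matching sym_is_extern() come from BPF C
 * declarations along these lines (__kconfig/__ksym are the section
 * attributes from bpf_helpers.h; names below are just examples):
 *
 *      extern unsigned int CONFIG_HZ __kconfig;
 *      extern const void bpf_prog_active __ksym;
 *
 * Clang emits both as NOTYPE GLOBAL (or WEAK) symbols in the UND section.
 */
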
2944 static int find_extern_btf_id(const struct btf *btf, const char *ext_name)
2945 {
2946         const struct btf_type *t;
2947         const char *var_name;
2948         int i, n;
2949
2950         if (!btf)
2951                 return -ESRCH;
2952
2953         n = btf__get_nr_types(btf);
2954         for (i = 1; i <= n; i++) {
2955                 t = btf__type_by_id(btf, i);
2956
2957                 if (!btf_is_var(t))
2958                         continue;
2959
2960                 var_name = btf__name_by_offset(btf, t->name_off);
2961                 if (strcmp(var_name, ext_name))
2962                         continue;
2963
2964                 if (btf_var(t)->linkage != BTF_VAR_GLOBAL_EXTERN)
2965                         return -EINVAL;
2966
2967                 return i;
2968         }
2969
2970         return -ENOENT;
2971 }
2972
2973 static int find_extern_sec_btf_id(struct btf *btf, int ext_btf_id)
{
2974         const struct btf_var_secinfo *vs;
2975         const struct btf_type *t;
2976         int i, j, n;
2977
2978         if (!btf)
2979                 return -ESRCH;
2980
2981         n = btf__get_nr_types(btf);
2982         for (i = 1; i <= n; i++) {
2983                 t = btf__type_by_id(btf, i);
2984
2985                 if (!btf_is_datasec(t))
2986                         continue;
2987
2988                 vs = btf_var_secinfos(t);
2989                 for (j = 0; j < btf_vlen(t); j++, vs++) {
2990                         if (vs->type == ext_btf_id)
2991                                 return i;
2992                 }
2993         }
2994
2995         return -ENOENT;
2996 }
2997
2998 static enum kcfg_type find_kcfg_type(const struct btf *btf, int id,
2999                                      bool *is_signed)
3000 {
3001         const struct btf_type *t;
3002         const char *name;
3003
3004         t = skip_mods_and_typedefs(btf, id, NULL);
3005         name = btf__name_by_offset(btf, t->name_off);
3006
3007         if (is_signed)
3008                 *is_signed = false;
3009         switch (btf_kind(t)) {
3010         case BTF_KIND_INT: {
3011                 int enc = btf_int_encoding(t);
3012
3013                 if (enc & BTF_INT_BOOL)
3014                         return t->size == 1 ? KCFG_BOOL : KCFG_UNKNOWN;
3015                 if (is_signed)
3016                         *is_signed = enc & BTF_INT_SIGNED;
3017                 if (t->size == 1)
3018                         return KCFG_CHAR;
3019                 if (t->size < 1 || t->size > 8 || (t->size & (t->size - 1)))
3020                         return KCFG_UNKNOWN;
3021                 return KCFG_INT;
3022         }
3023         case BTF_KIND_ENUM:
3024                 if (t->size != 4)
3025                         return KCFG_UNKNOWN;
3026                 if (strcmp(name, "libbpf_tristate"))
3027                         return KCFG_UNKNOWN;
3028                 return KCFG_TRISTATE;
3029         case BTF_KIND_ARRAY:
3030                 if (btf_array(t)->nelems == 0)
3031                         return KCFG_UNKNOWN;
3032                 if (find_kcfg_type(btf, btf_array(t)->type, NULL) != KCFG_CHAR)
3033                         return KCFG_UNKNOWN;
3034                 return KCFG_CHAR_ARR;
3035         default:
3036                 return KCFG_UNKNOWN;
3037         }
3038 }
3039
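/* Illustrative mapping from .kconfig extern declarations in BPF C to the
 * kcfg_type classification above (made-up CONFIG_* names, except CONFIG_HZ):
 *
 *      extern int CONFIG_HZ __kconfig;                  -> KCFG_INT
 *      extern char CONFIG_C __kconfig;                  -> KCFG_CHAR
 *      extern bool CONFIG_B __kconfig;                  -> KCFG_BOOL
 *      extern enum libbpf_tristate CONFIG_T __kconfig;  -> KCFG_TRISTATE
 *      extern char CONFIG_STR[8] __kconfig;             -> KCFG_CHAR_ARR
 */
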
3040 static int cmp_externs(const void *_a, const void *_b)
3041 {
3042         const struct extern_desc *a = _a;
3043         const struct extern_desc *b = _b;
3044
3045         if (a->type != b->type)
3046                 return a->type < b->type ? -1 : 1;
3047
3048         if (a->type == EXT_KCFG) {
3049                 /* descending order by alignment requirements */
3050                 if (a->kcfg.align != b->kcfg.align)
3051                         return a->kcfg.align > b->kcfg.align ? -1 : 1;
3052                 /* ascending order by size, within same alignment class */
3053                 if (a->kcfg.sz != b->kcfg.sz)
3054                         return a->kcfg.sz < b->kcfg.sz ? -1 : 1;
3055         }
3056
3057         /* resolve ties by name */
3058         return strcmp(a->name, b->name);
3059 }
3060
3061 static int find_int_btf_id(const struct btf *btf)
3062 {
3063         const struct btf_type *t;
3064         int i, n;
3065
3066         n = btf__get_nr_types(btf);
3067         for (i = 1; i <= n; i++) {
3068                 t = btf__type_by_id(btf, i);
3069
3070                 if (btf_is_int(t) && btf_int_bits(t) == 32)
3071                         return i;
3072         }
3073
3074         return 0;
3075 }
3076
3077 static int bpf_object__collect_externs(struct bpf_object *obj)
3078 {
3079         struct btf_type *sec, *kcfg_sec = NULL, *ksym_sec = NULL;
3080         const struct btf_type *t;
3081         struct extern_desc *ext;
3082         int i, n, off;
3083         const char *ext_name, *sec_name;
3084         Elf_Scn *scn;
3085         GElf_Shdr sh;
3086
3087         if (!obj->efile.symbols)
3088                 return 0;
3089
3090         scn = elf_sec_by_idx(obj, obj->efile.symbols_shndx);
3091         if (elf_sec_hdr(obj, scn, &sh))
3092                 return -LIBBPF_ERRNO__FORMAT;
3093
3094         n = sh.sh_size / sh.sh_entsize;
3095         pr_debug("looking for externs among %d symbols...\n", n);
3096
3097         for (i = 0; i < n; i++) {
3098                 GElf_Sym sym;
3099
3100                 if (!gelf_getsym(obj->efile.symbols, i, &sym))
3101                         return -LIBBPF_ERRNO__FORMAT;
3102                 if (!sym_is_extern(&sym))
3103                         continue;
3104                 ext_name = elf_sym_str(obj, sym.st_name);
3105                 if (!ext_name || !ext_name[0])
3106                         continue;
3107
3108                 ext = obj->externs;
3109                 ext = libbpf_reallocarray(ext, obj->nr_extern + 1, sizeof(*ext));
3110                 if (!ext)
3111                         return -ENOMEM;
3112                 obj->externs = ext;
3113                 ext = &ext[obj->nr_extern];
3114                 memset(ext, 0, sizeof(*ext));
3115                 obj->nr_extern++;
3116
3117                 ext->btf_id = find_extern_btf_id(obj->btf, ext_name);
3118                 if (ext->btf_id <= 0) {
3119                         pr_warn("failed to find BTF for extern '%s': %d\n",
3120                                 ext_name, ext->btf_id);
3121                         return ext->btf_id;
3122                 }
3123                 t = btf__type_by_id(obj->btf, ext->btf_id);
3124                 ext->name = btf__name_by_offset(obj->btf, t->name_off);
3125                 ext->sym_idx = i;
3126                 ext->is_weak = GELF_ST_BIND(sym.st_info) == STB_WEAK;
3127
3128                 ext->sec_btf_id = find_extern_sec_btf_id(obj->btf, ext->btf_id);
3129                 if (ext->sec_btf_id <= 0) {
3130                         pr_warn("failed to find BTF for extern '%s' [%d] section: %d\n",
3131                                 ext_name, ext->btf_id, ext->sec_btf_id);
3132                         return ext->sec_btf_id;
3133                 }
3134                 sec = (void *)btf__type_by_id(obj->btf, ext->sec_btf_id);
3135                 sec_name = btf__name_by_offset(obj->btf, sec->name_off);
3136
3137                 if (strcmp(sec_name, KCONFIG_SEC) == 0) {
3138                         kcfg_sec = sec;
3139                         ext->type = EXT_KCFG;
3140                         ext->kcfg.sz = btf__resolve_size(obj->btf, t->type);
3141                         if (ext->kcfg.sz <= 0) {
3142                                 pr_warn("failed to resolve size of extern (kcfg) '%s': %d\n",
3143                                         ext_name, ext->kcfg.sz);
3144                                 return ext->kcfg.sz;
3145                         }
3146                         ext->kcfg.align = btf__align_of(obj->btf, t->type);
3147                         if (ext->kcfg.align <= 0) {
3148                                 pr_warn("failed to determine alignment of extern (kcfg) '%s': %d\n",
3149                                         ext_name, ext->kcfg.align);
3150                                 return -EINVAL;
3151                         }
3152                         ext->kcfg.type = find_kcfg_type(obj->btf, t->type,
3153                                                         &ext->kcfg.is_signed);
3154                         if (ext->kcfg.type == KCFG_UNKNOWN) {
3155                                 pr_warn("extern (kcfg) '%s' type is unsupported\n", ext_name);
3156                                 return -ENOTSUP;
3157                         }
3158                 } else if (strcmp(sec_name, KSYMS_SEC) == 0) {
3159                         const struct btf_type *vt;
3160
3161                         ksym_sec = sec;
3162                         ext->type = EXT_KSYM;
3163
3164                         vt = skip_mods_and_typedefs(obj->btf, t->type, NULL);
3165                         if (!btf_is_void(vt)) {
3166                                 pr_warn("extern (ksym) '%s' is not typeless (void)\n", ext_name);
3167                                 return -ENOTSUP;
3168                         }
3169                 } else {
3170                         pr_warn("unrecognized extern section '%s'\n", sec_name);
3171                         return -ENOTSUP;
3172                 }
3173         }
3174         pr_debug("collected %d externs total\n", obj->nr_extern);
3175
3176         if (!obj->nr_extern)
3177                 return 0;
3178
3179         /* sort externs by type, for kcfg ones also by (align, size, name) */
3180         qsort(obj->externs, obj->nr_extern, sizeof(*ext), cmp_externs);
3181
3182         /* for .ksyms section, we need to turn all externs into allocated
3183          * variables in BTF to pass kernel verification; we do this by
3184          * pretending that each extern is an int-sized (4-byte) variable
3185          */
3186         if (ksym_sec) {
3187                 /* find existing 4-byte integer type in BTF to use for fake
3188                  * extern variables in DATASEC
3189                  */
3190                 int int_btf_id = find_int_btf_id(obj->btf);
3191
3192                 for (i = 0; i < obj->nr_extern; i++) {
3193                         ext = &obj->externs[i];
3194                         if (ext->type != EXT_KSYM)
3195                                 continue;
3196                         pr_debug("extern (ksym) #%d: symbol %d, name %s\n",
3197                                  i, ext->sym_idx, ext->name);
3198                 }
3199
3200                 sec = ksym_sec;
3201                 n = btf_vlen(sec);
3202                 for (i = 0, off = 0; i < n; i++, off += sizeof(int)) {
3203                         struct btf_var_secinfo *vs = btf_var_secinfos(sec) + i;
3204                         struct btf_type *vt;
3205
3206                         vt = (void *)btf__type_by_id(obj->btf, vs->type);
3207                         ext_name = btf__name_by_offset(obj->btf, vt->name_off);
3208                         ext = find_extern_by_name(obj, ext_name);
3209                         if (!ext) {
3210                                 pr_warn("failed to find extern definition for BTF var '%s'\n",
3211                                         ext_name);
3212                                 return -ESRCH;
3213                         }
3214                         btf_var(vt)->linkage = BTF_VAR_GLOBAL_ALLOCATED;
3215                         vt->type = int_btf_id;
3216                         vs->offset = off;
3217                         vs->size = sizeof(int);
3218                 }
3219                 sec->size = off;
3220         }
3221
3222         if (kcfg_sec) {
3223                 sec = kcfg_sec;
3224                 /* for kcfg externs calculate their offsets within a .kconfig map */
3225                 off = 0;
3226                 for (i = 0; i < obj->nr_extern; i++) {
3227                         ext = &obj->externs[i];
3228                         if (ext->type != EXT_KCFG)
3229                                 continue;
3230
3231                         ext->kcfg.data_off = roundup(off, ext->kcfg.align);
3232                         off = ext->kcfg.data_off + ext->kcfg.sz;
3233                         pr_debug("extern (kcfg) #%d: symbol %d, off %u, name %s\n",
3234                                  i, ext->sym_idx, ext->kcfg.data_off, ext->name);
3235                 }
3236                 sec->size = off;
3237                 n = btf_vlen(sec);
3238                 for (i = 0; i < n; i++) {
3239                         struct btf_var_secinfo *vs = btf_var_secinfos(sec) + i;
3240
3241                         t = btf__type_by_id(obj->btf, vs->type);
3242                         ext_name = btf__name_by_offset(obj->btf, t->name_off);
3243                         ext = find_extern_by_name(obj, ext_name);
3244                         if (!ext) {
3245                                 pr_warn("failed to find extern definition for BTF var '%s'\n",
3246                                         ext_name);
3247                                 return -ESRCH;
3248                         }
3249                         btf_var(t)->linkage = BTF_VAR_GLOBAL_ALLOCATED;
3250                         vs->offset = ext->kcfg.data_off;
3251                 }
3252         }
3253         return 0;
3254 }
3255
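/* Worked example of the .kconfig layout computed above, for three
 * hypothetical kcfg externs already sorted by (align desc, size asc, name):
 *
 *      extern __u64 CONFIG_A __kconfig;   align 8, sz 8 -> data_off 0
 *      extern int   CONFIG_B __kconfig;   align 4, sz 4 -> data_off 8
 *      extern char  CONFIG_C __kconfig;   align 1, sz 1 -> data_off 12
 *
 * which makes sec->size 13, with each DATASEC var offset set to its
 * extern's kcfg.data_off.
 */
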
3256 struct bpf_program *
3257 bpf_object__find_program_by_title(const struct bpf_object *obj,
3258                                   const char *title)
3259 {
3260         struct bpf_program *pos;
3261
3262         bpf_object__for_each_program(pos, obj) {
3263                 if (pos->sec_name && !strcmp(pos->sec_name, title))
3264                         return pos;
3265         }
3266         return NULL;
3267 }
3268
3269 static bool prog_is_subprog(const struct bpf_object *obj,
3270                             const struct bpf_program *prog)
3271 {
3272         return prog->sec_idx == obj->efile.text_shndx && obj->has_subcalls;
3273 }
3274
3275 struct bpf_program *
3276 bpf_object__find_program_by_name(const struct bpf_object *obj,
3277                                  const char *name)
3278 {
3279         struct bpf_program *prog;
3280
3281         bpf_object__for_each_program(prog, obj) {
3282                 if (prog_is_subprog(obj, prog))
3283                         continue;
3284                 if (!strcmp(prog->name, name))
3285                         return prog;
3286         }
3287         return NULL;
3288 }
3289
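/* Usage sketch (object path and program name are hypothetical):
 *
 *      struct bpf_object *obj = bpf_object__open_file("prog.bpf.o", NULL);
 *      struct bpf_program *p;
 *
 *      p = bpf_object__find_program_by_name(obj, "handle_exec");
 *
 * Note that subprogs in .text are deliberately skipped, so only entry-point
 * programs can be looked up by name this way.
 */
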
3290 static bool bpf_object__shndx_is_data(const struct bpf_object *obj,
3291                                       int shndx)
3292 {
3293         return shndx == obj->efile.data_shndx ||
3294                shndx == obj->efile.bss_shndx ||
3295                shndx == obj->efile.rodata_shndx;
3296 }
3297
3298 static bool bpf_object__shndx_is_maps(const struct bpf_object *obj,
3299                                       int shndx)
3300 {
3301         return shndx == obj->efile.maps_shndx ||
3302                shndx == obj->efile.btf_maps_shndx;
3303 }
3304
3305 static enum libbpf_map_type
3306 bpf_object__section_to_libbpf_map_type(const struct bpf_object *obj, int shndx)
3307 {
3308         if (shndx == obj->efile.data_shndx)
3309                 return LIBBPF_MAP_DATA;
3310         else if (shndx == obj->efile.bss_shndx)
3311                 return LIBBPF_MAP_BSS;
3312         else if (shndx == obj->efile.rodata_shndx)
3313                 return LIBBPF_MAP_RODATA;
3314         else if (shndx == obj->efile.symbols_shndx)
3315                 return LIBBPF_MAP_KCONFIG;
3316         else
3317                 return LIBBPF_MAP_UNSPEC;
3318 }
3319
3320 static int bpf_program__record_reloc(struct bpf_program *prog,
3321                                      struct reloc_desc *reloc_desc,
3322                                      __u32 insn_idx, const char *sym_name,
3323                                      const GElf_Sym *sym, const GElf_Rel *rel)
3324 {
3325         struct bpf_insn *insn = &prog->insns[insn_idx];
3326         size_t map_idx, nr_maps = prog->obj->nr_maps;
3327         struct bpf_object *obj = prog->obj;
3328         __u32 shdr_idx = sym->st_shndx;
3329         enum libbpf_map_type type;
3330         const char *sym_sec_name;
3331         struct bpf_map *map;
3332
3333         reloc_desc->processed = false;
3334
3335         /* sub-program call relocation */
3336         if (insn->code == (BPF_JMP | BPF_CALL)) {
3337                 if (insn->src_reg != BPF_PSEUDO_CALL) {
3338                         pr_warn("prog '%s': incorrect bpf_call opcode\n", prog->name);
3339                         return -LIBBPF_ERRNO__RELOC;
3340                 }
3341                 /* text_shndx can be 0 if no default "main" program exists */
3342                 if (!shdr_idx || shdr_idx != obj->efile.text_shndx) {
3343                         sym_sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, shdr_idx));
3344                         pr_warn("prog '%s': bad call relo against '%s' in section '%s'\n",
3345                                 prog->name, sym_name, sym_sec_name);
3346                         return -LIBBPF_ERRNO__RELOC;
3347                 }
3348                 if (sym->st_value % BPF_INSN_SZ) {
3349                         pr_warn("prog '%s': bad call relo against '%s' at offset %zu\n",
3350                                 prog->name, sym_name, (size_t)sym->st_value);
3351                         return -LIBBPF_ERRNO__RELOC;
3352                 }
3353                 reloc_desc->type = RELO_CALL;
3354                 reloc_desc->insn_idx = insn_idx;
3355                 reloc_desc->sym_off = sym->st_value;
3356                 return 0;
3357         }
3358
3359         if (insn->code != (BPF_LD | BPF_IMM | BPF_DW)) {
3360                 pr_warn("prog '%s': invalid relo against '%s' for insns[%d].code 0x%x\n",
3361                         prog->name, sym_name, insn_idx, insn->code);
3362                 return -LIBBPF_ERRNO__RELOC;
3363         }
3364
3365         if (sym_is_extern(sym)) {
3366                 int sym_idx = GELF_R_SYM(rel->r_info);
3367                 int i, n = obj->nr_extern;
3368                 struct extern_desc *ext;
3369
3370                 for (i = 0; i < n; i++) {
3371                         ext = &obj->externs[i];
3372                         if (ext->sym_idx == sym_idx)
3373                                 break;
3374                 }
3375                 if (i >= n) {
3376                         pr_warn("prog '%s': extern relo failed to find extern for '%s' (%d)\n",
3377                                 prog->name, sym_name, sym_idx);
3378                         return -LIBBPF_ERRNO__RELOC;
3379                 }
3380                 pr_debug("prog '%s': found extern #%d '%s' (sym %d) for insn #%u\n",
3381                          prog->name, i, ext->name, ext->sym_idx, insn_idx);
3382                 reloc_desc->type = RELO_EXTERN;
3383                 reloc_desc->insn_idx = insn_idx;
3384                 reloc_desc->sym_off = i; /* sym_off stores extern index */
3385                 return 0;
3386         }
3387
3388         if (!shdr_idx || shdr_idx >= SHN_LORESERVE) {
3389                 pr_warn("prog '%s': invalid relo against '%s' in special section 0x%x; did you forget to initialize a global var?\n",
3390                         prog->name, sym_name, shdr_idx);
3391                 return -LIBBPF_ERRNO__RELOC;
3392         }
3393
3394         type = bpf_object__section_to_libbpf_map_type(obj, shdr_idx);
3395         sym_sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, shdr_idx));
3396
3397         /* generic map reference relocation */
3398         if (type == LIBBPF_MAP_UNSPEC) {
3399                 if (!bpf_object__shndx_is_maps(obj, shdr_idx)) {
3400                         pr_warn("prog '%s': bad map relo against '%s' in section '%s'\n",
3401                                 prog->name, sym_name, sym_sec_name);
3402                         return -LIBBPF_ERRNO__RELOC;
3403                 }
3404                 for (map_idx = 0; map_idx < nr_maps; map_idx++) {
3405                         map = &obj->maps[map_idx];
3406                         if (map->libbpf_type != type ||
3407                             map->sec_idx != sym->st_shndx ||
3408                             map->sec_offset != sym->st_value)
3409                                 continue;
3410                         pr_debug("prog '%s': found map %zd (%s, sec %d, off %zu) for insn #%u\n",
3411                                  prog->name, map_idx, map->name, map->sec_idx,
3412                                  map->sec_offset, insn_idx);
3413                         break;
3414                 }
3415                 if (map_idx >= nr_maps) {
3416                         pr_warn("prog '%s': map relo failed to find map for section '%s', off %zu\n",
3417                                 prog->name, sym_sec_name, (size_t)sym->st_value);
3418                         return -LIBBPF_ERRNO__RELOC;
3419                 }
3420                 reloc_desc->type = RELO_LD64;
3421                 reloc_desc->insn_idx = insn_idx;
3422                 reloc_desc->map_idx = map_idx;
3423                 reloc_desc->sym_off = 0; /* sym->st_value determines map_idx */
3424                 return 0;
3425         }
3426
3427         /* global data map relocation */
3428         if (!bpf_object__shndx_is_data(obj, shdr_idx)) {
3429                 pr_warn("prog '%s': bad data relo against section '%s'\n",
3430                         prog->name, sym_sec_name);
3431                 return -LIBBPF_ERRNO__RELOC;
3432         }
3433         for (map_idx = 0; map_idx < nr_maps; map_idx++) {
3434                 map = &obj->maps[map_idx];
3435                 if (map->libbpf_type != type)
3436                         continue;
3437                 pr_debug("prog '%s': found data map %zd (%s, sec %d, off %zu) for insn %u\n",
3438                          prog->name, map_idx, map->name, map->sec_idx,
3439                          map->sec_offset, insn_idx);
3440                 break;
3441         }
3442         if (map_idx >= nr_maps) {
3443                 pr_warn("prog '%s': data relo failed to find map for section '%s'\n",
3444                         prog->name, sym_sec_name);
3445                 return -LIBBPF_ERRNO__RELOC;
3446         }
3447
3448         reloc_desc->type = RELO_DATA;
3449         reloc_desc->insn_idx = insn_idx;
3450         reloc_desc->map_idx = map_idx;
3451         reloc_desc->sym_off = sym->st_value;
3452         return 0;
3453 }
3454
3455 static bool prog_contains_insn(const struct bpf_program *prog, size_t insn_idx)
3456 {
3457         return insn_idx >= prog->sec_insn_off &&
3458                insn_idx < prog->sec_insn_off + prog->sec_insn_cnt;
3459 }
3460
3461 static struct bpf_program *find_prog_by_sec_insn(const struct bpf_object *obj,
3462                                                  size_t sec_idx, size_t insn_idx)
3463 {
3464         int l = 0, r = obj->nr_programs - 1, m;
3465         struct bpf_program *prog;
3466
3467         while (l < r) {
3468                 m = l + (r - l + 1) / 2;
3469                 prog = &obj->programs[m];
3470
3471                 if (prog->sec_idx < sec_idx ||
3472                     (prog->sec_idx == sec_idx && prog->sec_insn_off <= insn_idx))
3473                         l = m;
3474                 else
3475                         r = m - 1;
3476         }
3477         /* matching program could be at index l, but it still might be the
3478          * wrong one, so double-check the conditions one last time
3479          */
3480         prog = &obj->programs[l];
3481         if (prog->sec_idx == sec_idx && prog_contains_insn(prog, insn_idx))
3482                 return prog;
3483         return NULL;
3484 }
3485
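/* Example: for programs in section 5 at (sec_insn_off, sec_insn_cnt) of
 * (0, 4), (4, 10) and (14, 2), looking up (sec_idx=5, insn_idx=7) converges
 * on the program starting at offset 4 and prog_contains_insn() confirms that
 * 7 falls in [4, 14); looking up insn_idx 16 lands on the last program but
 * fails the containment check, so NULL is returned.
 */
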
3486 static int
3487 bpf_object__collect_prog_relos(struct bpf_object *obj, GElf_Shdr *shdr, Elf_Data *data)
3488 {
3489         Elf_Data *symbols = obj->efile.symbols;
3490         const char *relo_sec_name, *sec_name;
3491         size_t sec_idx = shdr->sh_info;
3492         struct bpf_program *prog;
3493         struct reloc_desc *relos;
3494         int err, i, nrels;
3495         const char *sym_name;
3496         __u32 insn_idx;
3497         GElf_Sym sym;
3498         GElf_Rel rel;
3499
3500         relo_sec_name = elf_sec_str(obj, shdr->sh_name);
3501         sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx));
3502         if (!relo_sec_name || !sec_name)
3503                 return -EINVAL;
3504
3505         pr_debug("sec '%s': collecting relocation for section(%zu) '%s'\n",
3506                  relo_sec_name, sec_idx, sec_name);
3507         nrels = shdr->sh_size / shdr->sh_entsize;
3508
3509         for (i = 0; i < nrels; i++) {
3510                 if (!gelf_getrel(data, i, &rel)) {
3511                         pr_warn("sec '%s': failed to get relo #%d\n", relo_sec_name, i);
3512                         return -LIBBPF_ERRNO__FORMAT;
3513                 }
3514                 if (!gelf_getsym(symbols, GELF_R_SYM(rel.r_info), &sym)) {
3515                         pr_warn("sec '%s': symbol 0x%zx not found for relo #%d\n",
3516                                 relo_sec_name, (size_t)GELF_R_SYM(rel.r_info), i);
3517                         return -LIBBPF_ERRNO__FORMAT;
3518                 }
3519                 if (rel.r_offset % BPF_INSN_SZ) {
3520                         pr_warn("sec '%s': invalid offset 0x%zx for relo #%d\n",
3521                                 relo_sec_name, (size_t)rel.r_offset, i);
3522                         return -LIBBPF_ERRNO__FORMAT;
3523                 }
3524
3525                 insn_idx = rel.r_offset / BPF_INSN_SZ;
3526                 /* relocations against static functions are recorded as
3527                  * relocations against the section that contains the function;
3528                  * in such a case the symbol will be STT_SECTION and
3529                  * sym.st_name will point to an empty string (0), so fetch
3530                  * the section name instead
3531                  */
3532                 if (GELF_ST_TYPE(sym.st_info) == STT_SECTION && sym.st_name == 0)
3533                         sym_name = elf_sec_name(obj, elf_sec_by_idx(obj, sym.st_shndx));
3534                 else
3535                         sym_name = elf_sym_str(obj, sym.st_name);
3536                 sym_name = sym_name ?: "<?>";
3537
3538                 pr_debug("sec '%s': relo #%d: insn #%u against '%s'\n",
3539                          relo_sec_name, i, insn_idx, sym_name);
3540
3541                 prog = find_prog_by_sec_insn(obj, sec_idx, insn_idx);
3542                 if (!prog) {
3543                         pr_warn("sec '%s': relo #%d: program not found in section '%s' for insn #%u\n",
3544                                 relo_sec_name, i, sec_name, insn_idx);
3545                         return -LIBBPF_ERRNO__RELOC;
3546                 }
3547
3548                 relos = libbpf_reallocarray(prog->reloc_desc,
3549                                             prog->nr_reloc + 1, sizeof(*relos));
3550                 if (!relos)
3551                         return -ENOMEM;
3552                 prog->reloc_desc = relos;
3553
3554                 /* adjust insn_idx to local BPF program frame of reference */
3555                 insn_idx -= prog->sec_insn_off;
3556                 err = bpf_program__record_reloc(prog, &relos[prog->nr_reloc],
3557                                                 insn_idx, sym_name, &sym, &rel);
3558                 if (err)
3559                         return err;
3560
3561                 prog->nr_reloc++;
3562         }
3563         return 0;
3564 }
3565
3566 static int bpf_map_find_btf_info(struct bpf_object *obj, struct bpf_map *map)
3567 {
3568         struct bpf_map_def *def = &map->def;
3569         __u32 key_type_id = 0, value_type_id = 0;
3570         int ret;
3571
3572         /* if it's a BTF-defined map, we don't need to search for type IDs.
3573          * A struct_ops map needs neither btf_key_type_id nor
3574          * btf_value_type_id.
3575          */
3576         if (map->sec_idx == obj->efile.btf_maps_shndx ||
3577             bpf_map__is_struct_ops(map))
3578                 return 0;
3579
3580         if (!bpf_map__is_internal(map)) {
3581                 ret = btf__get_map_kv_tids(obj->btf, map->name, def->key_size,
3582                                            def->value_size, &key_type_id,
3583                                            &value_type_id);
3584         } else {
3585                 /*
3586                  * LLVM annotates global data differently in BTF: internal
3587                  * maps are named only '.data', '.bss' or '.rodata'.
3588                  */
3589                 ret = btf__find_by_name(obj->btf,
3590                                 libbpf_type_to_btf_name[map->libbpf_type]);
3591         }
3592         if (ret < 0)
3593                 return ret;
3594
3595         map->btf_key_type_id = key_type_id;
3596         map->btf_value_type_id = bpf_map__is_internal(map) ?
3597                                  ret : value_type_id;
3598         return 0;
3599 }
3600
3601 int bpf_map__reuse_fd(struct bpf_map *map, int fd)
3602 {
3603         struct bpf_map_info info = {};
3604         __u32 len = sizeof(info);
3605         int new_fd, err;
3606         char *new_name;
3607
3608         err = bpf_obj_get_info_by_fd(fd, &info, &len);
3609         if (err)
3610                 return err;
3611
3612         new_name = strdup(info.name);
3613         if (!new_name)
3614                 return -errno;
3615
3616         new_fd = open("/", O_RDONLY | O_CLOEXEC);
3617         if (new_fd < 0) {
3618                 err = -errno;
3619                 goto err_free_new_name;
3620         }
3621
3622         new_fd = dup3(fd, new_fd, O_CLOEXEC);
3623         if (new_fd < 0) {
3624                 err = -errno;
3625                 goto err_close_new_fd;
3626         }
3627
3628         err = zclose(map->fd);
3629         if (err) {
3630                 err = -errno;
3631                 goto err_close_new_fd;
3632         }
3633         free(map->name);
3634
3635         map->fd = new_fd;
3636         map->name = new_name;
3637         map->def.type = info.type;
3638         map->def.key_size = info.key_size;
3639         map->def.value_size = info.value_size;
3640         map->def.max_entries = info.max_entries;
3641         map->def.map_flags = info.map_flags;
3642         map->btf_key_type_id = info.btf_key_type_id;
3643         map->btf_value_type_id = info.btf_value_type_id;
3644         map->reused = true;
3645
3646         return 0;
3647
3648 err_close_new_fd:
3649         close(new_fd);
3650 err_free_new_name:
3651         free(new_name);
3652         return err;
3653 }
3654
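/* Usage sketch: adopt an already-pinned map before load (pin path is
 * hypothetical):
 *
 *      int pin_fd = bpf_obj_get("/sys/fs/bpf/my_map");
 *      int err = pin_fd >= 0 ? bpf_map__reuse_fd(map, pin_fd) : -errno;
 *
 * On success the map won't be re-created at load time and its definition is
 * overwritten with the kernel's view of the existing map.
 */
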
3655 __u32 bpf_map__max_entries(const struct bpf_map *map)
3656 {
3657         return map->def.max_entries;
3658 }
3659
3660 int bpf_map__set_max_entries(struct bpf_map *map, __u32 max_entries)
3661 {
3662         if (map->fd >= 0)
3663                 return -EBUSY;
3664         map->def.max_entries = max_entries;
3665         return 0;
3666 }
3667
3668 int bpf_map__resize(struct bpf_map *map, __u32 max_entries)
3669 {
3670         if (!map || !max_entries)
3671                 return -EINVAL;
3672
3673         return bpf_map__set_max_entries(map, max_entries);
3674 }
3675
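/* Usage sketch: sizing a map between open and load (names hypothetical):
 *
 *      struct bpf_object *obj = bpf_object__open_file("prog.bpf.o", NULL);
 *      struct bpf_map *map = bpf_object__find_map_by_name(obj, "events");
 *
 *      bpf_map__set_max_entries(map, 4096);
 *      bpf_object__load(obj);
 *
 * The resize must happen before load: once map->fd is valid, -EBUSY is
 * returned.
 */
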
3676 static int
3677 bpf_object__probe_loading(struct bpf_object *obj)
3678 {
3679         struct bpf_load_program_attr attr;
3680         char *cp, errmsg[STRERR_BUFSIZE];
3681         struct bpf_insn insns[] = {
3682                 BPF_MOV64_IMM(BPF_REG_0, 0),
3683                 BPF_EXIT_INSN(),
3684         };
3685         int ret;
3686
3687         /* make sure basic loading works */
3688
3689         memset(&attr, 0, sizeof(attr));
3690         attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
3691         attr.insns = insns;
3692         attr.insns_cnt = ARRAY_SIZE(insns);
3693         attr.license = "GPL";
3694
3695         ret = bpf_load_program_xattr(&attr, NULL, 0);
3696         if (ret < 0) {
3697                 ret = errno;
3698                 cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg));
3699                 pr_warn("Error in %s():%s(%d). Couldn't load trivial BPF "
3700                         "program. Make sure your kernel supports BPF "
3701                         "(CONFIG_BPF_SYSCALL=y) and/or that RLIMIT_MEMLOCK is "
3702                         "set to a big enough value.\n", __func__, cp, ret);
3703                 return -ret;
3704         }
3705         close(ret);
3706
3707         return 0;
3708 }
3709
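/* A caller-side sketch of lifting RLIMIT_MEMLOCK, which the warning above
 * alludes to (libbpf itself never changes rlimits):
 *
 *      struct rlimit r = { RLIM_INFINITY, RLIM_INFINITY };
 *
 *      if (setrlimit(RLIMIT_MEMLOCK, &r))
 *              perror("setrlimit(RLIMIT_MEMLOCK)");
 */
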
3710 static int probe_fd(int fd)
3711 {
3712         if (fd >= 0)
3713                 close(fd);
3714         return fd >= 0;
3715 }
3716
3717 static int probe_kern_prog_name(void)
3718 {
3719         struct bpf_load_program_attr attr;
3720         struct bpf_insn insns[] = {
3721                 BPF_MOV64_IMM(BPF_REG_0, 0),
3722                 BPF_EXIT_INSN(),
3723         };
3724         int ret;
3725
3726         /* make sure loading with name works */
3727
3728         memset(&attr, 0, sizeof(attr));
3729         attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
3730         attr.insns = insns;
3731         attr.insns_cnt = ARRAY_SIZE(insns);
3732         attr.license = "GPL";
3733         attr.name = "test";
3734         ret = bpf_load_program_xattr(&attr, NULL, 0);
3735         return probe_fd(ret);
3736 }
3737
3738 static int probe_kern_global_data(void)
3739 {
3740         struct bpf_load_program_attr prg_attr;
3741         struct bpf_create_map_attr map_attr;
3742         char *cp, errmsg[STRERR_BUFSIZE];
3743         struct bpf_insn insns[] = {
3744                 BPF_LD_MAP_VALUE(BPF_REG_1, 0, 16),
3745                 BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 42),
3746                 BPF_MOV64_IMM(BPF_REG_0, 0),
3747                 BPF_EXIT_INSN(),
3748         };
3749         int ret, map;
3750
3751         memset(&map_attr, 0, sizeof(map_attr));
3752         map_attr.map_type = BPF_MAP_TYPE_ARRAY;
3753         map_attr.key_size = sizeof(int);
3754         map_attr.value_size = 32;
3755         map_attr.max_entries = 1;
3756
3757         map = bpf_create_map_xattr(&map_attr);
3758         if (map < 0) {
3759                 ret = -errno;
3760                 cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg));
3761                 pr_warn("Error in %s():%s(%d). Couldn't create simple array map.\n",
3762                         __func__, cp, -ret);
3763                 return ret;
3764         }
3765
3766         insns[0].imm = map;
3767
3768         memset(&prg_attr, 0, sizeof(prg_attr));
3769         prg_attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
3770         prg_attr.insns = insns;
3771         prg_attr.insns_cnt = ARRAY_SIZE(insns);
3772         prg_attr.license = "GPL";
3773
3774         ret = bpf_load_program_xattr(&prg_attr, NULL, 0);
3775         close(map);
3776         return probe_fd(ret);
3777 }
3778
3779 static int probe_kern_btf(void)
3780 {
3781         static const char strs[] = "\0int";
3782         __u32 types[] = {
3783                 /* int */
3784                 BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4),
3785         };
3786
3787         return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
3788                                              strs, sizeof(strs)));
3789 }
3790
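/* Decoding the raw type blob above: BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0,
 * 32, 4) describes a signed integer, 32 bits wide and 4 bytes in size, whose
 * name lives at string offset 1 ("int" in strs). Loading this one-type BTF
 * blob is the cheapest way to detect whether the kernel has BTF support at
 * all.
 */
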
3791 static int probe_kern_btf_func(void)
3792 {
3793         static const char strs[] = "\0int\0x\0a";
3794         /* void x(int a) {} */
3795         __u32 types[] = {
3796                 /* int */
3797                 BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4),  /* [1] */
3798                 /* FUNC_PROTO */                                /* [2] */
3799                 BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 0),
3800                 BTF_PARAM_ENC(7, 1),
3801                 /* FUNC x */                                    /* [3] */
3802                 BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0), 2),
3803         };
3804
3805         return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
3806                                              strs, sizeof(strs)));
3807 }
3808
3809 static int probe_kern_btf_func_global(void)
3810 {
3811         static const char strs[] = "\0int\0x\0a";
3812         /* void x(int a) {} with global linkage */
3813         __u32 types[] = {
3814                 /* int */
3815                 BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4),  /* [1] */
3816                 /* FUNC_PROTO */                                /* [2] */
3817                 BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 0),
3818                 BTF_PARAM_ENC(7, 1),
3819                 /* FUNC x BTF_FUNC_GLOBAL */                    /* [3] */
3820                 BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, BTF_FUNC_GLOBAL), 2),
3821         };
3822
3823         return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
3824                                              strs, sizeof(strs)));
3825 }
3826
3827 static int probe_kern_btf_datasec(void)
3828 {
3829         static const char strs[] = "\0x\0.data";
3830         /* static int x; */
3831         __u32 types[] = {
3832                 /* int */
3833                 BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),  /* [1] */
3834                 /* VAR x */                                     /* [2] */
3835                 BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1),
3836                 BTF_VAR_STATIC,
3837                 /* DATASEC .data */                             /* [3] */
3838                 BTF_TYPE_ENC(3, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4),
3839                 BTF_VAR_SECINFO_ENC(2, 0, 4),
3840         };
3841
3842         return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
3843                                              strs, sizeof(strs)));
3844 }
3845
3846 static int probe_kern_array_mmap(void)
3847 {
3848         struct bpf_create_map_attr attr = {
3849                 .map_type = BPF_MAP_TYPE_ARRAY,
3850                 .map_flags = BPF_F_MMAPABLE,
3851                 .key_size = sizeof(int),
3852                 .value_size = sizeof(int),
3853                 .max_entries = 1,
3854         };
3855
3856         return probe_fd(bpf_create_map_xattr(&attr));
3857 }
3858
3859 static int probe_kern_exp_attach_type(void)
3860 {
3861         struct bpf_load_program_attr attr;
3862         struct bpf_insn insns[] = {
3863                 BPF_MOV64_IMM(BPF_REG_0, 0),
3864                 BPF_EXIT_INSN(),
3865         };
3866
3867         memset(&attr, 0, sizeof(attr));
3868         /* use any valid combination of program type and (optional)
3869          * non-zero expected attach type (i.e., not BPF_CGROUP_INET_INGRESS,
3870          * which is 0) to check whether the kernel supports the
3871          * expected_attach_type field for the BPF_PROG_LOAD command
3872          */
3873         attr.prog_type = BPF_PROG_TYPE_CGROUP_SOCK;
3874         attr.expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE;
3875         attr.insns = insns;
3876         attr.insns_cnt = ARRAY_SIZE(insns);
3877         attr.license = "GPL";
3878
3879         return probe_fd(bpf_load_program_xattr(&attr, NULL, 0));
3880 }
3881
3882 static int probe_kern_probe_read_kernel(void)
3883 {
3884         struct bpf_load_program_attr attr;
3885         struct bpf_insn insns[] = {
3886                 BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),   /* r1 = r10 (fp) */
3887                 BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),  /* r1 += -8 */
3888                 BPF_MOV64_IMM(BPF_REG_2, 8),            /* r2 = 8 */
3889                 BPF_MOV64_IMM(BPF_REG_3, 0),            /* r3 = 0 */
3890                 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_probe_read_kernel),
3891                 BPF_EXIT_INSN(),
3892         };
3893
3894         memset(&attr, 0, sizeof(attr));
3895         attr.prog_type = BPF_PROG_TYPE_KPROBE;
3896         attr.insns = insns;
3897         attr.insns_cnt = ARRAY_SIZE(insns);
3898         attr.license = "GPL";
3899
3900         return probe_fd(bpf_load_program_xattr(&attr, NULL, 0));
3901 }
3902
3903 static int probe_prog_bind_map(void)
3904 {
3905         struct bpf_load_program_attr prg_attr;
3906         struct bpf_create_map_attr map_attr;
3907         char *cp, errmsg[STRERR_BUFSIZE];
3908         struct bpf_insn insns[] = {
3909                 BPF_MOV64_IMM(BPF_REG_0, 0),
3910                 BPF_EXIT_INSN(),
3911         };
3912         int ret, map, prog;
3913
3914         memset(&map_attr, 0, sizeof(map_attr));
3915         map_attr.map_type = BPF_MAP_TYPE_ARRAY;
3916         map_attr.key_size = sizeof(int);
3917         map_attr.value_size = 32;
3918         map_attr.max_entries = 1;
3919
3920         map = bpf_create_map_xattr(&map_attr);
3921         if (map < 0) {
3922                 ret = -errno;
3923                 cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg));
3924                 pr_warn("Error in %s():%s(%d). Couldn't create simple array map.\n",
3925                         __func__, cp, -ret);
3926                 return ret;
3927         }
3928
3929         memset(&prg_attr, 0, sizeof(prg_attr));
3930         prg_attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
3931         prg_attr.insns = insns;
3932         prg_attr.insns_cnt = ARRAY_SIZE(insns);
3933         prg_attr.license = "GPL";
3934
3935         prog = bpf_load_program_xattr(&prg_attr, NULL, 0);
3936         if (prog < 0) {
3937                 close(map);
3938                 return 0;
3939         }
3940
3941         ret = bpf_prog_bind_map(prog, map, NULL);
3942
3943         close(map);
3944         close(prog);
3945
3946         return ret >= 0;
3947 }
3948
3949 enum kern_feature_result {
3950         FEAT_UNKNOWN = 0,
3951         FEAT_SUPPORTED = 1,
3952         FEAT_MISSING = 2,
3953 };
3954
3955 typedef int (*feature_probe_fn)(void);
3956
3957 static struct kern_feature_desc {
3958         const char *desc;
3959         feature_probe_fn probe;
3960         enum kern_feature_result res;
3961 } feature_probes[__FEAT_CNT] = {
3962         [FEAT_PROG_NAME] = {
3963                 "BPF program name", probe_kern_prog_name,
3964         },
3965         [FEAT_GLOBAL_DATA] = {
3966                 "global variables", probe_kern_global_data,
3967         },
3968         [FEAT_BTF] = {
3969                 "minimal BTF", probe_kern_btf,
3970         },
3971         [FEAT_BTF_FUNC] = {
3972                 "BTF functions", probe_kern_btf_func,
3973         },
3974         [FEAT_BTF_GLOBAL_FUNC] = {
3975                 "BTF global function", probe_kern_btf_func_global,
3976         },
3977         [FEAT_BTF_DATASEC] = {
3978                 "BTF data section and variable", probe_kern_btf_datasec,
3979         },
3980         [FEAT_ARRAY_MMAP] = {
3981                 "ARRAY map mmap()", probe_kern_array_mmap,
3982         },
3983         [FEAT_EXP_ATTACH_TYPE] = {
3984                 "BPF_PROG_LOAD expected_attach_type attribute",
3985                 probe_kern_exp_attach_type,
3986         },
3987         [FEAT_PROBE_READ_KERN] = {
3988                 "bpf_probe_read_kernel() helper", probe_kern_probe_read_kernel,
3989         },
3990         [FEAT_PROG_BIND_MAP] = {
3991                 "BPF_PROG_BIND_MAP support", probe_prog_bind_map,
3992         }
3993 };
3994
3995 static bool kernel_supports(enum kern_feature_id feat_id)
3996 {
3997         struct kern_feature_desc *feat = &feature_probes[feat_id];
3998         int ret;
3999
4000         if (READ_ONCE(feat->res) == FEAT_UNKNOWN) {
4001                 ret = feat->probe();
4002                 if (ret > 0) {
4003                         WRITE_ONCE(feat->res, FEAT_SUPPORTED);
4004                 } else if (ret == 0) {
4005                         WRITE_ONCE(feat->res, FEAT_MISSING);
4006                 } else {
4007                         pr_warn("Detection of kernel %s support failed: %d\n", feat->desc, ret);
4008                         WRITE_ONCE(feat->res, FEAT_MISSING);
4009                 }
4010         }
4011
4012         return READ_ONCE(feat->res) == FEAT_SUPPORTED;
4013 }
4014
4015 static bool map_is_reuse_compat(const struct bpf_map *map, int map_fd)
4016 {
4017         struct bpf_map_info map_info = {};
4018         char msg[STRERR_BUFSIZE];
4019         __u32 map_info_len;
4020
4021         map_info_len = sizeof(map_info);
4022
4023         if (bpf_obj_get_info_by_fd(map_fd, &map_info, &map_info_len)) {
4024                 pr_warn("failed to get map info for map FD %d: %s\n",
4025                         map_fd, libbpf_strerror_r(errno, msg, sizeof(msg)));
4026                 return false;
4027         }
4028
4029         return (map_info.type == map->def.type &&
4030                 map_info.key_size == map->def.key_size &&
4031                 map_info.value_size == map->def.value_size &&
4032                 map_info.max_entries == map->def.max_entries &&
4033                 map_info.map_flags == map->def.map_flags);
4034 }
4035
4036 static int
4037 bpf_object__reuse_map(struct bpf_map *map)
4038 {
4039         char *cp, errmsg[STRERR_BUFSIZE];
4040         int err, pin_fd;
4041
4042         pin_fd = bpf_obj_get(map->pin_path);
4043         if (pin_fd < 0) {
4044                 err = -errno;
4045                 if (err == -ENOENT) {
4046                         pr_debug("found no pinned map to reuse at '%s'\n",
4047                                  map->pin_path);
4048                         return 0;
4049                 }
4050
4051                 cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg));
4052                 pr_warn("couldn't retrieve pinned map '%s': %s\n",
4053                         map->pin_path, cp);
4054                 return err;
4055         }
4056
4057         if (!map_is_reuse_compat(map, pin_fd)) {
4058                 pr_warn("couldn't reuse pinned map at '%s': parameter mismatch\n",
4059                         map->pin_path);
4060                 close(pin_fd);
4061                 return -EINVAL;
4062         }
4063
4064         err = bpf_map__reuse_fd(map, pin_fd);
4065         if (err) {
4066                 close(pin_fd);
4067                 return err;
4068         }
4069         map->pinned = true;
4070         pr_debug("reused pinned map at '%s'\n", map->pin_path);
4071
4072         return 0;
4073 }
4074
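/* Usage sketch: opting a map into this reuse-or-create flow (pin path is
 * hypothetical):
 *
 *      int err = bpf_map__set_pin_path(map, "/sys/fs/bpf/my_map");
 *
 * At load time bpf_object__create_maps() then tries bpf_object__reuse_map()
 * first and only creates (and auto-pins) a fresh map if no compatible pinned
 * map exists yet.
 */
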
4075 static int
4076 bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map)
4077 {
4078         enum libbpf_map_type map_type = map->libbpf_type;
4079         char *cp, errmsg[STRERR_BUFSIZE];
4080         int err, zero = 0;
4081
4082         err = bpf_map_update_elem(map->fd, &zero, map->mmaped, 0);
4083         if (err) {
4084                 err = -errno;
4085                 cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
4086                 pr_warn("Error setting initial map(%s) contents: %s\n",
4087                         map->name, cp);
4088                 return err;
4089         }
4090
4091         /* Freeze .rodata and .kconfig maps as read-only from the syscall side. */
4092         if (map_type == LIBBPF_MAP_RODATA || map_type == LIBBPF_MAP_KCONFIG) {
4093                 err = bpf_map_freeze(map->fd);
4094                 if (err) {
4095                         err = -errno;
4096                         cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
4097                         pr_warn("Error freezing map(%s) as read-only: %s\n",
4098                                 map->name, cp);
4099                         return err;
4100                 }
4101         }
4102         return 0;
4103 }
4104
4105 static void bpf_map__destroy(struct bpf_map *map);
4106
4107 static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map)
4108 {
4109         struct bpf_create_map_attr create_attr;
4110         struct bpf_map_def *def = &map->def;
4111
4112         memset(&create_attr, 0, sizeof(create_attr));
4113
4114         if (kernel_supports(FEAT_PROG_NAME))
4115                 create_attr.name = map->name;
4116         create_attr.map_ifindex = map->map_ifindex;
4117         create_attr.map_type = def->type;
4118         create_attr.map_flags = def->map_flags;
4119         create_attr.key_size = def->key_size;
4120         create_attr.value_size = def->value_size;
4121         create_attr.numa_node = map->numa_node;
4122
4123         if (def->type == BPF_MAP_TYPE_PERF_EVENT_ARRAY && !def->max_entries) {
4124                 int nr_cpus;
4125
4126                 nr_cpus = libbpf_num_possible_cpus();
4127                 if (nr_cpus < 0) {
4128                         pr_warn("map '%s': failed to determine number of system CPUs: %d\n",
4129                                 map->name, nr_cpus);
4130                         return nr_cpus;
4131                 }
4132                 pr_debug("map '%s': setting size to %d\n", map->name, nr_cpus);
4133                 create_attr.max_entries = nr_cpus;
4134         } else {
4135                 create_attr.max_entries = def->max_entries;
4136         }
4137
4138         if (bpf_map__is_struct_ops(map))
4139                 create_attr.btf_vmlinux_value_type_id =
4140                         map->btf_vmlinux_value_type_id;
4141
4142         create_attr.btf_fd = 0;
4143         create_attr.btf_key_type_id = 0;
4144         create_attr.btf_value_type_id = 0;
4145         if (obj->btf && btf__fd(obj->btf) >= 0 && !bpf_map_find_btf_info(obj, map)) {
4146                 create_attr.btf_fd = btf__fd(obj->btf);
4147                 create_attr.btf_key_type_id = map->btf_key_type_id;
4148                 create_attr.btf_value_type_id = map->btf_value_type_id;
4149         }
4150
4151         if (bpf_map_type__is_map_in_map(def->type)) {
4152                 if (map->inner_map) {
4153                         int err;
4154
4155                         err = bpf_object__create_map(obj, map->inner_map);
4156                         if (err) {
4157                                 pr_warn("map '%s': failed to create inner map: %d\n",
4158                                         map->name, err);
4159                                 return err;
4160                         }
4161                         map->inner_map_fd = bpf_map__fd(map->inner_map);
4162                 }
4163                 if (map->inner_map_fd >= 0)
4164                         create_attr.inner_map_fd = map->inner_map_fd;
4165         }
4166
4167         map->fd = bpf_create_map_xattr(&create_attr);
4168         if (map->fd < 0 && (create_attr.btf_key_type_id ||
4169                             create_attr.btf_value_type_id)) {
4170                 char *cp, errmsg[STRERR_BUFSIZE];
4171                 int err = -errno;
4172
4173                 cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
4174                 pr_warn("Error in bpf_create_map_xattr(%s):%s(%d). Retrying without BTF.\n",
4175                         map->name, cp, err);
4176                 create_attr.btf_fd = 0;
4177                 create_attr.btf_key_type_id = 0;
4178                 create_attr.btf_value_type_id = 0;
4179                 map->btf_key_type_id = 0;
4180                 map->btf_value_type_id = 0;
4181                 map->fd = bpf_create_map_xattr(&create_attr);
4182         }
4183
4184         if (map->fd < 0)
4185                 return -errno;
4186
4187         if (bpf_map_type__is_map_in_map(def->type) && map->inner_map) {
4188                 bpf_map__destroy(map->inner_map);
4189                 zfree(&map->inner_map);
4190         }
4191
4192         return 0;
4193 }
4194
4195 static int
4196 bpf_object__create_maps(struct bpf_object *obj)
4197 {
4198         struct bpf_map *map;
4199         char *cp, errmsg[STRERR_BUFSIZE];
4200         unsigned int i, j;
4201         int err;
4202
4203         for (i = 0; i < obj->nr_maps; i++) {
4204                 map = &obj->maps[i];
4205
4206                 if (map->pin_path) {
4207                         err = bpf_object__reuse_map(map);
4208                         if (err) {
4209                                 pr_warn("map '%s': error reusing pinned map\n",
4210                                         map->name);
4211                                 goto err_out;
4212                         }
4213                 }
4214
4215                 if (map->fd >= 0) {
4216                         pr_debug("map '%s': skipping creation (preset fd=%d)\n",
4217                                  map->name, map->fd);
4218                         continue;
4219                 }
4220
4221                 err = bpf_object__create_map(obj, map);
4222                 if (err)
4223                         goto err_out;
4224
4225                 pr_debug("map '%s': created successfully, fd=%d\n", map->name,
4226                          map->fd);
4227
4228                 if (bpf_map__is_internal(map)) {
4229                         err = bpf_object__populate_internal_map(obj, map);
4230                         if (err < 0) {
4231                                 zclose(map->fd);
4232                                 goto err_out;
4233                         }
4234                 }
4235
4236                 if (map->init_slots_sz) {
4237                         for (j = 0; j < map->init_slots_sz; j++) {
4238                                 const struct bpf_map *targ_map;
4239                                 int fd;
4240
4241                                 if (!map->init_slots[j])
4242                                         continue;
4243
4244                                 targ_map = map->init_slots[j];
4245                                 fd = bpf_map__fd(targ_map);
4246                                 err = bpf_map_update_elem(map->fd, &j, &fd, 0);
4247                                 if (err) {
4248                                         err = -errno;
4249                                         pr_warn("map '%s': failed to initialize slot [%d] to map '%s' fd=%d: %d\n",
4250                                                 map->name, j, targ_map->name,
4251                                                 fd, err);
4252                                         goto err_out;
4253                                 }
4254                                 pr_debug("map '%s': slot [%d] set to map '%s' fd=%d\n",
4255                                          map->name, j, targ_map->name, fd);
4256                         }
4257                         zfree(&map->init_slots);
4258                         map->init_slots_sz = 0;
4259                 }
4260
4261                 if (map->pin_path && !map->pinned) {
4262                         err = bpf_map__pin(map, NULL);
4263                         if (err) {
4264                                 pr_warn("map '%s': failed to auto-pin at '%s': %d\n",
4265                                         map->name, map->pin_path, err);
4266                                 zclose(map->fd);
4267                                 goto err_out;
4268                         }
4269                 }
4270         }
4271
4272         return 0;
4273
4274 err_out:
4275         cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
4276         pr_warn("map '%s': failed to create: %s(%d)\n", map->name, cp, err);
4277         pr_perm_msg(err);
4278         for (j = 0; j < i; j++)
4279                 zclose(obj->maps[j].fd);
4280         return err;
4281 }
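
/* Illustrative sketch (BPF-program side, assuming the usual bpf_helpers.h
 * macros; inner_map_a/inner_map_b are hypothetical): the init_slots[]
 * handling above is what backs declarative map-in-map initialization such as:
 *
 *   struct {
 *       __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
 *       __uint(max_entries, 2);
 *       __type(key, int);
 *       __array(values, struct inner_map);
 *   } outer_map SEC(".maps") = {
 *       .values = { &inner_map_a, &inner_map_b },
 *   };
 *
 * Each initialized slot j is then written into the freshly created outer map
 * via bpf_map_update_elem(map->fd, &j, &fd, 0), exactly as done above.
 */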
4282
4283 #define BPF_CORE_SPEC_MAX_LEN 64
4284
4285 /* represents BPF CO-RE field or array element accessor */
4286 struct bpf_core_accessor {
4287         __u32 type_id;          /* struct/union type or array element type */
4288         __u32 idx;              /* field index or array index */
4289         const char *name;       /* field name or NULL for array accessor */
4290 };
4291
4292 struct bpf_core_spec {
4293         const struct btf *btf;
4294         /* high-level spec: named fields and array indices only */
4295         struct bpf_core_accessor spec[BPF_CORE_SPEC_MAX_LEN];
4296         /* original unresolved (no skip_mods_and_typedefs) root type ID */
4297         __u32 root_type_id;
4298         /* CO-RE relocation kind */
4299         enum bpf_core_relo_kind relo_kind;
4300         /* high-level spec length */
4301         int len;
4302         /* raw, low-level spec: 1-to-1 with accessor spec string */
4303         int raw_spec[BPF_CORE_SPEC_MAX_LEN];
4304         /* raw spec length */
4305         int raw_len;
4306         /* field bit offset represented by spec */
4307         __u32 bit_offset;
4308 };
4309
4310 static bool str_is_empty(const char *s)
4311 {
4312         return !s || !s[0];
4313 }
4314
4315 static bool is_flex_arr(const struct btf *btf,
4316                         const struct bpf_core_accessor *acc,
4317                         const struct btf_array *arr)
4318 {
4319         const struct btf_type *t;
4320
4321                 /* not a flexible array if it's not inside a struct or has a non-zero size */
4322         if (!acc->name || arr->nelems > 0)
4323                 return false;
4324
4325         /* has to be the last member of enclosing struct */
4326         t = btf__type_by_id(btf, acc->type_id);
4327         return acc->idx == btf_vlen(t) - 1;
4328 }
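
/* For illustration (hypothetical types), the flexible array case accepted
 * above looks like:
 *
 *   struct sample_ctx {
 *       int cnt;
 *       struct elem items[];    // nelems == 0, last member of the struct
 *   };
 *
 * where accesses like items[5] may legitimately exceed the declared bound.
 */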
4329
4330 static const char *core_relo_kind_str(enum bpf_core_relo_kind kind)
4331 {
4332         switch (kind) {
4333         case BPF_FIELD_BYTE_OFFSET: return "byte_off";
4334         case BPF_FIELD_BYTE_SIZE: return "byte_sz";
4335         case BPF_FIELD_EXISTS: return "field_exists";
4336         case BPF_FIELD_SIGNED: return "signed";
4337         case BPF_FIELD_LSHIFT_U64: return "lshift_u64";
4338         case BPF_FIELD_RSHIFT_U64: return "rshift_u64";
4339         case BPF_TYPE_ID_LOCAL: return "local_type_id";
4340         case BPF_TYPE_ID_TARGET: return "target_type_id";
4341         case BPF_TYPE_EXISTS: return "type_exists";
4342         case BPF_TYPE_SIZE: return "type_size";
4343         case BPF_ENUMVAL_EXISTS: return "enumval_exists";
4344         case BPF_ENUMVAL_VALUE: return "enumval_value";
4345         default: return "unknown";
4346         }
4347 }
4348
4349 static bool core_relo_is_field_based(enum bpf_core_relo_kind kind)
4350 {
4351         switch (kind) {
4352         case BPF_FIELD_BYTE_OFFSET:
4353         case BPF_FIELD_BYTE_SIZE:
4354         case BPF_FIELD_EXISTS:
4355         case BPF_FIELD_SIGNED:
4356         case BPF_FIELD_LSHIFT_U64:
4357         case BPF_FIELD_RSHIFT_U64:
4358                 return true;
4359         default:
4360                 return false;
4361         }
4362 }
4363
4364 static bool core_relo_is_type_based(enum bpf_core_relo_kind kind)
4365 {
4366         switch (kind) {
4367         case BPF_TYPE_ID_LOCAL:
4368         case BPF_TYPE_ID_TARGET:
4369         case BPF_TYPE_EXISTS:
4370         case BPF_TYPE_SIZE:
4371                 return true;
4372         default:
4373                 return false;
4374         }
4375 }
4376
4377 static bool core_relo_is_enumval_based(enum bpf_core_relo_kind kind)
4378 {
4379         switch (kind) {
4380         case BPF_ENUMVAL_EXISTS:
4381         case BPF_ENUMVAL_VALUE:
4382                 return true;
4383         default:
4384                 return false;
4385         }
4386 }
4387
4388 /*
4389  * Turn bpf_core_relo into a low- and high-level spec representation,
4390  * validating correctness along the way, as well as calculating the resulting
4391  * field bit offset specified by the accessor string. Low-level spec captures
4392  * every single level of nestedness, including traversing anonymous
4393  * struct/union members. High-level one only captures semantically meaningful
4394  * "turning points": named fields and array indicies.
4395  * E.g., for this case:
4396  *
4397  *   struct sample {
4398  *       int __unimportant;
4399  *       struct {
4400  *           int __1;
4401  *           int __2;
4402  *           int a[7];
4403  *       };
4404  *   };
4405  *
4406  *   struct sample *s = ...;
4407  *
4408  *   int *x = &s->a[3]; // access string = '0:1:2:3'
4409  *
4410  * Low-level spec has 1:1 mapping with each element of access string (it's
4411  * just a parsed access string representation): [0, 1, 2, 3].
4412  *
4413  * High-level spec will capture only 3 points:
4414  *   - initial zero-index access by pointer (&s->... is the same as &s[0]...);
4415  *   - field 'a' access (corresponds to '2' in low-level spec);
4416  *   - array element #3 access (corresponds to '3' in low-level spec).
4417  *
4418  * Type-based relocations (TYPE_EXISTS/TYPE_SIZE,
4419  * TYPE_ID_LOCAL/TYPE_ID_TARGET) don't capture any field information. Their
4420  * spec and raw_spec are kept empty.
4421  *
4422  * Enum value-based relocations (ENUMVAL_EXISTS/ENUMVAL_VALUE) use the access
4423  * string to specify the enumerator's value index that needs to be relocated.
4424  */
4425 static int bpf_core_parse_spec(const struct btf *btf,
4426                                __u32 type_id,
4427                                const char *spec_str,
4428                                enum bpf_core_relo_kind relo_kind,
4429                                struct bpf_core_spec *spec)
4430 {
4431         int access_idx, parsed_len, i;
4432         struct bpf_core_accessor *acc;
4433         const struct btf_type *t;
4434         const char *name;
4435         __u32 id;
4436         __s64 sz;
4437
4438         if (str_is_empty(spec_str) || *spec_str == ':')
4439                 return -EINVAL;
4440
4441         memset(spec, 0, sizeof(*spec));
4442         spec->btf = btf;
4443         spec->root_type_id = type_id;
4444         spec->relo_kind = relo_kind;
4445
4446         /* type-based relocations don't have a field access string */
4447         if (core_relo_is_type_based(relo_kind)) {
4448                 if (strcmp(spec_str, "0"))
4449                         return -EINVAL;
4450                 return 0;
4451         }
4452
4453         /* parse spec_str="0:1:2:3:4" into array raw_spec=[0, 1, 2, 3, 4] */
4454         while (*spec_str) {
4455                 if (*spec_str == ':')
4456                         ++spec_str;
4457                 if (sscanf(spec_str, "%d%n", &access_idx, &parsed_len) != 1)
4458                         return -EINVAL;
4459                 if (spec->raw_len == BPF_CORE_SPEC_MAX_LEN)
4460                         return -E2BIG;
4461                 spec_str += parsed_len;
4462                 spec->raw_spec[spec->raw_len++] = access_idx;
4463         }
4464
4465         if (spec->raw_len == 0)
4466                 return -EINVAL;
4467
4468         t = skip_mods_and_typedefs(btf, type_id, &id);
4469         if (!t)
4470                 return -EINVAL;
4471
4472         access_idx = spec->raw_spec[0];
4473         acc = &spec->spec[0];
4474         acc->type_id = id;
4475         acc->idx = access_idx;
4476         spec->len++;
4477
4478         if (core_relo_is_enumval_based(relo_kind)) {
4479                 if (!btf_is_enum(t) || spec->raw_len > 1 || access_idx >= btf_vlen(t))
4480                         return -EINVAL;
4481
4482                 /* record enumerator name in the first accessor */
4483                 acc->name = btf__name_by_offset(btf, btf_enum(t)[access_idx].name_off);
4484                 return 0;
4485         }
4486
4487         if (!core_relo_is_field_based(relo_kind))
4488                 return -EINVAL;
4489
4490         sz = btf__resolve_size(btf, id);
4491         if (sz < 0)
4492                 return sz;
4493         spec->bit_offset = access_idx * sz * 8;
4494
4495         for (i = 1; i < spec->raw_len; i++) {
4496                 t = skip_mods_and_typedefs(btf, id, &id);
4497                 if (!t)
4498                         return -EINVAL;
4499
4500                 access_idx = spec->raw_spec[i];
4501                 acc = &spec->spec[spec->len];
4502
4503                 if (btf_is_composite(t)) {
4504                         const struct btf_member *m;
4505                         __u32 bit_offset;
4506
4507                         if (access_idx >= btf_vlen(t))
4508                                 return -EINVAL;
4509
4510                         bit_offset = btf_member_bit_offset(t, access_idx);
4511                         spec->bit_offset += bit_offset;
4512
4513                         m = btf_members(t) + access_idx;
4514                         if (m->name_off) {
4515                                 name = btf__name_by_offset(btf, m->name_off);
4516                                 if (str_is_empty(name))
4517                                         return -EINVAL;
4518
4519                                 acc->type_id = id;
4520                                 acc->idx = access_idx;
4521                                 acc->name = name;
4522                                 spec->len++;
4523                         }
4524
4525                         id = m->type;
4526                 } else if (btf_is_array(t)) {
4527                         const struct btf_array *a = btf_array(t);
4528                         bool flex;
4529
4530                         t = skip_mods_and_typedefs(btf, a->type, &id);
4531                         if (!t)
4532                                 return -EINVAL;
4533
4534                         flex = is_flex_arr(btf, acc - 1, a);
4535                         if (!flex && access_idx >= a->nelems)
4536                                 return -EINVAL;
4537
4538                         spec->spec[spec->len].type_id = id;
4539                         spec->spec[spec->len].idx = access_idx;
4540                         spec->len++;
4541
4542                         sz = btf__resolve_size(btf, id);
4543                         if (sz < 0)
4544                                 return sz;
4545                         spec->bit_offset += access_idx * sz * 8;
4546                 } else {
4547                         pr_warn("relo for [%u] %s (at idx %d) captures type [%d] of unexpected kind %s\n",
4548                                 type_id, spec_str, i, id, btf_kind_str(t));
4549                         return -EINVAL;
4550                 }
4551         }
4552
4553         return 0;
4554 }
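
/* Illustrative sketch (hypothetical caller, with sample_type_id standing in
 * for a real BTF type ID): for the 'struct sample' example in the comment
 * above, parsing would go roughly like:
 *
 *   struct bpf_core_spec spec;
 *   int err = bpf_core_parse_spec(btf, sample_type_id, "0:1:2:3",
 *                                 BPF_FIELD_BYTE_OFFSET, &spec);
 *   // on success: spec.raw_len == 4 (raw spec [0, 1, 2, 3]),
 *   // spec.len == 3 (initial deref, field 'a', array index 3),
 *   // spec.bit_offset == 8 * offsetof(struct sample, a[3])
 */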
4555
4556 static bool bpf_core_is_flavor_sep(const char *s)
4557 {
4558         /* check X___Y name pattern, where X and Y are not underscores */
4559         return s[0] != '_' &&                                 /* X */
4560                s[1] == '_' && s[2] == '_' && s[3] == '_' &&   /* ___ */
4561                s[4] != '_';                                   /* Y */
4562 }
4563
4564 /* Given 'some_struct_name___with_flavor' return the length of the name prefix
4565  * before the last triple underscore. The struct name part after the last
4566  * triple underscore is ignored by BPF CO-RE during relocation matching.
4567  */
4568 static size_t bpf_core_essential_name_len(const char *name)
4569 {
4570         size_t n = strlen(name);
4571         int i;
4572
4573         for (i = n - 5; i >= 0; i--) {
4574                 if (bpf_core_is_flavor_sep(name + i))
4575                         return i + 1;
4576         }
4577         return n;
4578 }
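
/* For illustration:
 *   bpf_core_essential_name_len("task_struct")         == 11
 *   bpf_core_essential_name_len("task_struct___v5_10") == 11
 * i.e. everything from the last "___" onwards is ignored when matching.
 */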
4579
4580 /* dynamically sized list of type IDs */
4581 struct ids_vec {
4582         __u32 *data;
4583         int len;
4584 };
4585
4586 static void bpf_core_free_cands(struct ids_vec *cand_ids)
4587 {
4588         free(cand_ids->data);
4589         free(cand_ids);
4590 }
4591
4592 static struct ids_vec *bpf_core_find_cands(const struct btf *local_btf,
4593                                            __u32 local_type_id,
4594                                            const struct btf *targ_btf)
4595 {
4596         size_t local_essent_len, targ_essent_len;
4597         const char *local_name, *targ_name;
4598         const struct btf_type *t, *local_t;
4599         struct ids_vec *cand_ids;
4600         __u32 *new_ids;
4601         int i, err, n;
4602
4603         local_t = btf__type_by_id(local_btf, local_type_id);
4604         if (!local_t)
4605                 return ERR_PTR(-EINVAL);
4606
4607         local_name = btf__name_by_offset(local_btf, local_t->name_off);
4608         if (str_is_empty(local_name))
4609                 return ERR_PTR(-EINVAL);
4610         local_essent_len = bpf_core_essential_name_len(local_name);
4611
4612         cand_ids = calloc(1, sizeof(*cand_ids));
4613         if (!cand_ids)
4614                 return ERR_PTR(-ENOMEM);
4615
4616         n = btf__get_nr_types(targ_btf);
4617         for (i = 1; i <= n; i++) {
4618                 t = btf__type_by_id(targ_btf, i);
4619                 if (btf_kind(t) != btf_kind(local_t))
4620                         continue;
4621
4622                 targ_name = btf__name_by_offset(targ_btf, t->name_off);
4623                 if (str_is_empty(targ_name))
4624                         continue;
4625
4626                 targ_essent_len = bpf_core_essential_name_len(targ_name);
4627                 if (targ_essent_len != local_essent_len)
4628                         continue;
4629
4630                 if (strncmp(local_name, targ_name, local_essent_len) == 0) {
4631                         pr_debug("CO-RE relocating [%d] %s %s: found target candidate [%d] %s %s\n",
4632                                  local_type_id, btf_kind_str(local_t),
4633                                  local_name, i, btf_kind_str(t), targ_name);
4634                         new_ids = libbpf_reallocarray(cand_ids->data,
4635                                                       cand_ids->len + 1,
4636                                                       sizeof(*cand_ids->data));
4637                         if (!new_ids) {
4638                                 err = -ENOMEM;
4639                                 goto err_out;
4640                         }
4641                         cand_ids->data = new_ids;
4642                         cand_ids->data[cand_ids->len++] = i;
4643                 }
4644         }
4645         return cand_ids;
4646 err_out:
4647         bpf_core_free_cands(cand_ids);
4648         return ERR_PTR(err);
4649 }
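
/* For illustration (hypothetical types): with local 'struct sample___flavor'
 * and target BTF containing 'struct sample' and 'struct sample___v2', both
 * target types share the essential name "sample" and land in the returned
 * candidate list; incompatible ones are pruned later during spec matching.
 */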
4650
4651 /* Check two types for compatibility for the purpose of field access
4652  * relocation. const/volatile/restrict and typedefs are skipped to ensure we
4653  * are relocating semantically compatible entities:
4654  *   - any two STRUCTs/UNIONs are compatible and can be mixed;
4655  *   - any two FWDs are compatible, if their names match (modulo flavor suffix);
4656  *   - any two PTRs are always compatible;
4657  *   - for ENUMs, names should be the same (ignoring flavor suffix) or at
4658  *     least one of the enums should be anonymous;
4659  *   - for ENUMs, sizes are ignored;
4660  *   - for INT, size and signedness are ignored;
4661  *   - for ARRAY, dimensionality is ignored, element types are checked for
4662  *     compatibility recursively;
4663  *   - everything else shouldn't be ever a target of relocation.
4664  * These rules are not set in stone and probably will be adjusted as we get
4665  * more experience with using BPF CO-RE relocations.
4666  */
4667 static int bpf_core_fields_are_compat(const struct btf *local_btf,
4668                                       __u32 local_id,
4669                                       const struct btf *targ_btf,
4670                                       __u32 targ_id)
4671 {
4672         const struct btf_type *local_type, *targ_type;
4673
4674 recur:
4675         local_type = skip_mods_and_typedefs(local_btf, local_id, &local_id);
4676         targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id);
4677         if (!local_type || !targ_type)
4678                 return -EINVAL;
4679
4680         if (btf_is_composite(local_type) && btf_is_composite(targ_type))
4681                 return 1;
4682         if (btf_kind(local_type) != btf_kind(targ_type))
4683                 return 0;
4684
4685         switch (btf_kind(local_type)) {
4686         case BTF_KIND_PTR:
4687                 return 1;
4688         case BTF_KIND_FWD:
4689         case BTF_KIND_ENUM: {
4690                 const char *local_name, *targ_name;
4691                 size_t local_len, targ_len;
4692
4693                 local_name = btf__name_by_offset(local_btf,
4694                                                  local_type->name_off);
4695                 targ_name = btf__name_by_offset(targ_btf, targ_type->name_off);
4696                 local_len = bpf_core_essential_name_len(local_name);
4697                 targ_len = bpf_core_essential_name_len(targ_name);
4698                 /* one of them is anonymous or both w/ same flavor-less names */
4699                 return local_len == 0 || targ_len == 0 ||
4700                        (local_len == targ_len &&
4701                         strncmp(local_name, targ_name, local_len) == 0);
4702         }
4703         case BTF_KIND_INT:
4704                 /* just reject deprecated bitfield-like integers; all other
4705                  * integers are by default compatible between each other
4706                  */
4707                 return btf_int_offset(local_type) == 0 &&
4708                        btf_int_offset(targ_type) == 0;
4709         case BTF_KIND_ARRAY:
4710                 local_id = btf_array(local_type)->type;
4711                 targ_id = btf_array(targ_type)->type;
4712                 goto recur;
4713         default:
4714                 pr_warn("unexpected kind %d relocated, local [%d], target [%d]\n",
4715                         btf_kind(local_type), local_id, targ_id);
4716                 return 0;
4717         }
4718 }
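
/* For illustration, under the field-compatibility rules above:
 *   int        vs  long long  -> compatible (size/signedness ignored)
 *   int[4]     vs  int[16]    -> compatible (dimensionality ignored)
 *   struct a   vs  union b    -> compatible (any two composites mix)
 *   int        vs  int *      -> not compatible (kind mismatch)
 */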
4719
4720 /*
4721  * Given single high-level named field accessor in local type, find
4722  * corresponding high-level accessor for a target type. Along the way,
4723  * maintain low-level spec for target as well. Also keep updating target
4724  * bit offset.
4725  *
4726  * Searching is performed through recursive exhaustive enumeration of all
4727  * fields of a struct/union. If there are any anonymous (embedded)
4728  * structs/unions, they are recursively searched as well. If field with
4729  * desired name is found, check compatibility between local and target types,
4730  * before returning result.
4731  *
4732  * 1 is returned, if field is found.
4733  * 0 is returned if no compatible field is found.
4734  * <0 is returned on error.
4735  */
4736 static int bpf_core_match_member(const struct btf *local_btf,
4737                                  const struct bpf_core_accessor *local_acc,
4738                                  const struct btf *targ_btf,
4739                                  __u32 targ_id,
4740                                  struct bpf_core_spec *spec,
4741                                  __u32 *next_targ_id)
4742 {
4743         const struct btf_type *local_type, *targ_type;
4744         const struct btf_member *local_member, *m;
4745         const char *local_name, *targ_name;
4746         __u32 local_id;
4747         int i, n, found;
4748
4749         targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id);
4750         if (!targ_type)
4751                 return -EINVAL;
4752         if (!btf_is_composite(targ_type))
4753                 return 0;
4754
4755         local_id = local_acc->type_id;
4756         local_type = btf__type_by_id(local_btf, local_id);
4757         local_member = btf_members(local_type) + local_acc->idx;
4758         local_name = btf__name_by_offset(local_btf, local_member->name_off);
4759
4760         n = btf_vlen(targ_type);
4761         m = btf_members(targ_type);
4762         for (i = 0; i < n; i++, m++) {
4763                 __u32 bit_offset;
4764
4765                 bit_offset = btf_member_bit_offset(targ_type, i);
4766
4767                 /* too deep struct/union/array nesting */
4768                 if (spec->raw_len == BPF_CORE_SPEC_MAX_LEN)
4769                         return -E2BIG;
4770
4771                 /* speculatively assume this member is the one we're looking for */
4772                 spec->bit_offset += bit_offset;
4773                 spec->raw_spec[spec->raw_len++] = i;
4774
4775                 targ_name = btf__name_by_offset(targ_btf, m->name_off);
4776                 if (str_is_empty(targ_name)) {
4777                         /* embedded struct/union, we need to go deeper */
4778                         found = bpf_core_match_member(local_btf, local_acc,
4779                                                       targ_btf, m->type,
4780                                                       spec, next_targ_id);
4781                         if (found) /* either found or error */
4782                                 return found;
4783                 } else if (strcmp(local_name, targ_name) == 0) {
4784                         /* matching named field */
4785                         struct bpf_core_accessor *targ_acc;
4786
4787                         targ_acc = &spec->spec[spec->len++];
4788                         targ_acc->type_id = targ_id;
4789                         targ_acc->idx = i;
4790                         targ_acc->name = targ_name;
4791
4792                         *next_targ_id = m->type;
4793                         found = bpf_core_fields_are_compat(local_btf,
4794                                                            local_member->type,
4795                                                            targ_btf, m->type);
4796                         if (!found)
4797                                 spec->len--; /* pop accessor */
4798                         return found;
4799                 }
4800                 /* member turned out not to be what we looked for */
4801                 spec->bit_offset -= bit_offset;
4802                 spec->raw_len--;
4803         }
4804
4805         return 0;
4806 }
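
/* For illustration (hypothetical target type): matching local field 'x'
 * against
 *
 *   struct target {
 *       struct { int pad; int x; };   // anonymous, recursed into above
 *   };
 *
 * appends raw indices [0, 1] (anonymous member #0, then 'x' at index 1
 * inside it) and records a single high-level accessor for 'x'.
 */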
4807
4808 /* Check local and target types for compatibility. This check is used for
4809  * type-based CO-RE relocations and follows slightly different rules than
4810  * field-based relocations. This function assumes that root types were already
4811  * checked for name match. Beyond that initial root-level name check, names
4812  * are completely ignored. Compatibility rules are as follows:
4813  *   - any two STRUCTs/UNIONs/FWDs/ENUMs/INTs are considered compatible, but
4814  *     kind should match for local and target types (i.e., STRUCT is not
4815  *     compatible with UNION);
4816  *   - for ENUMs, the size is ignored;
4817  *   - for INT, size and signedness are ignored;
4818  *   - for ARRAY, dimensionality is ignored, element types are checked for
4819  *     compatibility recursively;
4820  *   - CONST/VOLATILE/RESTRICT modifiers are ignored;
4821  *   - TYPEDEFs/PTRs are compatible if the types they point to are compatible;
4822  *   - FUNC_PROTOs are compatible if they have compatible signature: same
4823  *     number of input args and compatible return and argument types.
4824  * These rules are not set in stone and probably will be adjusted as we get
4825  * more experience with using BPF CO-RE relocations.
4826  */
4827 static int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id,
4828                                      const struct btf *targ_btf, __u32 targ_id)
4829 {
4830         const struct btf_type *local_type, *targ_type;
4831         int depth = 32; /* max recursion depth */
4832
4833         /* caller made sure that names match (ignoring flavor suffix) */
4834         local_type = btf__type_by_id(local_btf, local_id);
4835         targ_type = btf__type_by_id(targ_btf, targ_id);
4836         if (btf_kind(local_type) != btf_kind(targ_type))
4837                 return 0;
4838
4839 recur:
4840         depth--;
4841         if (depth < 0)
4842                 return -EINVAL;
4843
4844         local_type = skip_mods_and_typedefs(local_btf, local_id, &local_id);
4845         targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id);
4846         if (!local_type || !targ_type)
4847                 return -EINVAL;
4848
4849         if (btf_kind(local_type) != btf_kind(targ_type))
4850                 return 0;
4851
4852         switch (btf_kind(local_type)) {
4853         case BTF_KIND_UNKN:
4854         case BTF_KIND_STRUCT:
4855         case BTF_KIND_UNION:
4856         case BTF_KIND_ENUM:
4857         case BTF_KIND_FWD:
4858                 return 1;
4859         case BTF_KIND_INT:
4860                 /* just reject deprecated bitfield-like integers; all other
4861                  * integers are by default compatible between each other
4862                  */
4863                 return btf_int_offset(local_type) == 0 && btf_int_offset(targ_type) == 0;
4864         case BTF_KIND_PTR:
4865                 local_id = local_type->type;
4866                 targ_id = targ_type->type;
4867                 goto recur;
4868         case BTF_KIND_ARRAY:
4869                 local_id = btf_array(local_type)->type;
4870                 targ_id = btf_array(targ_type)->type;
4871                 goto recur;
4872         case BTF_KIND_FUNC_PROTO: {
4873                 struct btf_param *local_p = btf_params(local_type);
4874                 struct btf_param *targ_p = btf_params(targ_type);
4875                 __u16 local_vlen = btf_vlen(local_type);
4876                 __u16 targ_vlen = btf_vlen(targ_type);
4877                 int i, err;
4878
4879                 if (local_vlen != targ_vlen)
4880                         return 0;
4881
4882                 for (i = 0; i < local_vlen; i++, local_p++, targ_p++) {
4883                         skip_mods_and_typedefs(local_btf, local_p->type, &local_id);
4884                         skip_mods_and_typedefs(targ_btf, targ_p->type, &targ_id);
4885                         err = bpf_core_types_are_compat(local_btf, local_id, targ_btf, targ_id);
4886                         if (err <= 0)
4887                                 return err;
4888                 }
4889
4890                 /* tail recurse for return type check */
4891                 skip_mods_and_typedefs(local_btf, local_type->type, &local_id);
4892                 skip_mods_and_typedefs(targ_btf, targ_type->type, &targ_id);
4893                 goto recur;
4894         }
4895         default:
4896                 pr_warn("unexpected kind %s relocated, local [%d], target [%d]\n",
4897                         btf_kind_str(local_type), local_id, targ_id);
4898                 return 0;
4899         }
4900 }
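
/* For illustration, under the type-compatibility rules above:
 *   struct s         vs  struct s___v2   -> compatible (both STRUCTs)
 *   4-byte enum e    vs  1-byte enum e   -> compatible (sizes ignored)
 *   int (*)(int)     vs  int (*)(long)   -> compatible (same arg count,
 *                                           recursively compatible types)
 *   struct s         vs  union s         -> not compatible (kind mismatch)
 */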
4901
4902 /*
4903  * Try to match local spec to a target type and, if successful, produce full
4904  * target spec (high-level, low-level + bit offset).
4905  */
4906 static int bpf_core_spec_match(struct bpf_core_spec *local_spec,
4907                                const struct btf *targ_btf, __u32 targ_id,
4908                                struct bpf_core_spec *targ_spec)
4909 {
4910         const struct btf_type *targ_type;
4911         const struct bpf_core_accessor *local_acc;
4912         struct bpf_core_accessor *targ_acc;
4913         int i, sz, matched;
4914
4915         memset(targ_spec, 0, sizeof(*targ_spec));
4916         targ_spec->btf = targ_btf;
4917         targ_spec->root_type_id = targ_id;
4918         targ_spec->relo_kind = local_spec->relo_kind;
4919
4920         if (core_relo_is_type_based(local_spec->relo_kind)) {
4921                 return bpf_core_types_are_compat(local_spec->btf,
4922                                                  local_spec->root_type_id,
4923                                                  targ_btf, targ_id);
4924         }
4925
4926         local_acc = &local_spec->spec[0];
4927         targ_acc = &targ_spec->spec[0];
4928
4929         if (core_relo_is_enumval_based(local_spec->relo_kind)) {
4930                 size_t local_essent_len, targ_essent_len;
4931                 const struct btf_enum *e;
4932                 const char *targ_name;
4933
4934                 /* has to resolve to an enum */
4935                 targ_type = skip_mods_and_typedefs(targ_spec->btf, targ_id, &targ_id);
4936                 if (!btf_is_enum(targ_type))
4937                         return 0;
4938
4939                 local_essent_len = bpf_core_essential_name_len(local_acc->name);
4940
4941                 for (i = 0, e = btf_enum(targ_type); i < btf_vlen(targ_type); i++, e++) {
4942                         targ_name = btf__name_by_offset(targ_spec->btf, e->name_off);
4943                         targ_essent_len = bpf_core_essential_name_len(targ_name);
4944                         if (targ_essent_len != local_essent_len)
4945                                 continue;
4946                         if (strncmp(local_acc->name, targ_name, local_essent_len) == 0) {
4947                                 targ_acc->type_id = targ_id;
4948                                 targ_acc->idx = i;
4949                                 targ_acc->name = targ_name;
4950                                 targ_spec->len++;
4951                                 targ_spec->raw_spec[targ_spec->raw_len] = targ_acc->idx;
4952                                 targ_spec->raw_len++;
4953                                 return 1;
4954                         }
4955                 }
4956                 return 0;
4957         }
4958
4959         if (!core_relo_is_field_based(local_spec->relo_kind))
4960                 return -EINVAL;
4961
4962         for (i = 0; i < local_spec->len; i++, local_acc++, targ_acc++) {
4963                 targ_type = skip_mods_and_typedefs(targ_spec->btf, targ_id,
4964                                                    &targ_id);
4965                 if (!targ_type)
4966                         return -EINVAL;
4967
4968                 if (local_acc->name) {
4969                         matched = bpf_core_match_member(local_spec->btf,
4970                                                         local_acc,
4971                                                         targ_btf, targ_id,
4972                                                         targ_spec, &targ_id);
4973                         if (matched <= 0)
4974                                 return matched;
4975                 } else {
4976                         /* for i=0, targ_id is already treated as array element
4977                          * type (because it's the original struct); for others
4978                          * we should find the array element type first
4979                          */
4980                         if (i > 0) {
4981                                 const struct btf_array *a;
4982                                 bool flex;
4983
4984                                 if (!btf_is_array(targ_type))
4985                                         return 0;
4986
4987                                 a = btf_array(targ_type);
4988                                 flex = is_flex_arr(targ_btf, targ_acc - 1, a);
4989                                 if (!flex && local_acc->idx >= a->nelems)
4990                                         return 0;
4991                                 if (!skip_mods_and_typedefs(targ_btf, a->type,
4992                                                             &targ_id))
4993                                         return -EINVAL;
4994                         }
4995
4996                         /* too deep struct/union/array nesting */
4997                         if (targ_spec->raw_len == BPF_CORE_SPEC_MAX_LEN)
4998                                 return -E2BIG;
4999
5000                         targ_acc->type_id = targ_id;
5001                         targ_acc->idx = local_acc->idx;
5002                         targ_acc->name = NULL;
5003                         targ_spec->len++;
5004                         targ_spec->raw_spec[targ_spec->raw_len] = targ_acc->idx;
5005                         targ_spec->raw_len++;
5006
5007                         sz = btf__resolve_size(targ_btf, targ_id);
5008                         if (sz < 0)
5009                                 return sz;
5010                         targ_spec->bit_offset += local_acc->idx * sz * 8;
5011                 }
5012         }
5013
5014         return 1;
5015 }
5016
5017 static int bpf_core_calc_field_relo(const struct bpf_program *prog,
5018                                     const struct bpf_core_relo *relo,
5019                                     const struct bpf_core_spec *spec,
5020                                     __u32 *val, bool *validate)
5021 {
5022         const struct bpf_core_accessor *acc;
5023         const struct btf_type *t;
5024         __u32 byte_off, byte_sz, bit_off, bit_sz;
5025         const struct btf_member *m;
5026         const struct btf_type *mt;
5027         bool bitfield;
5028         __s64 sz;
5029
5030         if (relo->kind == BPF_FIELD_EXISTS) {
5031                 *val = spec ? 1 : 0;
5032                 return 0;
5033         }
5034
5035         if (!spec)
5036                 return -EUCLEAN; /* request instruction poisoning */
5037
5038         acc = &spec->spec[spec->len - 1];
5039         t = btf__type_by_id(spec->btf, acc->type_id);
5040
5041         /* a[n] accessor needs special handling */
5042         if (!acc->name) {
5043                 if (relo->kind == BPF_FIELD_BYTE_OFFSET) {
5044                         *val = spec->bit_offset / 8;
5045                 } else if (relo->kind == BPF_FIELD_BYTE_SIZE) {
5046                         sz = btf__resolve_size(spec->btf, acc->type_id);
5047                         if (sz < 0)
5048                                 return -EINVAL;
5049                         *val = sz;
5050                 } else {
5051                         pr_warn("prog '%s': relo %d at insn #%d can't be applied to array access\n",
5052                                 prog->name, relo->kind, relo->insn_off / 8);
5053                         return -EINVAL;
5054                 }
5055                 if (validate)
5056                         *validate = true;
5057                 return 0;
5058         }
5059
5060         m = btf_members(t) + acc->idx;
5061         mt = skip_mods_and_typedefs(spec->btf, m->type, NULL);
5062         bit_off = spec->bit_offset;
5063         bit_sz = btf_member_bitfield_size(t, acc->idx);
5064
5065         bitfield = bit_sz > 0;
5066         if (bitfield) {
5067                 byte_sz = mt->size;
5068                 byte_off = bit_off / 8 / byte_sz * byte_sz;
5069                 /* figure out smallest int size necessary for bitfield load */
5070                 while (bit_off + bit_sz - byte_off * 8 > byte_sz * 8) {
5071                         if (byte_sz >= 8) {
5072                                 /* bitfield can't be read with 64-bit read */
5073                                 pr_warn("prog '%s': relo %d at insn #%d can't be satisfied for bitfield\n",
5074                                         prog->name, relo->kind, relo->insn_off / 8);
5075                                 return -E2BIG;
5076                         }
5077                         byte_sz *= 2;
5078                         byte_off = bit_off / 8 / byte_sz * byte_sz;
5079                 }
5080         } else {
5081                 sz = btf__resolve_size(spec->btf, m->type);
5082                 if (sz < 0)
5083                         return -EINVAL;
5084                 byte_sz = sz;
5085                 byte_off = spec->bit_offset / 8;
5086                 bit_sz = byte_sz * 8;
5087         }
5088
5089         /* for bitfields, all the relocatable aspects are ambiguous and we
5090          * might disagree with the compiler, so turn off validation of expected
5091          * value, except for signedness
5092          */
5093         if (validate)
5094                 *validate = !bitfield;
5095
5096         switch (relo->kind) {
5097         case BPF_FIELD_BYTE_OFFSET:
5098                 *val = byte_off;
5099                 break;
5100         case BPF_FIELD_BYTE_SIZE:
5101                 *val = byte_sz;
5102                 break;
5103         case BPF_FIELD_SIGNED:
5104                 /* enums will be assumed unsigned */
5105                 *val = btf_is_enum(mt) ||
5106                        (btf_int_encoding(mt) & BTF_INT_SIGNED);
5107                 if (validate)
5108                         *validate = true; /* signedness is never ambiguous */
5109                 break;
5110         case BPF_FIELD_LSHIFT_U64:
5111 #if __BYTE_ORDER == __LITTLE_ENDIAN
5112                 *val = 64 - (bit_off + bit_sz - byte_off * 8);
5113 #else
5114                 *val = (8 - byte_sz) * 8 + (bit_off - byte_off * 8);
5115 #endif
5116                 break;
5117         case BPF_FIELD_RSHIFT_U64:
5118                 *val = 64 - bit_sz;
5119                 if (validate)
5120                         *validate = true; /* right shift is never ambiguous */
5121                 break;
5122         case BPF_FIELD_EXISTS:
5123         default:
5124                 return -EOPNOTSUPP;
5125         }
5126
5127         return 0;
5128 }
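
/* For illustration: a bitfield 'int f : 20' at bit offset 28 can't be read
 * with a 4-byte load (bits 28..47 extend past bit 32), so the loop above
 * doubles byte_sz to 8, with byte_off 0. On little-endian the shift
 * relocations then come out as
 *   lshift = 64 - (28 + 20 - 0 * 8) = 16,  rshift = 64 - 20 = 44,
 * so (v << 16) >> 44 extracts the field from the 8-byte load.
 */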
5129
5130 static int bpf_core_calc_type_relo(const struct bpf_core_relo *relo,
5131                                    const struct bpf_core_spec *spec,
5132                                    __u32 *val)
5133 {
5134         __s64 sz;
5135
5136         /* type-based relos return zero when target type is not found */
5137         if (!spec) {
5138                 *val = 0;
5139                 return 0;
5140         }
5141
5142         switch (relo->kind) {
5143         case BPF_TYPE_ID_TARGET:
5144                 *val = spec->root_type_id;
5145                 break;
5146         case BPF_TYPE_EXISTS:
5147                 *val = 1;
5148                 break;
5149         case BPF_TYPE_SIZE:
5150                 sz = btf__resolve_size(spec->btf, spec->root_type_id);
5151                 if (sz < 0)
5152                         return -EINVAL;
5153                 *val = sz;
5154                 break;
5155         case BPF_TYPE_ID_LOCAL:
5156         /* BPF_TYPE_ID_LOCAL is handled specially and shouldn't get here */
5157         default:
5158                 return -EOPNOTSUPP;
5159         }
5160
5161         return 0;
5162 }
5163
5164 static int bpf_core_calc_enumval_relo(const struct bpf_core_relo *relo,
5165                                       const struct bpf_core_spec *spec,
5166                                       __u32 *val)
5167 {
5168         const struct btf_type *t;
5169         const struct btf_enum *e;
5170
5171         switch (relo->kind) {
5172         case BPF_ENUMVAL_EXISTS:
5173                 *val = spec ? 1 : 0;
5174                 break;
5175         case BPF_ENUMVAL_VALUE:
5176                 if (!spec)
5177                         return -EUCLEAN; /* request instruction poisoning */
5178                 t = btf__type_by_id(spec->btf, spec->spec[0].type_id);
5179                 e = btf_enum(t) + spec->spec[0].idx;
5180                 *val = e->val;
5181                 break;
5182         default:
5183                 return -EOPNOTSUPP;
5184         }
5185
5186         return 0;
5187 }
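
/* For illustration (hypothetical enum): with target BTF containing
 * 'enum task_state { STATE_RUNNING = 2 }', BPF_ENUMVAL_VALUE for
 * STATE_RUNNING resolves *val to 2, while a missing enumerator yields
 * -EUCLEAN and gets the instruction poisoned in bpf_core_calc_relo() below.
 */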
5188
5189 struct bpf_core_relo_res {
5191         /* expected value in the instruction, unless validate == false */
5192         __u32 orig_val;
5193         /* new value that needs to be patched up to */
5194         __u32 new_val;
5195         /* relocation unsuccessful, poison instruction, but don't fail load */
5196         bool poison;
5197         /* some relocations can't be validated against orig_val */
5198         bool validate;
5199 };
5200
5201 /* Calculate original and target relocation values, given local and target
5202  * specs and relocation kind. These values are calculated for each candidate.
5203  * If there are multiple candidates, resulting values should all be consistent
5204  * with each other. Otherwise, libbpf will refuse to proceed due to ambiguity.
5205  * If the instruction has to be poisoned, res->poison will be set to true.
5206  */
5207 static int bpf_core_calc_relo(const struct bpf_program *prog,
5208                               const struct bpf_core_relo *relo,
5209                               int relo_idx,
5210                               const struct bpf_core_spec *local_spec,
5211                               const struct bpf_core_spec *targ_spec,
5212                               struct bpf_core_relo_res *res)
5213 {
5214         int err = -EOPNOTSUPP;
5215
5216         res->orig_val = 0;
5217         res->new_val = 0;
5218         res->poison = false;
5219         res->validate = true;
5220
5221         if (core_relo_is_field_based(relo->kind)) {
5222                 err = bpf_core_calc_field_relo(prog, relo, local_spec, &res->orig_val, &res->validate);
5223                 err = err ?: bpf_core_calc_field_relo(prog, relo, targ_spec, &res->new_val, NULL);
5224         } else if (core_relo_is_type_based(relo->kind)) {
5225                 err = bpf_core_calc_type_relo(relo, local_spec, &res->orig_val);
5226                 err = err ?: bpf_core_calc_type_relo(relo, targ_spec, &res->new_val);
5227         } else if (core_relo_is_enumval_based(relo->kind)) {
5228                 err = bpf_core_calc_enumval_relo(relo, local_spec, &res->orig_val);
5229                 err = err ?: bpf_core_calc_enumval_relo(relo, targ_spec, &res->new_val);
5230         }
5231
5232         if (err == -EUCLEAN) {
5233                 /* EUCLEAN is used to signal instruction poisoning request */
5234                 res->poison = true;
5235                 err = 0;
5236         } else if (err == -EOPNOTSUPP) {
5237                 /* EOPNOTSUPP means unknown/unsupported relocation */
5238                 pr_warn("prog '%s': relo #%d: unrecognized CO-RE relocation %s (%d) at insn #%d\n",
5239                         prog->name, relo_idx, core_relo_kind_str(relo->kind),
5240                         relo->kind, relo->insn_off / 8);
5241         }
5242
5243         return err;
5244 }
5245
5246 /*
5247  * Turn an instruction for which CO-RE relocation failed into an invalid one
5248  * with a distinct signature.
5249  */
5250 static void bpf_core_poison_insn(struct bpf_program *prog, int relo_idx,
5251                                  int insn_idx, struct bpf_insn *insn)
5252 {
5253         pr_debug("prog '%s': relo #%d: substituting insn #%d w/ invalid insn\n",
5254                  prog->name, relo_idx, insn_idx);
5255         insn->code = BPF_JMP | BPF_CALL;
5256         insn->dst_reg = 0;
5257         insn->src_reg = 0;
5258         insn->off = 0;
5259         /* if this instruction is reachable (not dead code), the
5260          * verifier will complain with the following message:
5261          * invalid func unknown#195896080
5262          */
5263         insn->imm = 195896080; /* => 0xbad2310 => "bad relo" */
5264 }
5265
5266 static bool is_ldimm64(struct bpf_insn *insn)
5267 {
5268         return insn->code == (BPF_LD | BPF_IMM | BPF_DW);
5269 }
5270
5271 /*
5272  * Patch relocatable BPF instruction.
5273  *
5274  * Patched value is determined by relocation kind and target specification.
5275  * For existence relocations, target spec will be NULL if the field/type is not found.
5276  * Expected insn->imm value is determined using relocation kind and local
5277  * spec, and is checked before patching instruction. If actual insn->imm value
5278  * is wrong, bail out with error.
5279  *
5280  * Currently three kinds of BPF instructions are supported:
5281  * 1. rX = <imm> (assignment with immediate operand);
5282  * 2. rX += <imm> (arithmetic operations with immediate operand);
5283  * 3. rX = <imm64> (load with 64-bit immediate value).
5284  */
5285 static int bpf_core_patch_insn(struct bpf_program *prog,
5286                                const struct bpf_core_relo *relo,
5287                                int relo_idx,
5288                                const struct bpf_core_relo_res *res)
5289 {
5290         __u32 orig_val, new_val;
5291         struct bpf_insn *insn;
5292         int insn_idx;
5293         __u8 class;
5294
5295         if (relo->insn_off % BPF_INSN_SZ)
5296                 return -EINVAL;
5297         insn_idx = relo->insn_off / BPF_INSN_SZ;
5298         /* adjust insn_idx from section frame of reference to the local
5299          * program's frame of reference; (sub-)program code is not yet
5300          * relocated, so it's enough to just subtract in-section offset
5301          */
5302         insn_idx = insn_idx - prog->sec_insn_off;
5303         insn = &prog->insns[insn_idx];
5304         class = BPF_CLASS(insn->code);
5305
5306         if (res->poison) {
5307                 /* poison second part of ldimm64 to avoid a confusing error from
5308                  * the verifier about "unknown opcode 00"
5309                  */
5310                 if (is_ldimm64(insn))
5311                         bpf_core_poison_insn(prog, relo_idx, insn_idx + 1, insn + 1);
5312                 bpf_core_poison_insn(prog, relo_idx, insn_idx, insn);
5313                 return 0;
5314         }
5315
5316         orig_val = res->orig_val;
5317         new_val = res->new_val;
5318
5319         switch (class) {
5320         case BPF_ALU:
5321         case BPF_ALU64:
5322                 if (BPF_SRC(insn->code) != BPF_K)
5323                         return -EINVAL;
5324                 if (res->validate && insn->imm != orig_val) {
5325                         pr_warn("prog '%s': relo #%d: unexpected insn #%d (ALU/ALU64) value: got %u, exp %u -> %u\n",
5326                                 prog->name, relo_idx,
5327                                 insn_idx, insn->imm, orig_val, new_val);
5328                         return -EINVAL;
5329                 }
5330                 orig_val = insn->imm;
5331                 insn->imm = new_val;
5332                 pr_debug("prog '%s': relo #%d: patched insn #%d (ALU/ALU64) imm %u -> %u\n",
5333                          prog->name, relo_idx, insn_idx,
5334                          orig_val, new_val);
5335                 break;
5336         case BPF_LDX:
5337         case BPF_ST:
5338         case BPF_STX:
5339                 if (res->validate && insn->off != orig_val) {
5340                         pr_warn("prog '%s': relo #%d: unexpected insn #%d (LDX/ST/STX) value: got %u, exp %u -> %u\n",
5341                                 prog->name, relo_idx, insn_idx, insn->off, orig_val, new_val);
5342                         return -EINVAL;
5343                 }
5344                 if (new_val > SHRT_MAX) {
5345                         pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) value too big: %u\n",
5346                                 prog->name, relo_idx, insn_idx, new_val);
5347                         return -ERANGE;
5348                 }
5349                 orig_val = insn->off;
5350                 insn->off = new_val;
5351                 pr_debug("prog '%s': relo #%d: patched insn #%d (LDX/ST/STX) off %u -> %u\n",
5352                          prog->name, relo_idx, insn_idx, orig_val, new_val);
5353                 break;
5354         case BPF_LD: {
5355                 __u64 imm;
5356
5357                 if (!is_ldimm64(insn) ||
5358                     insn[0].src_reg != 0 || insn[0].off != 0 ||
5359                     insn_idx + 1 >= prog->insns_cnt ||
5360                     insn[1].code != 0 || insn[1].dst_reg != 0 ||
5361                     insn[1].src_reg != 0 || insn[1].off != 0) {
5362                         pr_warn("prog '%s': relo #%d: insn #%d (LDIMM64) has unexpected form\n",
5363                                 prog->name, relo_idx, insn_idx);
5364                         return -EINVAL;
5365                 }
5366
5367                 imm = insn[0].imm + ((__u64)insn[1].imm << 32);
5368                 if (res->validate && imm != orig_val) {
5369                         pr_warn("prog '%s': relo #%d: unexpected insn #%d (LDIMM64) value: got %llu, exp %u -> %u\n",
5370                                 prog->name, relo_idx,
5371                                 insn_idx, (unsigned long long)imm,
5372                                 orig_val, new_val);
5373                         return -EINVAL;
5374                 }
5375
5376                 insn[0].imm = new_val;
5377                 insn[1].imm = 0; /* currently only 32-bit values are supported */
5378                 pr_debug("prog '%s': relo #%d: patched insn #%d (LDIMM64) imm64 %llu -> %u\n",
5379                          prog->name, relo_idx, insn_idx,
5380                          (unsigned long long)imm, new_val);
5381                 break;
5382         }
5383         default:
5384                 pr_warn("prog '%s': relo #%d: trying to relocate unrecognized insn #%d, code:0x%x, src:0x%x, dst:0x%x, off:0x%x, imm:0x%x\n",
5385                         prog->name, relo_idx, insn_idx, insn->code,
5386                         insn->src_reg, insn->dst_reg, insn->off, insn->imm);
5387                 return -EINVAL;
5388         }
5389
5390         return 0;
5391 }
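
/* For illustration: a BPF_FIELD_BYTE_OFFSET relocation on a load such as
 * 'r1 = *(u32 *)(r2 + 16)' (BPF_LDX class above) patches the 16-bit off
 * field: if the field moved to offset 24 in the target kernel, insn->off is
 * rewritten 16 -> 24, after checking that insn->off matches the locally
 * computed orig_val when res->validate is set.
 */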
5392
5393 /* Output spec definition in the format:
5394  * [<type-id>] (<type-name>) + <raw-spec> => <offset>@<spec>,
5395  * where <spec> is a C-syntax view of recorded field access, e.g.: x.a[3].b
5396  */
5397 static void bpf_core_dump_spec(int level, const struct bpf_core_spec *spec)
5398 {
5399         const struct btf_type *t;
5400         const struct btf_enum *e;
5401         const char *s;
5402         __u32 type_id;
5403         int i;
5404
5405         type_id = spec->root_type_id;
5406         t = btf__type_by_id(spec->btf, type_id);
5407         s = btf__name_by_offset(spec->btf, t->name_off);
5408
5409         libbpf_print(level, "[%u] %s %s", type_id, btf_kind_str(t), str_is_empty(s) ? "<anon>" : s);
5410
5411         if (core_relo_is_type_based(spec->relo_kind))
5412                 return;
5413
5414         if (core_relo_is_enumval_based(spec->relo_kind)) {
5415                 t = skip_mods_and_typedefs(spec->btf, type_id, NULL);
5416                 e = btf_enum(t) + spec->raw_spec[0];
5417                 s = btf__name_by_offset(spec->btf, e->name_off);
5418
5419                 libbpf_print(level, "::%s = %u", s, e->val);
5420                 return;
5421         }
5422
5423         if (core_relo_is_field_based(spec->relo_kind)) {
5424                 for (i = 0; i < spec->len; i++) {
5425                         if (spec->spec[i].name)
5426                                 libbpf_print(level, ".%s", spec->spec[i].name);
5427                         else if (i > 0 || spec->spec[i].idx > 0)
5428                                 libbpf_print(level, "[%u]", spec->spec[i].idx);
5429                 }
5430
5431                 libbpf_print(level, " (");
5432                 for (i = 0; i < spec->raw_len; i++)
5433                         libbpf_print(level, "%s%d", i == 0 ? "" : ":", spec->raw_spec[i]);
5434
5435                 if (spec->bit_offset % 8)
5436                         libbpf_print(level, " @ offset %u.%u)",
5437                                      spec->bit_offset / 8, spec->bit_offset % 8);
5438                 else
5439                         libbpf_print(level, " @ offset %u)", spec->bit_offset / 8);
5440                 return;
5441         }
5442 }
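
/* For illustration: for the 'struct sample' example earlier, a dumped
 * field-based spec would read roughly (with a hypothetical type ID of 42):
 *
 *   [42] struct sample.a[3] (0:1:2:3 @ offset 24)
 */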
5443
5444 static size_t bpf_core_hash_fn(const void *key, void *ctx)
5445 {
5446         return (size_t)key;
5447 }
5448
5449 static bool bpf_core_equal_fn(const void *k1, const void *k2, void *ctx)
5450 {
5451         return k1 == k2;
5452 }
5453
5454 static void *u32_as_hash_key(__u32 x)
5455 {
5456         return (void *)(uintptr_t)x;
5457 }
5458
5459 /*
5460  * CO-RE relocate a single instruction.
5461  *
5462  * The outline and important points of the algorithm:
5463  * 1. For given local type, find corresponding candidate target types.
5464  *    Candidate type is a type with the same "essential" name, ignoring
5465  *    everything after last triple underscore (___). E.g., `sample`,
5466  *    `sample___flavor_one`, `sample___flavor_another_one`, are all candidates
5467  *    for each other. Names with triple underscore are referred to as
5468  *    "flavors" and are useful, among other things, to allow to
5469  *    specify/support incompatible variations of the same kernel struct, which
5470  *    might differ between different kernel versions and/or build
5471  *    configurations.
5472  *
5473  *    N.B. Struct "flavors" could be generated by bpftool's BTF-to-C
5474  *    converter, when deduplicated BTF of a kernel still contains more than
5475  *    one type with the same name. In that case, ___2, ___3, etc. are
5476  *    appended starting from the second name conflict. But struct flavors are
5477  *    also useful when defined "locally", in a BPF program, to extract the same
5478  *    data from incompatible changes between different kernel
5479  *    versions/configurations. For instance, to handle field renames between
5480  *    kernel versions, one can use two flavors of the struct name with the
5481  *    same common name and use conditional relocations to extract that field,
5482  *    depending on target kernel version.
5483  * 2. For each candidate type, try to match local specification to this
5484  *    candidate target type. Matching involves finding corresponding
5485  *    high-level spec accessors, meaning that all named fields should match,
5486  *    as well as all array accesses should be within the actual bounds. Also,
5487  *    types should be compatible (see bpf_core_fields_are_compat for details).
5488  * 3. It is supported and expected that there might be multiple flavors
5489  *    matching the spec. As long as all the specs resolve to the same set of
5490  *    offsets across all candidates, there is no error. If there is any
5491  *    ambiguity, CO-RE relocation will fail. This is necessary to accommodate
5492  *    imperfections of BTF deduplication, which can cause slight duplication of
5493  *    the same BTF type, if some directly or indirectly referenced (by
5494  *    pointer) type gets resolved to different actual types in different
5495  *    object files. If such situation occurs, deduplicated BTF will end up
5496  *    with two (or more) structurally identical types, which differ only in
5497  *    types they refer to through pointer. This should be OK in most cases and
5498  *    is not an error.
5499  * 4. Candidate types search is performed by linearly scanning through all
5500  *    types in target BTF. It is anticipated that this is overall more
5501  *    efficient memory-wise and not significantly worse (if not better)
5502  *    CPU-wise compared to prebuilding a map from all local type names to
5503  *    a list of candidate type names. It's also sped up by caching the
5504  *    resolved list of matching candidates for each local "root" type ID
5505  *    that has at least one bpf_core_relo associated with it. This list is shared
5506  *    between multiple relocations for the same type ID and is updated as some
5507  *    of the candidates are pruned due to structural incompatibility.
5508  */
5509 static int bpf_core_apply_relo(struct bpf_program *prog,
5510                                const struct bpf_core_relo *relo,
5511                                int relo_idx,
5512                                const struct btf *local_btf,
5513                                const struct btf *targ_btf,
5514                                struct hashmap *cand_cache)
5515 {
5516         struct bpf_core_spec local_spec, cand_spec, targ_spec = {};
5517         const void *type_key = u32_as_hash_key(relo->type_id);
5518         struct bpf_core_relo_res cand_res, targ_res;
5519         const struct btf_type *local_type;
5520         const char *local_name;
5521         struct ids_vec *cand_ids;
5522         __u32 local_id, cand_id;
5523         const char *spec_str;
5524         int i, j, err;
5525
5526         local_id = relo->type_id;
5527         local_type = btf__type_by_id(local_btf, local_id);
5528         if (!local_type)
5529                 return -EINVAL;
5530
5531         local_name = btf__name_by_offset(local_btf, local_type->name_off);
5532         if (!local_name)
5533                 return -EINVAL;
5534
5535         spec_str = btf__name_by_offset(local_btf, relo->access_str_off);
5536         if (str_is_empty(spec_str))
5537                 return -EINVAL;
5538
5539         err = bpf_core_parse_spec(local_btf, local_id, spec_str, relo->kind, &local_spec);
5540         if (err) {
5541                 pr_warn("prog '%s': relo #%d: parsing [%d] %s %s + %s failed: %d\n",
5542                         prog->name, relo_idx, local_id, btf_kind_str(local_type),
5543                         str_is_empty(local_name) ? "<anon>" : local_name,
5544                         spec_str, err);
5545                 return -EINVAL;
5546         }
5547
5548         pr_debug("prog '%s': relo #%d: kind <%s> (%d), spec is ", prog->name,
5549                  relo_idx, core_relo_kind_str(relo->kind), relo->kind);
5550         bpf_core_dump_spec(LIBBPF_DEBUG, &local_spec);
5551         libbpf_print(LIBBPF_DEBUG, "\n");
5552
5553         /* TYPE_ID_LOCAL relo is special and doesn't need candidate search */
5554         if (relo->kind == BPF_TYPE_ID_LOCAL) {
5555                 targ_res.validate = true;
5556                 targ_res.poison = false;
5557                 targ_res.orig_val = local_spec.root_type_id;
5558                 targ_res.new_val = local_spec.root_type_id;
5559                 goto patch_insn;
5560         }
5561
5562         /* libbpf doesn't support candidate search for anonymous types */
5563         if (str_is_empty(local_name)) {
5564                 pr_warn("prog '%s': relo #%d: <%s> (%d) relocation doesn't support anonymous types\n",
5565                         prog->name, relo_idx, core_relo_kind_str(relo->kind), relo->kind);
5566                 return -EOPNOTSUPP;
5567         }
5568
5569         if (!hashmap__find(cand_cache, type_key, (void **)&cand_ids)) {
5570                 cand_ids = bpf_core_find_cands(local_btf, local_id, targ_btf);
5571                 if (IS_ERR(cand_ids)) {
5572                         pr_warn("prog '%s': relo #%d: target candidate search failed for [%d] %s %s: %ld\n",
5573                                 prog->name, relo_idx, local_id, btf_kind_str(local_type),
5574                                 local_name, PTR_ERR(cand_ids));
5575                         return PTR_ERR(cand_ids);
5576                 }
5577                 err = hashmap__set(cand_cache, type_key, cand_ids, NULL, NULL);
5578                 if (err) {
5579                         bpf_core_free_cands(cand_ids);
5580                         return err;
5581                 }
5582         }
5583
5584         for (i = 0, j = 0; i < cand_ids->len; i++) {
5585                 cand_id = cand_ids->data[i];
5586                 err = bpf_core_spec_match(&local_spec, targ_btf, cand_id, &cand_spec);
5587                 if (err < 0) {
5588                         pr_warn("prog '%s': relo #%d: error matching candidate #%d ",
5589                                 prog->name, relo_idx, i);
5590                         bpf_core_dump_spec(LIBBPF_WARN, &cand_spec);
5591                         libbpf_print(LIBBPF_WARN, ": %d\n", err);
5592                         return err;
5593                 }
5594
5595                 pr_debug("prog '%s': relo #%d: %s candidate #%d ", prog->name,
5596                          relo_idx, err == 0 ? "non-matching" : "matching", i);
5597                 bpf_core_dump_spec(LIBBPF_DEBUG, &cand_spec);
5598                 libbpf_print(LIBBPF_DEBUG, "\n");
5599
5600                 if (err == 0)
5601                         continue;
5602
5603                 err = bpf_core_calc_relo(prog, relo, relo_idx, &local_spec, &cand_spec, &cand_res);
5604                 if (err)
5605                         return err;
5606
5607                 if (j == 0) {
5608                         targ_res = cand_res;
5609                         targ_spec = cand_spec;
5610                 } else if (cand_spec.bit_offset != targ_spec.bit_offset) {
5611                         /* if there are many field relo candidates, they
5612                          * should all resolve to the same bit offset
5613                          */
5614                         pr_warn("prog '%s': relo #%d: field offset ambiguity: %u != %u\n",
5615                                 prog->name, relo_idx, cand_spec.bit_offset,
5616                                 targ_spec.bit_offset);
5617                         return -EINVAL;
5618                 } else if (cand_res.poison != targ_res.poison || cand_res.new_val != targ_res.new_val) {
5619                         /* all candidates should result in the same relocation
5620                          * decision and value, otherwise it's dangerous to
5621                          * proceed due to ambiguity
5622                          */
5623                         pr_warn("prog '%s': relo #%d: relocation decision ambiguity: %s %u != %s %u\n",
5624                                 prog->name, relo_idx,
5625                                 cand_res.poison ? "failure" : "success", cand_res.new_val,
5626                                 targ_res.poison ? "failure" : "success", targ_res.new_val);
5627                         return -EINVAL;
5628                 }
5629
5630                 cand_ids->data[j++] = cand_spec.root_type_id;
5631         }
5632
5633         /*
5634          * For a BPF_FIELD_EXISTS relo, or when the BPF program performs
5635          * field existence checks or kernel version/config checks, it's
5636          * expected that we might not find any candidates. In that case, if
5637          * the field wasn't found in any candidate, the list of candidates
5638          * shouldn't change at all; we'll just handle the relocation
5639          * appropriately, depending on the relo's kind.
5640          */
5641         if (j > 0)
5642                 cand_ids->len = j;
5643
5644         /*
5645          * If no candidates were found, it might be either a programmer error
5646          * or an expected case, depending on whether the instruction with the
5647          * relocation is guarded in some way that makes it unreachable (dead
5648          * code) if the relocation can't be resolved. This is handled
5649          * uniformly in bpf_core_patch_insn() by replacing that instruction
5650          * with a BPF helper call insn (using an invalid helper ID). If that
5651          * instruction is indeed unreachable, it will be ignored and
5652          * eliminated by the verifier. If it was an error, the verifier will
5653          * complain and point to the specific instruction number in its log.
5654          */
5655         if (j == 0) {
5656                 pr_debug("prog '%s': relo #%d: no matching targets found\n",
5657                          prog->name, relo_idx);
5658
5659                 /* calculate single target relo result explicitly */
5660                 err = bpf_core_calc_relo(prog, relo, relo_idx, &local_spec, NULL, &targ_res);
5661                 if (err)
5662                         return err;
5663         }
5664
5665 patch_insn:
5666         /* bpf_core_patch_insn() should know how to handle missing targ_spec */
5667         err = bpf_core_patch_insn(prog, relo, relo_idx, &targ_res);
5668         if (err) {
5669                 pr_warn("prog '%s': relo #%d: failed to patch insn at offset %d: %d\n",
5670                         prog->name, relo_idx, relo->insn_off, err);
5671                 return -EINVAL;
5672         }
5673
5674         return 0;
5675 }
5676
5677 static int
5678 bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path)
5679 {
5680         const struct btf_ext_info_sec *sec;
5681         const struct bpf_core_relo *rec;
5682         const struct btf_ext_info *seg;
5683         struct hashmap_entry *entry;
5684         struct hashmap *cand_cache = NULL;
5685         struct bpf_program *prog;
5686         struct btf *targ_btf;
5687         const char *sec_name;
5688         int i, err = 0, insn_idx, sec_idx;
5689
5690         if (obj->btf_ext->core_relo_info.len == 0)
5691                 return 0;
5692
5693         if (targ_btf_path)
5694                 targ_btf = btf__parse_elf(targ_btf_path, NULL);
5695         else
5696                 targ_btf = obj->btf_vmlinux;
5697         if (IS_ERR_OR_NULL(targ_btf)) {
5698                 err = targ_btf ? PTR_ERR(targ_btf) : -ENOENT;
5699                 pr_warn("failed to get target BTF: %d\n", err);
5700                 return err;
5700         }
5701
5702         cand_cache = hashmap__new(bpf_core_hash_fn, bpf_core_equal_fn, NULL);
5703         if (IS_ERR(cand_cache)) {
5704                 err = PTR_ERR(cand_cache);
5705                 goto out;
5706         }
5707
5708         seg = &obj->btf_ext->core_relo_info;
5709         for_each_btf_ext_sec(seg, sec) {
5710                 sec_name = btf__name_by_offset(obj->btf, sec->sec_name_off);
5711                 if (str_is_empty(sec_name)) {
5712                         err = -EINVAL;
5713                         goto out;
5714                 }
5715                 /* bpf_object's ELF is gone by now so it's not easy to find
5716                  * section index by section name, but we can find *any*
5717                  * bpf_program within the desired section and use its
5718                  * prog->sec_idx to do a proper search by section index and
5719                  * instruction offset
5720                  */
5721                 prog = NULL;
5722                 for (i = 0; i < obj->nr_programs; i++) {
5723                         prog = &obj->programs[i];
5724                         if (strcmp(prog->sec_name, sec_name) == 0)
5725                                 break;
5726                 }
5727                 if (!prog) {
5728                         pr_warn("sec '%s': failed to find a BPF program\n", sec_name);
5729                         err = -ENOENT;
                        goto out;
5730                 }
5731                 sec_idx = prog->sec_idx;
5732
5733                 pr_debug("sec '%s': found %d CO-RE relocations\n",
5734                          sec_name, sec->num_info);
5735
5736                 for_each_btf_ext_rec(seg, sec, i, rec) {
5737                         insn_idx = rec->insn_off / BPF_INSN_SZ;
5738                         prog = find_prog_by_sec_insn(obj, sec_idx, insn_idx);
5739                         if (!prog) {
5740                                 pr_warn("sec '%s': failed to find program at insn #%d for CO-RE offset relocation #%d\n",
5741                                         sec_name, insn_idx, i);
5742                                 err = -EINVAL;
5743                                 goto out;
5744                         }
5745
5746                         err = bpf_core_apply_relo(prog, rec, i, obj->btf,
5747                                                   targ_btf, cand_cache);
5748                         if (err) {
5749                                 pr_warn("prog '%s': relo #%d: failed to relocate: %d\n",
5750                                         prog->name, i, err);
5751                                 goto out;
5752                         }
5753                 }
5754         }
5755
5756 out:
5757         /* obj->btf_vmlinux is freed at the end of object load phase */
5758         if (targ_btf != obj->btf_vmlinux)
5759                 btf__free(targ_btf);
5760         if (!IS_ERR_OR_NULL(cand_cache)) {
5761                 hashmap__for_each_entry(cand_cache, entry, i) {
5762                         bpf_core_free_cands(entry->value);
5763                 }
5764                 hashmap__free(cand_cache);
5765         }
5766         return err;
5767 }
5768
5769 /* Relocate data references within program code:
5770  *  - map references;
5771  *  - global variable references;
5772  *  - extern references.
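      *
      * For example (illustrative only): a RELO_LD64 rewrites the two-slot
      * ldimm64 instruction the compiler emitted for a map symbol, e.g.
      * BPF_LD_MAP_FD(BPF_REG_1, 0), setting src_reg to BPF_PSEUDO_MAP_FD and
      * imm to the map's FD; RELO_DATA uses BPF_PSEUDO_MAP_VALUE instead and
      * additionally stores the variable's offset within the map value in the
      * second instruction slot's imm field.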
5773  */
5774 static int
5775 bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog)
5776 {
5777         int i;
5778
5779         for (i = 0; i < prog->nr_reloc; i++) {
5780                 struct reloc_desc *relo = &prog->reloc_desc[i];
5781                 struct bpf_insn *insn = &prog->insns[relo->insn_idx];
5782                 struct extern_desc *ext;
5783
5784                 switch (relo->type) {
5785                 case RELO_LD64:
5786                         insn[0].src_reg = BPF_PSEUDO_MAP_FD;
5787                         insn[0].imm = obj->maps[relo->map_idx].fd;
5788                         relo->processed = true;
5789                         break;
5790                 case RELO_DATA:
5791                         insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
5792                         insn[1].imm = insn[0].imm + relo->sym_off;
5793                         insn[0].imm = obj->maps[relo->map_idx].fd;
5794                         relo->processed = true;
5795                         break;
5796                 case RELO_EXTERN:
5797                         ext = &obj->externs[relo->sym_off];
5798                         if (ext->type == EXT_KCFG) {
5799                                 insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
5800                                 insn[0].imm = obj->maps[obj->kconfig_map_idx].fd;
5801                                 insn[1].imm = ext->kcfg.data_off;
5802                         } else /* EXT_KSYM */ {
5803                                 insn[0].imm = (__u32)ext->ksym.addr;
5804                                 insn[1].imm = ext->ksym.addr >> 32;
5805                         }
5806                         relo->processed = true;
5807                         break;
5808                 case RELO_CALL:
5809                         /* will be handled as a follow up pass */
5810                         break;
5811                 default:
5812                         pr_warn("prog '%s': relo #%d: bad relo type %d\n",
5813                                 prog->name, i, relo->type);
5814                         return -EINVAL;
5815                 }
5816         }
5817
5818         return 0;
5819 }
5820
5821 static int adjust_prog_btf_ext_info(const struct bpf_object *obj,
5822                                     const struct bpf_program *prog,
5823                                     const struct btf_ext_info *ext_info,
5824                                     void **prog_info, __u32 *prog_rec_cnt,
5825                                     __u32 *prog_rec_sz)
5826 {
5827         void *copy_start = NULL, *copy_end = NULL;
5828         void *rec, *rec_end, *new_prog_info;
5829         const struct btf_ext_info_sec *sec;
5830         size_t old_sz, new_sz;
5831         const char *sec_name;
5832         int i, off_adj;
5833
5834         for_each_btf_ext_sec(ext_info, sec) {
5835                 sec_name = btf__name_by_offset(obj->btf, sec->sec_name_off);
5836                 if (!sec_name)
5837                         return -EINVAL;
5838                 if (strcmp(sec_name, prog->sec_name) != 0)
5839                         continue;
5840
5841                 for_each_btf_ext_rec(ext_info, sec, i, rec) {
5842                         __u32 insn_off = *(__u32 *)rec / BPF_INSN_SZ;
5843
5844                         if (insn_off < prog->sec_insn_off)
5845                                 continue;
5846                         if (insn_off >= prog->sec_insn_off + prog->sec_insn_cnt)
5847                                 break;
5848
5849                         if (!copy_start)
5850                                 copy_start = rec;
5851                         copy_end = rec + ext_info->rec_size;
5852                 }
5853
5854                 if (!copy_start)
5855                         return -ENOENT;
5856
5857                 /* append func/line info of a given (sub-)program to the main
5858                  * program func/line info
5859                  */
5860                 old_sz = (size_t)(*prog_rec_cnt) * ext_info->rec_size;
5861                 new_sz = old_sz + (copy_end - copy_start);
5862                 new_prog_info = realloc(*prog_info, new_sz);
5863                 if (!new_prog_info)
5864                         return -ENOMEM;
5865                 *prog_info = new_prog_info;
5866                 *prog_rec_cnt = new_sz / ext_info->rec_size;
5867                 memcpy(new_prog_info + old_sz, copy_start, copy_end - copy_start);
5868
5869                 /* Kernel instruction offsets are in units of 8-byte
5870                  * instructions, while .BTF.ext instruction offsets generated
5871                  * by Clang are in units of bytes. So convert Clang offsets
5872                  * into kernel offsets and adjust offset according to program
5873                  * relocated position.
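                      * E.g. (illustrative): a Clang-emitted offset of 24
                      * bytes denotes insn #3; if this subprog landed at main
                      * prog insn #100 while starting at section insn #0,
                      * off_adj is 100 and the final offset becomes 3 + 100 = 103.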
5874                  */
5875                 off_adj = prog->sub_insn_off - prog->sec_insn_off;
5876                 rec = new_prog_info + old_sz;
5877                 rec_end = new_prog_info + new_sz;
5878                 for (; rec < rec_end; rec += ext_info->rec_size) {
5879                         __u32 *insn_off = rec;
5880
5881                         *insn_off = *insn_off / BPF_INSN_SZ + off_adj;
5882                 }
5883                 *prog_rec_sz = ext_info->rec_size;
5884                 return 0;
5885         }
5886
5887         return -ENOENT;
5888 }
5889
5890 static int
5891 reloc_prog_func_and_line_info(const struct bpf_object *obj,
5892                               struct bpf_program *main_prog,
5893                               const struct bpf_program *prog)
5894 {
5895         int err;
5896
5897         /* no .BTF.ext relocation if .BTF.ext is missing or kernel doesn't
5898          * support func/line info
5899          */
5900         if (!obj->btf_ext || !kernel_supports(FEAT_BTF_FUNC))
5901                 return 0;
5902
5903         /* only attempt func info relocation if main program's func_info
5904          * relocation was successful
5905          */
5906         if (main_prog != prog && !main_prog->func_info)
5907                 goto line_info;
5908
5909         err = adjust_prog_btf_ext_info(obj, prog, &obj->btf_ext->func_info,
5910                                        &main_prog->func_info,
5911                                        &main_prog->func_info_cnt,
5912                                        &main_prog->func_info_rec_size);
5913         if (err) {
5914                 if (err != -ENOENT) {
5915                         pr_warn("prog '%s': error relocating .BTF.ext function info: %d\n",
5916                                 prog->name, err);
5917                         return err;
5918                 }
5919                 if (main_prog->func_info) {
5920                         /*
5921                          * Some info has already been found, but the last
5922                          * btf_ext reloc had a problem, so we must error out.
5923                          */
5924                         pr_warn("prog '%s': missing .BTF.ext function info.\n", prog->name);
5925                         return err;
5926                 }
5927                 /* Couldn't load the very first info; ignore the rest. */
5928                 pr_warn("prog '%s': missing .BTF.ext function info for the main program, skipping all of .BTF.ext func info.\n",
5929                         prog->name);
5930         }
5931
5932 line_info:
5933         /* don't relocate line info if main program's relocation failed */
5934         if (main_prog != prog && !main_prog->line_info)
5935                 return 0;
5936
5937         err = adjust_prog_btf_ext_info(obj, prog, &obj->btf_ext->line_info,
5938                                        &main_prog->line_info,
5939                                        &main_prog->line_info_cnt,
5940                                        &main_prog->line_info_rec_size);
5941         if (err) {
5942                 if (err != -ENOENT) {
5943                         pr_warn("prog '%s': error relocating .BTF.ext line info: %d\n",
5944                                 prog->name, err);
5945                         return err;
5946                 }
5947                 if (main_prog->line_info) {
5948                         /*
5949                          * Some info has already been found, but the last
5950                          * btf_ext reloc had a problem, so we must error out.
5951                          */
5952                         pr_warn("prog '%s': missing .BTF.ext line info.\n", prog->name);
5953                         return err;
5954                 }
5955                 /* Couldn't load the very first info; ignore the rest. */
5956                 pr_warn("prog '%s': missing .BTF.ext line info for the main program, skipping all of .BTF.ext line info.\n",
5957                         prog->name);
5958         }
5959         return 0;
5960 }
5961
5962 static int cmp_relo_by_insn_idx(const void *key, const void *elem)
5963 {
5964         size_t insn_idx = *(const size_t *)key;
5965         const struct reloc_desc *relo = elem;
5966
5967         if (insn_idx == relo->insn_idx)
5968                 return 0;
5969         return insn_idx < relo->insn_idx ? -1 : 1;
5970 }
5971
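     /* Program relocations are sorted by insn_idx at collection time (see
      * cmp_relocs()), so a binary search finds the relocation, if any,
      * attached to a given instruction.
      */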
5972 static struct reloc_desc *find_prog_insn_relo(const struct bpf_program *prog, size_t insn_idx)
5973 {
5974         return bsearch(&insn_idx, prog->reloc_desc, prog->nr_reloc,
5975                        sizeof(*prog->reloc_desc), cmp_relo_by_insn_idx);
5976 }
5977
5978 static int
5979 bpf_object__reloc_code(struct bpf_object *obj, struct bpf_program *main_prog,
5980                        struct bpf_program *prog)
5981 {
5982         size_t sub_insn_idx, insn_idx, new_cnt;
5983         struct bpf_program *subprog;
5984         struct bpf_insn *insns, *insn;
5985         struct reloc_desc *relo;
5986         int err;
5987
5988         err = reloc_prog_func_and_line_info(obj, main_prog, prog);
5989         if (err)
5990                 return err;
5991
5992         for (insn_idx = 0; insn_idx < prog->sec_insn_cnt; insn_idx++) {
5993                 insn = &main_prog->insns[prog->sub_insn_off + insn_idx];
5994                 if (!insn_is_subprog_call(insn))
5995                         continue;
5996
5997                 relo = find_prog_insn_relo(prog, insn_idx);
5998                 if (relo && relo->type != RELO_CALL) {
5999                         pr_warn("prog '%s': unexpected relo for insn #%zu, type %d\n",
6000                                 prog->name, insn_idx, relo->type);
6001                         return -LIBBPF_ERRNO__RELOC;
6002                 }
6003                 if (relo) {
6004                         /* sub-program instruction index is a combination of
6005                          * an offset of a symbol pointed to by relocation and
6006                          * call instruction's imm field; for global functions,
6007                          * call always has imm = -1, but for static functions
6008                          * relocation is against STT_SECTION and insn->imm
6009                          * points to a start of a static function
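                              * (e.g., illustratively: sym_off = 80 bytes is
                              * .text insn #10; with imm = -1 for a global
                              * function, sub_insn_idx = 10 + (-1) + 1 = 10)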
6010                          */
6011                         sub_insn_idx = relo->sym_off / BPF_INSN_SZ + insn->imm + 1;
6012                 } else {
6013                         /* if subprogram call is to a static function within
6014                          * the same ELF section, there won't be any relocation
6015                          * emitted, but it also means there is no additional
6016                          * offset necessary, insns->imm is relative to
6017                          * instruction's original position within the section
6018                          */
6019                         sub_insn_idx = prog->sec_insn_off + insn_idx + insn->imm + 1;
6020                 }
6021
6022                 /* we enforce that sub-programs should be in .text section */
6023                 subprog = find_prog_by_sec_insn(obj, obj->efile.text_shndx, sub_insn_idx);
6024                 if (!subprog) {
6025                         pr_warn("prog '%s': no .text section found yet sub-program call exists\n",
6026                                 prog->name);
6027                         return -LIBBPF_ERRNO__RELOC;
6028                 }
6029
6030                 /* if it's the first call instruction calling into this
6031                  * subprogram (meaning this subprog hasn't been processed
6032                  * yet) within the context of current main program:
6033                  *   - append it at the end of main program's instruction block;
6034                  *   - process it recursively, while current program is put on hold;
6035                  *   - if that subprogram calls some other not-yet-processed
6036                  *   subprogram, the same thing will happen recursively until
6037                  *   there are no more unprocessed subprograms left to append
6038                  *   and relocate.
6039                  */
6040                 if (subprog->sub_insn_off == 0) {
6041                         subprog->sub_insn_off = main_prog->insns_cnt;
6042
6043                         new_cnt = main_prog->insns_cnt + subprog->insns_cnt;
6044                         insns = libbpf_reallocarray(main_prog->insns, new_cnt, sizeof(*insns));
6045                         if (!insns) {
6046                                 pr_warn("prog '%s': failed to realloc prog code\n", main_prog->name);
6047                                 return -ENOMEM;
6048                         }
6049                         main_prog->insns = insns;
6050                         main_prog->insns_cnt = new_cnt;
6051
6052                         memcpy(main_prog->insns + subprog->sub_insn_off, subprog->insns,
6053                                subprog->insns_cnt * sizeof(*insns));
6054
6055                         pr_debug("prog '%s': added %zu insns from sub-prog '%s'\n",
6056                                  main_prog->name, subprog->insns_cnt, subprog->name);
6057
6058                         err = bpf_object__reloc_code(obj, main_prog, subprog);
6059                         if (err)
6060                                 return err;
6061                 }
6062
6063                 /* main_prog->insns memory could have been re-allocated, so
6064                  * calculate pointer again
6065                  */
6066                 insn = &main_prog->insns[prog->sub_insn_off + insn_idx];
6067                 /* calculate correct instruction position within current main
6068                  * prog; each main prog can have a different set of
6069                  * subprograms appended (potentially in different order as
6070                  * well), so position of any subprog can be different for
6071                  * different main programs */
6072                 insn->imm = subprog->sub_insn_off - (prog->sub_insn_off + insn_idx) - 1;
6073
6074                 if (relo)
6075                         relo->processed = true;
6076
6077                 pr_debug("prog '%s': insn #%zu relocated, imm %d points to subprog '%s' (now at %zu offset)\n",
6078                          prog->name, insn_idx, insn->imm, subprog->name, subprog->sub_insn_off);
6079         }
6080
6081         return 0;
6082 }
6083
6084 /*
6085  * Relocate sub-program calls.
6086  *
6087  * Algorithm operates as follows. Each entry-point BPF program (referred to as
6088  * main prog) is processed separately. Each subprog (a non-entry function
6089  * that can be called from either entry progs or other subprogs) gets its
6090  * sub_insn_off reset to zero. This serves as an indicator that this
6091  * subprogram hasn't yet been appended and relocated within the current
6092  * main prog. Once it is relocated, sub_insn_off will point at the position
6093  * within the current main prog where the given subprog was appended. This
6094  * is further used to relocate all the call instructions jumping into it.
6095  *
6096  * We start with main program and process all call instructions. If the call
6097  * is into a subprog that hasn't been processed (i.e., subprog->sub_insn_off
6098  * is zero), subprog instructions are appended at the end of main program's
6099  * instruction array. Then main program is "put on hold" while we recursively
6100  * process newly appended subprogram. If that subprogram calls into another
6101  * subprogram that hasn't been appended, new subprogram is appended again to
6102  * the *main* prog's instructions (subprog's instructions are always left
6103  * untouched, as they need to be in unmodified state for subsequent main progs
6104  * and subprog instructions are always appended only as part of a main prog) and
6105  * the process continues recursively. Once all the subprogs called from a main
6106  * prog or any of its subprogs are appended (and relocated), all their
6107  * positions within finalized instructions array are known, so it's easy to
6108  * rewrite call instructions with correct relative offsets, corresponding to
6109  * desired target subprog.
6110  *
6111  * It's important to realize that some subprogs might not be called from a
6112  * given main prog or any of its called/used subprogs. Those will keep their
6113  * subprog->sub_insn_off as zero at all times, won't be appended to the
6114  * current main prog, and won't be relocated within its context.
6115  * They might still be used from other main progs later.
6116  *
6117  * Visually this process can be shown as below. Suppose we have two main
6118  * programs mainA and mainB and BPF object contains three subprogs: subA,
6119  * subB, and subC. mainA calls only subA, mainB calls only subC, but subA and
6120  * subC both call subB:
6121  *
6122  *        +--------+ +-------+
6123  *        |        v v       |
6124  *     +--+---+ +--+-+-+ +---+--+
6125  *     | subA | | subB | | subC |
6126  *     +--+---+ +------+ +---+--+
6127  *        ^                  ^
6128  *        |                  |
6129  *    +---+-------+   +------+----+
6130  *    |   mainA   |   |   mainB   |
6131  *    +-----------+   +-----------+
6132  *
6133  * We'll start relocating mainA, will find subA, append it and start
6134  * processing sub A recursively:
6135  *
6136  *    +-----------+------+
6137  *    |   mainA   | subA |
6138  *    +-----------+------+
6139  *
6140  * At this point we notice that subB is used from subA, so we append it and
6141  * relocate (there are no further subcalls from subB):
6142  *
6143  *    +-----------+------+------+
6144  *    |   mainA   | subA | subB |
6145  *    +-----------+------+------+
6146  *
6147  * At this point, we relocate subA calls, then go one level up and finish with
6148  * relocating mainA calls. mainA is done.
6149  *
6150  * For mainB, the process is similar but results in a different order. We start with
6151  * mainB and skip subA and subB, as mainB never calls them (at least
6152  * directly), but we see subC is needed, so we append and start processing it:
6153  *
6154  *    +-----------+------+
6155  *    |   mainB   | subC |
6156  *    +-----------+------+
6157  * Now we see subC needs subB, so we go back to it, append and relocate it:
6158  *
6159  *    +-----------+------+------+
6160  *    |   mainB   | subC | subB |
6161  *    +-----------+------+------+
6162  *
6163  * At this point we unwind recursion, relocate calls in subC, then in mainB.
6164  */
6165 static int
6166 bpf_object__relocate_calls(struct bpf_object *obj, struct bpf_program *prog)
6167 {
6168         struct bpf_program *subprog;
6169         int i, j, err;
6170
6171         /* mark all subprogs as not relocated (yet) within the context of
6172          * current main program
6173          */
6174         for (i = 0; i < obj->nr_programs; i++) {
6175                 subprog = &obj->programs[i];
6176                 if (!prog_is_subprog(obj, subprog))
6177                         continue;
6178
6179                 subprog->sub_insn_off = 0;
6180                 for (j = 0; j < subprog->nr_reloc; j++)
6181                         if (subprog->reloc_desc[j].type == RELO_CALL)
6182                                 subprog->reloc_desc[j].processed = false;
6183         }
6184
6185         err = bpf_object__reloc_code(obj, prog, prog);
6186         if (err)
6187                 return err;
6188
6190         return 0;
6191 }
6192
6193 static int
6194 bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path)
6195 {
6196         struct bpf_program *prog;
6197         size_t i;
6198         int err;
6199
6200         if (obj->btf_ext) {
6201                 err = bpf_object__relocate_core(obj, targ_btf_path);
6202                 if (err) {
6203                         pr_warn("failed to perform CO-RE relocations: %d\n",
6204                                 err);
6205                         return err;
6206                 }
6207         }
6208         /* relocate data references first for all programs and sub-programs,
6209          * as they don't change relative to code locations, so subsequent
6210          * subprogram processing won't need to re-calculate any of them
6211          */
6212         for (i = 0; i < obj->nr_programs; i++) {
6213                 prog = &obj->programs[i];
6214                 err = bpf_object__relocate_data(obj, prog);
6215                 if (err) {
6216                         pr_warn("prog '%s': failed to relocate data references: %d\n",
6217                                 prog->name, err);
6218                         return err;
6219                 }
6220         }
6221         /* now relocate subprogram calls and append used subprograms to main
6222          * programs; each copy of subprogram code needs to be relocated
6223          * differently for each main program, because its code location might
6224          * have changed
6225          */
6226         for (i = 0; i < obj->nr_programs; i++) {
6227                 prog = &obj->programs[i];
6228                 /* sub-program's sub-calls are relocated within the context of
6229                  * its main program only
6230                  */
6231                 if (prog_is_subprog(obj, prog))
6232                         continue;
6233
6234                 err = bpf_object__relocate_calls(obj, prog);
6235                 if (err) {
6236                         pr_warn("prog '%s': failed to relocate calls: %d\n",
6237                                 prog->name, err);
6238                         return err;
6239                 }
6240         }
6241         /* free up relocation descriptors */
6242         for (i = 0; i < obj->nr_programs; i++) {
6243                 prog = &obj->programs[i];
6244                 zfree(&prog->reloc_desc);
6245                 prog->nr_reloc = 0;
6246         }
6247         return 0;
6248 }
6249
6250 static int bpf_object__collect_st_ops_relos(struct bpf_object *obj,
6251                                             GElf_Shdr *shdr, Elf_Data *data);
6252
6253 static int bpf_object__collect_map_relos(struct bpf_object *obj,
6254                                          GElf_Shdr *shdr, Elf_Data *data)
6255 {
6256         const int bpf_ptr_sz = 8, host_ptr_sz = sizeof(void *);
6257         int i, j, nrels, new_sz;
6258         const struct btf_var_secinfo *vi = NULL;
6259         const struct btf_type *sec, *var, *def;
6260         struct bpf_map *map = NULL, *targ_map;
6261         const struct btf_member *member;
6262         const char *name, *mname;
6263         Elf_Data *symbols;
6264         unsigned int moff;
6265         GElf_Sym sym;
6266         GElf_Rel rel;
6267         void *tmp;
6268
6269         if (!obj->efile.btf_maps_sec_btf_id || !obj->btf)
6270                 return -EINVAL;
6271         sec = btf__type_by_id(obj->btf, obj->efile.btf_maps_sec_btf_id);
6272         if (!sec)
6273                 return -EINVAL;
6274
6275         symbols = obj->efile.symbols;
6276         nrels = shdr->sh_size / shdr->sh_entsize;
6277         for (i = 0; i < nrels; i++) {
6278                 if (!gelf_getrel(data, i, &rel)) {
6279                         pr_warn(".maps relo #%d: failed to get ELF relo\n", i);
6280                         return -LIBBPF_ERRNO__FORMAT;
6281                 }
6282                 if (!gelf_getsym(symbols, GELF_R_SYM(rel.r_info), &sym)) {
6283                         pr_warn(".maps relo #%d: symbol %zx not found\n",
6284                                 i, (size_t)GELF_R_SYM(rel.r_info));
6285                         return -LIBBPF_ERRNO__FORMAT;
6286                 }
6287                 name = elf_sym_str(obj, sym.st_name) ?: "<?>";
6288                 if (sym.st_shndx != obj->efile.btf_maps_shndx) {
6289                         pr_warn(".maps relo #%d: '%s' isn't a BTF-defined map\n",
6290                                 i, name);
6291                         return -LIBBPF_ERRNO__RELOC;
6292                 }
6293
6294                 pr_debug(".maps relo #%d: for %zd value %zd rel.r_offset %zu name %d ('%s')\n",
6295                          i, (ssize_t)(rel.r_info >> 32), (size_t)sym.st_value,
6296                          (size_t)rel.r_offset, sym.st_name, name);
6297
6298                 for (j = 0; j < obj->nr_maps; j++) {
6299                         map = &obj->maps[j];
6300                         if (map->sec_idx != obj->efile.btf_maps_shndx)
6301                                 continue;
6302
6303                         vi = btf_var_secinfos(sec) + map->btf_var_idx;
6304                         if (vi->offset <= rel.r_offset &&
6305                             rel.r_offset + bpf_ptr_sz <= vi->offset + vi->size)
6306                                 break;
6307                 }
6308                 if (j == obj->nr_maps) {
6309                         pr_warn(".maps relo #%d: cannot find map '%s' at rel.r_offset %zu\n",
6310                                 i, name, (size_t)rel.r_offset);
6311                         return -EINVAL;
6312                 }
6313
6314                 if (!bpf_map_type__is_map_in_map(map->def.type))
6315                         return -EINVAL;
6316                 if (map->def.type == BPF_MAP_TYPE_HASH_OF_MAPS &&
6317                     map->def.key_size != sizeof(int)) {
6318                         pr_warn(".maps relo #%d: hash-of-maps '%s' should have key size %zu.\n",
6319                                 i, map->name, sizeof(int));
6320                         return -EINVAL;
6321                 }
6322
6323                 targ_map = bpf_object__find_map_by_name(obj, name);
6324                 if (!targ_map)
6325                         return -ESRCH;
6326
6327                 var = btf__type_by_id(obj->btf, vi->type);
6328                 def = skip_mods_and_typedefs(obj->btf, var->type, NULL);
6329                 if (btf_vlen(def) == 0)
6330                         return -EINVAL;
6331                 member = btf_members(def) + btf_vlen(def) - 1;
6332                 mname = btf__name_by_offset(obj->btf, member->name_off);
6333                 if (strcmp(mname, "values"))
6334                         return -EINVAL;
6335
6336                 moff = btf_member_bit_offset(def, btf_vlen(def) - 1) / 8;
6337                 if (rel.r_offset - vi->offset < moff)
6338                         return -EINVAL;
6339
6340                 moff = rel.r_offset - vi->offset - moff;
6341                 /* here we use BPF pointer size, which is always 64 bit, as we
6342                  * are parsing ELF that was built for BPF target
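                      * (e.g., illustratively: a relocation 16 bytes past the
                      * start of the "values" array initializes inner-map
                      * slot 16 / 8 = 2)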
6343                  */
6344                 if (moff % bpf_ptr_sz)
6345                         return -EINVAL;
6346                 moff /= bpf_ptr_sz;
6347                 if (moff >= map->init_slots_sz) {
6348                         new_sz = moff + 1;
6349                         tmp = libbpf_reallocarray(map->init_slots, new_sz, host_ptr_sz);
6350                         if (!tmp)
6351                                 return -ENOMEM;
6352                         map->init_slots = tmp;
6353                         memset(map->init_slots + map->init_slots_sz, 0,
6354                                (new_sz - map->init_slots_sz) * host_ptr_sz);
6355                         map->init_slots_sz = new_sz;
6356                 }
6357                 map->init_slots[moff] = targ_map;
6358
6359                 pr_debug(".maps relo #%d: map '%s' slot [%d] points to map '%s'\n",
6360                          i, map->name, moff, name);
6361         }
6362
6363         return 0;
6364 }
6365
6366 static int cmp_relocs(const void *_a, const void *_b)
6367 {
6368         const struct reloc_desc *a = _a;
6369         const struct reloc_desc *b = _b;
6370
6371         if (a->insn_idx != b->insn_idx)
6372                 return a->insn_idx < b->insn_idx ? -1 : 1;
6373
6374         /* no two relocations should have the same insn_idx, but ... */
6375         if (a->type != b->type)
6376                 return a->type < b->type ? -1 : 1;
6377
6378         return 0;
6379 }
6380
6381 static int bpf_object__collect_relos(struct bpf_object *obj)
6382 {
6383         int i, err;
6384
6385         for (i = 0; i < obj->efile.nr_reloc_sects; i++) {
6386                 GElf_Shdr *shdr = &obj->efile.reloc_sects[i].shdr;
6387                 Elf_Data *data = obj->efile.reloc_sects[i].data;
6388                 int idx = shdr->sh_info;
6389
6390                 if (shdr->sh_type != SHT_REL) {
6391                         pr_warn("internal error at %d\n", __LINE__);
6392                         return -LIBBPF_ERRNO__INTERNAL;
6393                 }
6394
6395                 if (idx == obj->efile.st_ops_shndx)
6396                         err = bpf_object__collect_st_ops_relos(obj, shdr, data);
6397                 else if (idx == obj->efile.btf_maps_shndx)
6398                         err = bpf_object__collect_map_relos(obj, shdr, data);
6399                 else
6400                         err = bpf_object__collect_prog_relos(obj, shdr, data);
6401                 if (err)
6402                         return err;
6403         }
6404
6405         for (i = 0; i < obj->nr_programs; i++) {
6406                 struct bpf_program *p = &obj->programs[i];
6407
6408                 if (!p->nr_reloc)
6409                         continue;
6410
6411                 qsort(p->reloc_desc, p->nr_reloc, sizeof(*p->reloc_desc), cmp_relocs);
6412         }
6413         return 0;
6414 }
6415
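     /* A helper call is encoded as BPF_JMP | BPF_CALL with a BPF_K source
      * and both registers zeroed; a subprog call would carry BPF_PSEUDO_CALL
      * in src_reg instead. insn->imm then holds the helper ID from
      * enum bpf_func_id.
      */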
6416 static bool insn_is_helper_call(struct bpf_insn *insn, enum bpf_func_id *func_id)
6417 {
6418         if (BPF_CLASS(insn->code) == BPF_JMP &&
6419             BPF_OP(insn->code) == BPF_CALL &&
6420             BPF_SRC(insn->code) == BPF_K &&
6421             insn->src_reg == 0 &&
6422             insn->dst_reg == 0) {
6423                     *func_id = insn->imm;
6424                     return true;
6425         }
6426         return false;
6427 }
6428
6429 static int bpf_object__sanitize_prog(struct bpf_object* obj, struct bpf_program *prog)
6430 {
6431         struct bpf_insn *insn = prog->insns;
6432         enum bpf_func_id func_id;
6433         int i;
6434
6435         for (i = 0; i < prog->insns_cnt; i++, insn++) {
6436                 if (!insn_is_helper_call(insn, &func_id))
6437                         continue;
6438
6439                 /* on kernels that don't yet support
6440                  * bpf_probe_read_{kernel,user}[_str] helpers, fall back
6441                  * to bpf_probe_read() which works well for old kernels
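                      * (the legacy bpf_probe_read() doesn't distinguish
                      * kernel and user address spaces, which is why the
                      * split helpers were introduced; the fallback trades
                      * that precision for compatibility)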
6442                  */
6443                 switch (func_id) {
6444                 case BPF_FUNC_probe_read_kernel:
6445                 case BPF_FUNC_probe_read_user:
6446                         if (!kernel_supports(FEAT_PROBE_READ_KERN))
6447                                 insn->imm = BPF_FUNC_probe_read;
6448                         break;
6449                 case BPF_FUNC_probe_read_kernel_str:
6450                 case BPF_FUNC_probe_read_user_str:
6451                         if (!kernel_supports(FEAT_PROBE_READ_KERN))
6452                                 insn->imm = BPF_FUNC_probe_read_str;
6453                         break;
6454                 default:
6455                         break;
6456                 }
6457         }
6458         return 0;
6459 }
6460
6461 static int
6462 load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt,
6463              char *license, __u32 kern_version, int *pfd)
6464 {
6465         struct bpf_load_program_attr load_attr;
6466         char *cp, errmsg[STRERR_BUFSIZE];
6467         size_t log_buf_size = 0;
6468         char *log_buf = NULL;
6469         int btf_fd, ret;
6470
6471         if (!insns || !insns_cnt)
6472                 return -EINVAL;
6473
6474         memset(&load_attr, 0, sizeof(struct bpf_load_program_attr));
6475         load_attr.prog_type = prog->type;
6476         /* old kernels might not support specifying expected_attach_type */
6477         if (!kernel_supports(FEAT_EXP_ATTACH_TYPE) && prog->sec_def &&
6478             prog->sec_def->is_exp_attach_type_optional)
6479                 load_attr.expected_attach_type = 0;
6480         else
6481                 load_attr.expected_attach_type = prog->expected_attach_type;
6482         if (kernel_supports(FEAT_PROG_NAME))
6483                 load_attr.name = prog->name;
6484         load_attr.insns = insns;
6485         load_attr.insns_cnt = insns_cnt;
6486         load_attr.license = license;
6487         if (prog->type == BPF_PROG_TYPE_STRUCT_OPS ||
6488             prog->type == BPF_PROG_TYPE_LSM) {
6489                 load_attr.attach_btf_id = prog->attach_btf_id;
6490         } else if (prog->type == BPF_PROG_TYPE_TRACING ||
6491                    prog->type == BPF_PROG_TYPE_EXT) {
6492                 load_attr.attach_prog_fd = prog->attach_prog_fd;
6493                 load_attr.attach_btf_id = prog->attach_btf_id;
6494         } else {
6495                 load_attr.kern_version = kern_version;
6496                 load_attr.prog_ifindex = prog->prog_ifindex;
6497         }
6498         /* specify func_info/line_info only if kernel supports them */
6499         btf_fd = bpf_object__btf_fd(prog->obj);
6500         if (btf_fd >= 0 && kernel_supports(FEAT_BTF_FUNC)) {
6501                 load_attr.prog_btf_fd = btf_fd;
6502                 load_attr.func_info = prog->func_info;
6503                 load_attr.func_info_rec_size = prog->func_info_rec_size;
6504                 load_attr.func_info_cnt = prog->func_info_cnt;
6505                 load_attr.line_info = prog->line_info;
6506                 load_attr.line_info_rec_size = prog->line_info_rec_size;
6507                 load_attr.line_info_cnt = prog->line_info_cnt;
6508         }
6509         load_attr.log_level = prog->log_level;
6510         load_attr.prog_flags = prog->prog_flags;
6511
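     /* The first attempt is made without a log buffer; on failure a buffer
      * of BPF_LOG_BUF_SIZE bytes is allocated and the load retried, doubling
      * the buffer for as long as the kernel keeps failing with ENOSPC, so
      * the full verifier log can be captured.
      */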
6512 retry_load:
6513         if (log_buf_size) {
6514                 log_buf = malloc(log_buf_size);
6515                 if (!log_buf)
6516                         return -ENOMEM;
6517
6518                 *log_buf = 0;
6519         }
6520
6521         ret = bpf_load_program_xattr(&load_attr, log_buf, log_buf_size);
6522
6523         if (ret >= 0) {
6524                 if (log_buf && load_attr.log_level)
6525                         pr_debug("verifier log:\n%s", log_buf);
6526
6527                 if (prog->obj->rodata_map_idx >= 0 &&
6528                     kernel_supports(FEAT_PROG_BIND_MAP)) {
6529                         struct bpf_map *rodata_map =
6530                                 &prog->obj->maps[prog->obj->rodata_map_idx];
6531
6532                         if (bpf_prog_bind_map(ret, bpf_map__fd(rodata_map), NULL)) {
6533                                 cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
6534                                 pr_warn("prog '%s': failed to bind .rodata map: %s\n",
6535                                         prog->name, cp);
6536                                 /* Don't fail hard if can't bind rodata. */
6537                         }
6538                 }
6539
6540                 *pfd = ret;
6541                 ret = 0;
6542                 goto out;
6543         }
6544
6545         if (!log_buf || errno == ENOSPC) {
6546                 log_buf_size = max((size_t)BPF_LOG_BUF_SIZE,
6547                                    log_buf_size << 1);
6548
6549                 free(log_buf);
6550                 goto retry_load;
6551         }
6552         ret = errno ? -errno : -LIBBPF_ERRNO__LOAD;
6553         cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
6554         pr_warn("load bpf program failed: %s\n", cp);
6555         pr_perm_msg(ret);
6556
6557         if (log_buf && log_buf[0] != '\0') {
6558                 ret = -LIBBPF_ERRNO__VERIFY;
6559                 pr_warn("-- BEGIN DUMP LOG ---\n");
6560                 pr_warn("\n%s\n", log_buf);
6561                 pr_warn("-- END LOG --\n");
6562         } else if (load_attr.insns_cnt >= BPF_MAXINSNS) {
6563                 pr_warn("Program too large (%zu insns), at most %d insns\n",
6564                         load_attr.insns_cnt, BPF_MAXINSNS);
6565                 ret = -LIBBPF_ERRNO__PROG2BIG;
6566         } else if (load_attr.prog_type != BPF_PROG_TYPE_KPROBE) {
6567                 /* Wrong program type? */
6568                 int fd;
6569
6570                 load_attr.prog_type = BPF_PROG_TYPE_KPROBE;
6571                 load_attr.expected_attach_type = 0;
6572                 fd = bpf_load_program_xattr(&load_attr, NULL, 0);
6573                 if (fd >= 0) {
6574                         close(fd);
6575                         ret = -LIBBPF_ERRNO__PROGTYPE;
6576                         goto out;
6577                 }
6578         }
6579
6580 out:
6581         free(log_buf);
6582         return ret;
6583 }
6584
6585 static int libbpf_find_attach_btf_id(struct bpf_program *prog);
6586
6587 int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_ver)
6588 {
6589         int err = 0, fd, i, btf_id;
6590
6591         if (prog->obj->loaded) {
6592                 pr_warn("prog '%s': can't load after object was loaded\n", prog->name);
6593                 return -EINVAL;
6594         }
6595
6596         if ((prog->type == BPF_PROG_TYPE_TRACING ||
6597              prog->type == BPF_PROG_TYPE_LSM ||
6598              prog->type == BPF_PROG_TYPE_EXT) && !prog->attach_btf_id) {
6599                 btf_id = libbpf_find_attach_btf_id(prog);
6600                 if (btf_id <= 0)
6601                         return btf_id;
6602                 prog->attach_btf_id = btf_id;
6603         }
6604
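             /* instances are a legacy multi-instance mechanism driven by
              * bpf_program__set_prep(); plain programs get a single
              * instance with fds[0] initialized to -1 below
              */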
6605         if (prog->instances.nr < 0 || !prog->instances.fds) {
6606                 if (prog->preprocessor) {
6607                         pr_warn("Internal error: can't load program '%s'\n",
6608                                 prog->name);
6609                         return -LIBBPF_ERRNO__INTERNAL;
6610                 }
6611
6612                 prog->instances.fds = malloc(sizeof(int));
6613                 if (!prog->instances.fds) {
6614                         pr_warn("Not enough memory for BPF fds\n");
6615                         return -ENOMEM;
6616                 }
6617                 prog->instances.nr = 1;
6618                 prog->instances.fds[0] = -1;
6619         }
6620
6621         if (!prog->preprocessor) {
6622                 if (prog->instances.nr != 1) {
6623                         pr_warn("prog '%s': inconsistent nr(%d) != 1\n",
6624                                 prog->name, prog->instances.nr);
6625                 }
6626                 err = load_program(prog, prog->insns, prog->insns_cnt,
6627                                    license, kern_ver, &fd);
6628                 if (!err)
6629                         prog->instances.fds[0] = fd;
6630                 goto out;
6631         }
6632
6633         for (i = 0; i < prog->instances.nr; i++) {
6634                 struct bpf_prog_prep_result result;
6635                 bpf_program_prep_t preprocessor = prog->preprocessor;
6636
6637                 memset(&result, 0, sizeof(result));
6638                 err = preprocessor(prog, i, prog->insns,
6639                                    prog->insns_cnt, &result);
6640                 if (err) {
6641                         pr_warn("Preprocessing the %dth instance of program '%s' failed\n",
6642                                 i, prog->name);
6643                         goto out;
6644                 }
6645
6646                 if (!result.new_insn_ptr || !result.new_insn_cnt) {
6647                         pr_debug("Skip loading the %dth instance of program '%s'\n",
6648                                  i, prog->name);
6649                         prog->instances.fds[i] = -1;
6650                         if (result.pfd)
6651                                 *result.pfd = -1;
6652                         continue;
6653                 }
6654
6655                 err = load_program(prog, result.new_insn_ptr,
6656                                    result.new_insn_cnt, license, kern_ver, &fd);
6657                 if (err) {
6658                         pr_warn("Loading the %dth instance of program '%s' failed\n",
6659                                 i, prog->name);
6660                         goto out;
6661                 }
6662
6663                 if (result.pfd)
6664                         *result.pfd = fd;
6665                 prog->instances.fds[i] = fd;
6666         }
6667 out:
6668         if (err)
6669                 pr_warn("failed to load program '%s'\n", prog->name);
6670         zfree(&prog->insns);
6671         prog->insns_cnt = 0;
6672         return err;
6673 }
6674
6675 static int
6676 bpf_object__load_progs(struct bpf_object *obj, int log_level)
6677 {
6678         struct bpf_program *prog;
6679         size_t i;
6680         int err;
6681
6682         for (i = 0; i < obj->nr_programs; i++) {
6683                 prog = &obj->programs[i];
6684                 err = bpf_object__sanitize_prog(obj, prog);
6685                 if (err)
6686                         return err;
6687         }
6688
6689         for (i = 0; i < obj->nr_programs; i++) {
6690                 prog = &obj->programs[i];
6691                 if (prog_is_subprog(obj, prog))
6692                         continue;
6693                 if (!prog->load) {
6694                         pr_debug("prog '%s': skipped loading\n", prog->name);
6695                         continue;
6696                 }
6697                 prog->log_level |= log_level;
6698                 err = bpf_program__load(prog, obj->license, obj->kern_version);
6699                 if (err)
6700                         return err;
6701         }
6702         return 0;
6703 }
6704
6705 static const struct bpf_sec_def *find_sec_def(const char *sec_name);
6706
6707 static struct bpf_object *
6708 __bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz,
6709                    const struct bpf_object_open_opts *opts)
6710 {
6711         const char *obj_name, *kconfig;
6712         struct bpf_program *prog;
6713         struct bpf_object *obj;
6714         char tmp_name[64];
6715         int err;
6716
6717         if (elf_version(EV_CURRENT) == EV_NONE) {
6718                 pr_warn("failed to init libelf for %s\n",
6719                         path ? : "(mem buf)");
6720                 return ERR_PTR(-LIBBPF_ERRNO__LIBELF);
6721         }
6722
6723         if (!OPTS_VALID(opts, bpf_object_open_opts))
6724                 return ERR_PTR(-EINVAL);
6725
6726         obj_name = OPTS_GET(opts, object_name, NULL);
6727         if (obj_buf) {
6728                 if (!obj_name) {
6729                         snprintf(tmp_name, sizeof(tmp_name), "%lx-%lx",
6730                                  (unsigned long)obj_buf,
6731                                  (unsigned long)obj_buf_sz);
6732                         obj_name = tmp_name;
6733                 }
6734                 path = obj_name;
6735                 pr_debug("loading object '%s' from buffer\n", obj_name);
6736         }
6737
6738         obj = bpf_object__new(path, obj_buf, obj_buf_sz, obj_name);
6739         if (IS_ERR(obj))
6740                 return obj;
6741
6742         kconfig = OPTS_GET(opts, kconfig, NULL);
6743         if (kconfig) {
6744                 obj->kconfig = strdup(kconfig);
6745                 if (!obj->kconfig) {
6746                         err = -ENOMEM;
6747                         goto out;
6748                 }
6749         }
6748
6749         err = bpf_object__elf_init(obj);
6750         err = err ? : bpf_object__check_endianness(obj);
6751         err = err ? : bpf_object__elf_collect(obj);
6752         err = err ? : bpf_object__collect_externs(obj);
6753         err = err ? : bpf_object__finalize_btf(obj);
6754         err = err ? : bpf_object__init_maps(obj, opts);
6755         err = err ? : bpf_object__collect_relos(obj);
6756         if (err)
6757                 goto out;
6758         bpf_object__elf_finish(obj);
6759
6760         bpf_object__for_each_program(prog, obj) {
6761                 prog->sec_def = find_sec_def(prog->sec_name);
6762                 if (!prog->sec_def)
6763                         /* couldn't guess, but user might manually specify */
6764                         continue;
6765
6766                 if (prog->sec_def->is_sleepable)
6767                         prog->prog_flags |= BPF_F_SLEEPABLE;
6768                 bpf_program__set_type(prog, prog->sec_def->prog_type);
6769                 bpf_program__set_expected_attach_type(prog,
6770                                 prog->sec_def->expected_attach_type);
6771
6772                 if (prog->sec_def->prog_type == BPF_PROG_TYPE_TRACING ||
6773                     prog->sec_def->prog_type == BPF_PROG_TYPE_EXT)
6774                         prog->attach_prog_fd = OPTS_GET(opts, attach_prog_fd, 0);
6775         }
6776
6777         return obj;
6778 out:
6779         bpf_object__close(obj);
6780         return ERR_PTR(err);
6781 }
6782
6783 static struct bpf_object *
6784 __bpf_object__open_xattr(struct bpf_object_open_attr *attr, int flags)
6785 {
6786         DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts,
6787                 .relaxed_maps = flags & MAPS_RELAX_COMPAT,
6788         );
6789
6790         /* param validation */
6791         if (!attr->file)
6792                 return NULL;
6793
6794         pr_debug("loading %s\n", attr->file);
6795         return __bpf_object__open(attr->file, NULL, 0, &opts);
6796 }
6797
6798 struct bpf_object *bpf_object__open_xattr(struct bpf_object_open_attr *attr)
6799 {
6800         return __bpf_object__open_xattr(attr, 0);
6801 }
6802
6803 struct bpf_object *bpf_object__open(const char *path)
6804 {
6805         struct bpf_object_open_attr attr = {
6806                 .file           = path,
6807                 .prog_type      = BPF_PROG_TYPE_UNSPEC,
6808         };
6809
6810         return bpf_object__open_xattr(&attr);
6811 }
6812
6813 struct bpf_object *
6814 bpf_object__open_file(const char *path, const struct bpf_object_open_opts *opts)
6815 {
6816         if (!path)
6817                 return ERR_PTR(-EINVAL);
6818
6819         pr_debug("loading %s\n", path);
6820
6821         return __bpf_object__open(path, NULL, 0, opts);
6822 }
6823
6824 struct bpf_object *
6825 bpf_object__open_mem(const void *obj_buf, size_t obj_buf_sz,
6826                      const struct bpf_object_open_opts *opts)
6827 {
6828         if (!obj_buf || obj_buf_sz == 0)
6829                 return ERR_PTR(-EINVAL);
6830
6831         return __bpf_object__open(NULL, obj_buf, obj_buf_sz, opts);
6832 }
6833
6834 struct bpf_object *
6835 bpf_object__open_buffer(const void *obj_buf, size_t obj_buf_sz,
6836                         const char *name)
6837 {
6838         DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts,
6839                 .object_name = name,
6840                 /* wrong default, but backwards-compatible */
6841                 .relaxed_maps = true,
6842         );
6843
6844         /* returning NULL is wrong, but backwards-compatible */
6845         if (!obj_buf || obj_buf_sz == 0)
6846                 return NULL;
6847
6848         return bpf_object__open_mem(obj_buf, obj_buf_sz, &opts);
6849 }
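
/*
 * Editor's note: bpf_object__open_file() and bpf_object__open_mem() are
 * the opts-based entry points; the xattr and buffer variants above exist
 * for backwards compatibility. A sketch (object name is hypothetical):
 *
 *     DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts,
 *             .object_name = "my_obj",
 *     );
 *     struct bpf_object *obj = bpf_object__open_file("prog.bpf.o", &opts);
 *
 *     if (IS_ERR(obj))        // errors are reported via ERR_PTR()
 *             return PTR_ERR(obj);
 */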
6850
6851 int bpf_object__unload(struct bpf_object *obj)
6852 {
6853         size_t i;
6854
6855         if (!obj)
6856                 return -EINVAL;
6857
6858         for (i = 0; i < obj->nr_maps; i++) {
6859                 zclose(obj->maps[i].fd);
6860                 if (obj->maps[i].st_ops)
6861                         zfree(&obj->maps[i].st_ops->kern_vdata);
6862         }
6863
6864         for (i = 0; i < obj->nr_programs; i++)
6865                 bpf_program__unload(&obj->programs[i]);
6866
6867         return 0;
6868 }
6869
6870 static int bpf_object__sanitize_maps(struct bpf_object *obj)
6871 {
6872         struct bpf_map *m;
6873
6874         bpf_object__for_each_map(m, obj) {
6875                 if (!bpf_map__is_internal(m))
6876                         continue;
6877                 if (!kernel_supports(FEAT_GLOBAL_DATA)) {
6878                         pr_warn("kernel doesn't support global data\n");
6879                         return -ENOTSUP;
6880                 }
6881                 if (!kernel_supports(FEAT_ARRAY_MMAP))
6882                         m->def.map_flags &= ~BPF_F_MMAPABLE;
6883         }
6884
6885         return 0;
6886 }
6887
6888 static int bpf_object__read_kallsyms_file(struct bpf_object *obj)
6889 {
6890         char sym_type, sym_name[500];
6891         unsigned long long sym_addr;
6892         struct extern_desc *ext;
6893         int ret, err = 0;
6894         FILE *f;
6895
6896         f = fopen("/proc/kallsyms", "r");
6897         if (!f) {
6898                 err = -errno;
6899                 pr_warn("failed to open /proc/kallsyms: %d\n", err);
6900                 return err;
6901         }
6902
6903         while (true) {
6904                 ret = fscanf(f, "%llx %c %499s%*[^\n]\n",
6905                              &sym_addr, &sym_type, sym_name);
6906                 if (ret == EOF && feof(f))
6907                         break;
6908                 if (ret != 3) {
6909                         pr_warn("failed to read kallsyms entry: %d\n", ret);
6910                         err = -EINVAL;
6911                         goto out;
6912                 }
6913
6914                 ext = find_extern_by_name(obj, sym_name);
6915                 if (!ext || ext->type != EXT_KSYM)
6916                         continue;
6917
6918                 if (ext->is_set && ext->ksym.addr != sym_addr) {
6919                         pr_warn("extern (ksym) '%s' resolution is ambiguous: 0x%llx or 0x%llx\n",
6920                                 sym_name, ext->ksym.addr, sym_addr);
6921                         err = -EINVAL;
6922                         goto out;
6923                 }
6924                 if (!ext->is_set) {
6925                         ext->is_set = true;
6926                         ext->ksym.addr = sym_addr;
6927                         pr_debug("extern (ksym) %s=0x%llx\n", sym_name, sym_addr);
6928                 }
6929         }
6930
6931 out:
6932         fclose(f);
6933         return err;
6934 }
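
/*
 * Editor's note: each /proc/kallsyms line parsed above has the form
 * "<addr> <type> <name>[ [module]]", e.g.:
 *
 *     ffffffff81a01020 D bpf_prog_active
 *
 * The fscanf() pattern reads the hex address, the one-character symbol
 * type and up to 499 bytes of the name, discarding the rest of the line.
 */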
6935
6936 static int bpf_object__resolve_externs(struct bpf_object *obj,
6937                                        const char *extra_kconfig)
6938 {
6939         bool need_config = false, need_kallsyms = false;
6940         struct extern_desc *ext;
6941         void *kcfg_data = NULL;
6942         int err, i;
6943
6944         if (obj->nr_extern == 0)
6945                 return 0;
6946
6947         if (obj->kconfig_map_idx >= 0)
6948                 kcfg_data = obj->maps[obj->kconfig_map_idx].mmaped;
6949
6950         for (i = 0; i < obj->nr_extern; i++) {
6951                 ext = &obj->externs[i];
6952
6953                 if (ext->type == EXT_KCFG &&
6954                     strcmp(ext->name, "LINUX_KERNEL_VERSION") == 0) {
6955                         void *ext_val = kcfg_data + ext->kcfg.data_off;
6956                         __u32 kver = get_kernel_version();
6957
6958                         if (!kver) {
6959                                 pr_warn("failed to get kernel version\n");
6960                                 return -EINVAL;
6961                         }
6962                         err = set_kcfg_value_num(ext, ext_val, kver);
6963                         if (err)
6964                                 return err;
6965                         pr_debug("extern (kcfg) %s=0x%x\n", ext->name, kver);
6966                 } else if (ext->type == EXT_KCFG &&
6967                            strncmp(ext->name, "CONFIG_", 7) == 0) {
6968                         need_config = true;
6969                 } else if (ext->type == EXT_KSYM) {
6970                         need_kallsyms = true;
6971                 } else {
6972                         pr_warn("unrecognized extern '%s'\n", ext->name);
6973                         return -EINVAL;
6974                 }
6975         }
6976         if (need_config && extra_kconfig) {
6977                 err = bpf_object__read_kconfig_mem(obj, extra_kconfig, kcfg_data);
6978                 if (err)
6979                         return -EINVAL;
6980                 need_config = false;
6981                 for (i = 0; i < obj->nr_extern; i++) {
6982                         ext = &obj->externs[i];
6983                         if (ext->type == EXT_KCFG && !ext->is_set) {
6984                                 need_config = true;
6985                                 break;
6986                         }
6987                 }
6988         }
6989         if (need_config) {
6990                 err = bpf_object__read_kconfig_file(obj, kcfg_data);
6991                 if (err)
6992                         return -EINVAL;
6993         }
6994         if (need_kallsyms) {
6995                 err = bpf_object__read_kallsyms_file(obj);
6996                 if (err)
6997                         return -EINVAL;
6998         }
6999         for (i = 0; i < obj->nr_extern; i++) {
7000                 ext = &obj->externs[i];
7001
7002                 if (!ext->is_set && !ext->is_weak) {
7003                         pr_warn("extern %s (strong) not resolved\n", ext->name);
7004                         return -ESRCH;
7005                 } else if (!ext->is_set) {
7006                         pr_debug("extern %s (weak) not resolved, defaulting to zero\n",
7007                                  ext->name);
7008                 }
7009         }
7010
7011         return 0;
7012 }
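
/*
 * Editor's note: the externs resolved above correspond to declarations
 * like these in BPF C code (names are illustrative):
 *
 *     extern int LINUX_KERNEL_VERSION __kconfig;         // EXT_KCFG, special-cased
 *     extern _Bool CONFIG_BPF_JIT_ALWAYS_ON __kconfig;   // EXT_KCFG, from Kconfig
 *     extern const void bpf_prog_active __ksym;          // EXT_KSYM, from kallsyms
 *
 * A strong extern that stays unresolved fails the load with -ESRCH; a
 * weak one silently defaults to zero.
 */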
7013
7014 int bpf_object__load_xattr(struct bpf_object_load_attr *attr)
7015 {
7016         struct bpf_object *obj;
7017         int err, i;
7018
7019         if (!attr)
7020                 return -EINVAL;
7021         obj = attr->obj;
7022         if (!obj)
7023                 return -EINVAL;
7024
7025         if (obj->loaded) {
7026                 pr_warn("object '%s': load can't be attempted twice\n", obj->name);
7027                 return -EINVAL;
7028         }
7029
7030         err = bpf_object__probe_loading(obj);
7031         err = err ? : bpf_object__resolve_externs(obj, obj->kconfig);
7032         err = err ? : bpf_object__sanitize_and_load_btf(obj);
7033         err = err ? : bpf_object__sanitize_maps(obj);
7034         err = err ? : bpf_object__load_vmlinux_btf(obj);
7035         err = err ? : bpf_object__init_kern_struct_ops_maps(obj);
7036         err = err ? : bpf_object__create_maps(obj);
7037         err = err ? : bpf_object__relocate(obj, attr->target_btf_path);
7038         err = err ? : bpf_object__load_progs(obj, attr->log_level);
7039
7040         btf__free(obj->btf_vmlinux);
7041         obj->btf_vmlinux = NULL;
7042
7043         obj->loaded = true; /* set regardless of load success */
7044
7045         if (err)
7046                 goto out;
7047
7048         return 0;
7049 out:
7050         /* unpin any maps that were auto-pinned during load */
7051         for (i = 0; i < obj->nr_maps; i++)
7052                 if (obj->maps[i].pinned && !obj->maps[i].reused)
7053                         bpf_map__unpin(&obj->maps[i], NULL);
7054
7055         bpf_object__unload(obj);
7056         pr_warn("failed to load object '%s'\n", obj->path);
7057         return err;
7058 }
7059
7060 int bpf_object__load(struct bpf_object *obj)
7061 {
7062         struct bpf_object_load_attr attr = {
7063                 .obj = obj,
7064         };
7065
7066         return bpf_object__load_xattr(&attr);
7067 }
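
/*
 * Editor's note: a typical open + load sequence using the two functions
 * above; the xattr variant lets callers raise log_level for verifier
 * debugging (file name is hypothetical):
 *
 *     struct bpf_object *obj = bpf_object__open_file("prog.bpf.o", NULL);
 *     struct bpf_object_load_attr attr = {
 *             .obj = obj,
 *             .log_level = 1, // request a verifier log
 *     };
 *
 *     err = bpf_object__load_xattr(&attr);
 *     // on failure, maps auto-pinned during load were already unpinned
 */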
7068
7069 static int make_parent_dir(const char *path)
7070 {
7071         char *cp, errmsg[STRERR_BUFSIZE];
7072         char *dname, *dir;
7073         int err = 0;
7074
7075         dname = strdup(path);
7076         if (dname == NULL)
7077                 return -ENOMEM;
7078
7079         dir = dirname(dname);
7080         if (mkdir(dir, 0700) && errno != EEXIST)
7081                 err = -errno;
7082
7083         free(dname);
7084         if (err) {
7085                 cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg));
7086                 pr_warn("failed to mkdir %s: %s\n", path, cp);
7087         }
7088         return err;
7089 }
7090
7091 static int check_path(const char *path)
7092 {
7093         char *cp, errmsg[STRERR_BUFSIZE];
7094         struct statfs st_fs;
7095         char *dname, *dir;
7096         int err = 0;
7097
7098         if (path == NULL)
7099                 return -EINVAL;
7100
7101         dname = strdup(path);
7102         if (dname == NULL)
7103                 return -ENOMEM;
7104
7105         dir = dirname(dname);
7106         if (statfs(dir, &st_fs)) {
7107                 err = -errno;
7108                 cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg));
7109                 pr_warn("failed to statfs %s: %s\n", dir, cp);
7110         }
7111         free(dname);
7112
7113         if (!err && st_fs.f_type != BPF_FS_MAGIC) {
7114                 pr_warn("specified path %s is not on BPF FS\n", path);
7115                 err = -EINVAL;
7116         }
7117
7118         return err;
7119 }
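
/*
 * Editor's note: check_path() requires the pin location to be on a
 * mounted BPF filesystem, conventionally:
 *
 *     mount -t bpf bpffs /sys/fs/bpf
 *
 * A path outside bpffs fails with -EINVAL before any pinning happens.
 */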
7120
7121 int bpf_program__pin_instance(struct bpf_program *prog, const char *path,
7122                               int instance)
7123 {
7124         char *cp, errmsg[STRERR_BUFSIZE];
7125         int err;
7126
7127         err = make_parent_dir(path);
7128         if (err)
7129                 return err;
7130
7131         err = check_path(path);
7132         if (err)
7133                 return err;
7134
7135         if (prog == NULL) {
7136                 pr_warn("invalid program pointer\n");
7137                 return -EINVAL;
7138         }
7139
7140         if (instance < 0 || instance >= prog->instances.nr) {
7141                 pr_warn("invalid prog instance %d of prog %s (max %d)\n",
7142                         instance, prog->name, prog->instances.nr);
7143                 return -EINVAL;
7144         }
7145
7146         if (bpf_obj_pin(prog->instances.fds[instance], path)) {
7147                 err = -errno;
7148                 cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
7149                 pr_warn("failed to pin program: %s\n", cp);
7150                 return err;
7151         }
7152         pr_debug("pinned program '%s'\n", path);
7153
7154         return 0;
7155 }
7156
7157 int bpf_program__unpin_instance(struct bpf_program *prog, const char *path,
7158                                 int instance)
7159 {
7160         int err;
7161
7162         err = check_path(path);
7163         if (err)
7164                 return err;
7165
7166         if (prog == NULL) {
7167                 pr_warn("invalid program pointer\n");
7168                 return -EINVAL;
7169         }
7170
7171         if (instance < 0 || instance >= prog->instances.nr) {
7172                 pr_warn("invalid prog instance %d of prog %s (max %d)\n",
7173                         instance, prog->name, prog->instances.nr);
7174                 return -EINVAL;
7175         }
7176
7177         err = unlink(path);
7178         if (err != 0)
7179                 return -errno;
7180         pr_debug("unpinned program '%s'\n", path);
7181
7182         return 0;
7183 }
7184
7185 int bpf_program__pin(struct bpf_program *prog, const char *path)
7186 {
7187         int i, err;
7188
7189         err = make_parent_dir(path);
7190         if (err)
7191                 return err;
7192
7193         err = check_path(path);
7194         if (err)
7195                 return err;
7196
7197         if (prog == NULL) {
7198                 pr_warn("invalid program pointer\n");
7199                 return -EINVAL;
7200         }
7201
7202         if (prog->instances.nr <= 0) {
7203                 pr_warn("no instances of prog %s to pin\n", prog->name);
7204                 return -EINVAL;
7205         }
7206
7207         if (prog->instances.nr == 1) {
7208                 /* don't create subdirs when pinning single instance */
7209                 return bpf_program__pin_instance(prog, path, 0);
7210         }
7211
7212         for (i = 0; i < prog->instances.nr; i++) {
7213                 char buf[PATH_MAX];
7214                 int len;
7215
7216                 len = snprintf(buf, PATH_MAX, "%s/%d", path, i);
7217                 if (len < 0) {
7218                         err = -EINVAL;
7219                         goto err_unpin;
7220                 } else if (len >= PATH_MAX) {
7221                         err = -ENAMETOOLONG;
7222                         goto err_unpin;
7223                 }
7224
7225                 err = bpf_program__pin_instance(prog, buf, i);
7226                 if (err)
7227                         goto err_unpin;
7228         }
7229
7230         return 0;
7231
7232 err_unpin:
7233         for (i = i - 1; i >= 0; i--) {
7234                 char buf[PATH_MAX];
7235                 int len;
7236
7237                 len = snprintf(buf, PATH_MAX, "%s/%d", path, i);
7238                 if (len < 0)
7239                         continue;
7240                 else if (len >= PATH_MAX)
7241                         continue;
7242
7243                 bpf_program__unpin_instance(prog, buf, i);
7244         }
7245
7246         rmdir(path);
7247
7248         return err;
7249 }
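
/*
 * Editor's note: resulting pin layout for the function above. A single
 * instance is pinned directly at 'path'; multiple instances turn 'path'
 * into a directory with one numbered pin per instance:
 *
 *     /sys/fs/bpf/myprog      (instances.nr == 1)
 *     /sys/fs/bpf/myprog/0    (instances.nr > 1)
 *     /sys/fs/bpf/myprog/1
 *
 * On error, instances pinned so far are unpinned and the dir is removed.
 */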
7250
7251 int bpf_program__unpin(struct bpf_program *prog, const char *path)
7252 {
7253         int i, err;
7254
7255         err = check_path(path);
7256         if (err)
7257                 return err;
7258
7259         if (prog == NULL) {
7260                 pr_warn("invalid program pointer\n");
7261                 return -EINVAL;
7262         }
7263
7264         if (prog->instances.nr <= 0) {
7265                 pr_warn("no instances of prog %s to unpin\n", prog->name);
7266                 return -EINVAL;
7267         }
7268
7269         if (prog->instances.nr == 1) {
7270                 /* a single instance was pinned directly, no subdirs */
7271                 return bpf_program__unpin_instance(prog, path, 0);
7272         }
7273
7274         for (i = 0; i < prog->instances.nr; i++) {
7275                 char buf[PATH_MAX];
7276                 int len;
7277
7278                 len = snprintf(buf, PATH_MAX, "%s/%d", path, i);
7279                 if (len < 0)
7280                         return -EINVAL;
7281                 else if (len >= PATH_MAX)
7282                         return -ENAMETOOLONG;
7283
7284                 err = bpf_program__unpin_instance(prog, buf, i);
7285                 if (err)
7286                         return err;
7287         }
7288
7289         err = rmdir(path);
7290         if (err)
7291                 return -errno;
7292
7293         return 0;
7294 }
7295
7296 int bpf_map__pin(struct bpf_map *map, const char *path)
7297 {
7298         char *cp, errmsg[STRERR_BUFSIZE];
7299         int err;
7300
7301         if (map == NULL) {
7302                 pr_warn("invalid map pointer\n");
7303                 return -EINVAL;
7304         }
7305
7306         if (map->pin_path) {
7307                 if (path && strcmp(path, map->pin_path)) {
7308                         pr_warn("map '%s' already has pin path '%s' different from '%s'\n",
7309                                 bpf_map__name(map), map->pin_path, path);
7310                         return -EINVAL;
7311                 } else if (map->pinned) {
7312                         pr_debug("map '%s' already pinned at '%s'; not re-pinning\n",
7313                                  bpf_map__name(map), map->pin_path);
7314                         return 0;
7315                 }
7316         } else {
7317                 if (!path) {
7318                         pr_warn("missing a path to pin map '%s' at\n",
7319                                 bpf_map__name(map));
7320                         return -EINVAL;
7321                 } else if (map->pinned) {
7322                         pr_warn("map '%s' already pinned\n", bpf_map__name(map));
7323                         return -EEXIST;
7324                 }
7325
7326                 map->pin_path = strdup(path);
7327                 if (!map->pin_path) {
7328                         err = -errno;
7329                         goto out_err;
7330                 }
7331         }
7332
7333         err = make_parent_dir(map->pin_path);
7334         if (err)
7335                 return err;
7336
7337         err = check_path(map->pin_path);
7338         if (err)
7339                 return err;
7340
7341         if (bpf_obj_pin(map->fd, map->pin_path)) {
7342                 err = -errno;
7343                 goto out_err;
7344         }
7345
7346         map->pinned = true;
7347         pr_debug("pinned map '%s'\n", map->pin_path);
7348
7349         return 0;
7350
7351 out_err:
7352         cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg));
7353         pr_warn("failed to pin map: %s\n", cp);
7354         return err;
7355 }
7356
7357 int bpf_map__unpin(struct bpf_map *map, const char *path)
7358 {
7359         int err;
7360
7361         if (map == NULL) {
7362                 pr_warn("invalid map pointer\n");
7363                 return -EINVAL;
7364         }
7365
7366         if (map->pin_path) {
7367                 if (path && strcmp(path, map->pin_path)) {
7368                         pr_warn("map '%s' already has pin path '%s' different from '%s'\n",
7369                                 bpf_map__name(map), map->pin_path, path);
7370                         return -EINVAL;
7371                 }
7372                 path = map->pin_path;
7373         } else if (!path) {
7374                 pr_warn("no path to unpin map '%s' from\n",
7375                         bpf_map__name(map));
7376                 return -EINVAL;
7377         }
7378
7379         err = check_path(path);
7380         if (err)
7381                 return err;
7382
7383         err = unlink(path);
7384         if (err != 0)
7385                 return -errno;
7386
7387         map->pinned = false;
7388         pr_debug("unpinned map '%s' from '%s'\n", bpf_map__name(map), path);
7389
7390         return 0;
7391 }
7392
7393 int bpf_map__set_pin_path(struct bpf_map *map, const char *path)
7394 {
7395         char *new = NULL;
7396
7397         if (path) {
7398                 new = strdup(path);
7399                 if (!new)
7400                         return -errno;
7401         }
7402
7403         free(map->pin_path);
7404         map->pin_path = new;
7405         return 0;
7406 }
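
/*
 * Editor's note: setting a pin path before load enables map sharing;
 * bpf_object__create_maps() will reuse an existing pin instead of
 * creating a fresh map. Map name and path below are hypothetical:
 *
 *     struct bpf_map *map = bpf_object__find_map_by_name(obj, "counters");
 *
 *     bpf_map__set_pin_path(map, "/sys/fs/bpf/counters");
 *     err = bpf_object__load(obj); // picks up the pinned map if present
 */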
7407
7408 const char *bpf_map__get_pin_path(const struct bpf_map *map)
7409 {
7410         return map->pin_path;
7411 }
7412
7413 bool bpf_map__is_pinned(const struct bpf_map *map)
7414 {
7415         return map->pinned;
7416 }
7417
7418 int bpf_object__pin_maps(struct bpf_object *obj, const char *path)
7419 {
7420         struct bpf_map *map;
7421         int err;
7422
7423         if (!obj)
7424                 return -ENOENT;
7425
7426         if (!obj->loaded) {
7427                 pr_warn("object not yet loaded; load it first\n");
7428                 return -ENOENT;
7429         }
7430
7431         bpf_object__for_each_map(map, obj) {
7432                 char *pin_path = NULL;
7433                 char buf[PATH_MAX];
7434
7435                 if (path) {
7436                         int len;
7437
7438                         len = snprintf(buf, PATH_MAX, "%s/%s", path,
7439                                        bpf_map__name(map));
7440                         if (len < 0) {
7441                                 err = -EINVAL;
7442                                 goto err_unpin_maps;
7443                         } else if (len >= PATH_MAX) {
7444                                 err = -ENAMETOOLONG;
7445                                 goto err_unpin_maps;
7446                         }
7447                         pin_path = buf;
7448                 } else if (!map->pin_path) {
7449                         continue;
7450                 }
7451
7452                 err = bpf_map__pin(map, pin_path);
7453                 if (err)
7454                         goto err_unpin_maps;
7455         }
7456
7457         return 0;
7458
7459 err_unpin_maps:
7460         while ((map = bpf_map__prev(map, obj))) {
7461                 if (!map->pin_path)
7462                         continue;
7463
7464                 bpf_map__unpin(map, NULL);
7465         }
7466
7467         return err;
7468 }
7469
7470 int bpf_object__unpin_maps(struct bpf_object *obj, const char *path)
7471 {
7472         struct bpf_map *map;
7473         int err;
7474
7475         if (!obj)
7476                 return -ENOENT;
7477
7478         bpf_object__for_each_map(map, obj) {
7479                 char *pin_path = NULL;
7480                 char buf[PATH_MAX];
7481
7482                 if (path) {
7483                         int len;
7484
7485                         len = snprintf(buf, PATH_MAX, "%s/%s", path,
7486                                        bpf_map__name(map));
7487                         if (len < 0)
7488                                 return -EINVAL;
7489                         else if (len >= PATH_MAX)
7490                                 return -ENAMETOOLONG;
7491                         pin_path = buf;
7492                 } else if (!map->pin_path) {
7493                         continue;
7494                 }
7495
7496                 err = bpf_map__unpin(map, pin_path);
7497                 if (err)
7498                         return err;
7499         }
7500
7501         return 0;
7502 }
7503
7504 int bpf_object__pin_programs(struct bpf_object *obj, const char *path)
7505 {
7506         struct bpf_program *prog;
7507         int err;
7508
7509         if (!obj)
7510                 return -ENOENT;
7511
7512         if (!obj->loaded) {
7513                 pr_warn("object not yet loaded; load it first\n");
7514                 return -ENOENT;
7515         }
7516
7517         bpf_object__for_each_program(prog, obj) {
7518                 char buf[PATH_MAX];
7519                 int len;
7520
7521                 len = snprintf(buf, PATH_MAX, "%s/%s", path,
7522                                prog->pin_name);
7523                 if (len < 0) {
7524                         err = -EINVAL;
7525                         goto err_unpin_programs;
7526                 } else if (len >= PATH_MAX) {
7527                         err = -ENAMETOOLONG;
7528                         goto err_unpin_programs;
7529                 }
7530
7531                 err = bpf_program__pin(prog, buf);
7532                 if (err)
7533                         goto err_unpin_programs;
7534         }
7535
7536         return 0;
7537
7538 err_unpin_programs:
7539         while ((prog = bpf_program__prev(prog, obj))) {
7540                 char buf[PATH_MAX];
7541                 int len;
7542
7543                 len = snprintf(buf, PATH_MAX, "%s/%s", path,
7544                                prog->pin_name);
7545                 if (len < 0)
7546                         continue;
7547                 else if (len >= PATH_MAX)
7548                         continue;
7549
7550                 bpf_program__unpin(prog, buf);
7551         }
7552
7553         return err;
7554 }
7555
7556 int bpf_object__unpin_programs(struct bpf_object *obj, const char *path)
7557 {
7558         struct bpf_program *prog;
7559         int err;
7560
7561         if (!obj)
7562                 return -ENOENT;
7563
7564         bpf_object__for_each_program(prog, obj) {
7565                 char buf[PATH_MAX];
7566                 int len;
7567
7568                 len = snprintf(buf, PATH_MAX, "%s/%s", path,
7569                                prog->pin_name);
7570                 if (len < 0)
7571                         return -EINVAL;
7572                 else if (len >= PATH_MAX)
7573                         return -ENAMETOOLONG;
7574
7575                 err = bpf_program__unpin(prog, buf);
7576                 if (err)
7577                         return err;
7578         }
7579
7580         return 0;
7581 }
7582
7583 int bpf_object__pin(struct bpf_object *obj, const char *path)
7584 {
7585         int err;
7586
7587         err = bpf_object__pin_maps(obj, path);
7588         if (err)
7589                 return err;
7590
7591         err = bpf_object__pin_programs(obj, path);
7592         if (err) {
7593                 bpf_object__unpin_maps(obj, path);
7594                 return err;
7595         }
7596
7597         return 0;
7598 }
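
/*
 * Editor's note: bpf_object__pin() is just the two pinners combined, with
 * maps rolled back if program pinning fails:
 *
 *     bpf_object__pin(obj, "/sys/fs/bpf/myobj");
 *     // maps:     /sys/fs/bpf/myobj/<map name>
 *     // programs: /sys/fs/bpf/myobj/<prog pin_name>
 */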
7599
7600 static void bpf_map__destroy(struct bpf_map *map)
7601 {
7602         if (map->clear_priv)
7603                 map->clear_priv(map, map->priv);
7604         map->priv = NULL;
7605         map->clear_priv = NULL;
7606
7607         if (map->inner_map) {
7608                 bpf_map__destroy(map->inner_map);
7609                 zfree(&map->inner_map);
7610         }
7611
7612         zfree(&map->init_slots);
7613         map->init_slots_sz = 0;
7614
7615         if (map->mmaped) {
7616                 munmap(map->mmaped, bpf_map_mmap_sz(map));
7617                 map->mmaped = NULL;
7618         }
7619
7620         if (map->st_ops) {
7621                 zfree(&map->st_ops->data);
7622                 zfree(&map->st_ops->progs);
7623                 zfree(&map->st_ops->kern_func_off);
7624                 zfree(&map->st_ops);
7625         }
7626
7627         zfree(&map->name);
7628         zfree(&map->pin_path);
7629
7630         if (map->fd >= 0)
7631                 zclose(map->fd);
7632 }
7633
7634 void bpf_object__close(struct bpf_object *obj)
7635 {
7636         size_t i;
7637
7638         if (IS_ERR_OR_NULL(obj))
7639                 return;
7640
7641         if (obj->clear_priv)
7642                 obj->clear_priv(obj, obj->priv);
7643
7644         bpf_object__elf_finish(obj);
7645         bpf_object__unload(obj);
7646         btf__free(obj->btf);
7647         btf_ext__free(obj->btf_ext);
7648
7649         for (i = 0; i < obj->nr_maps; i++)
7650                 bpf_map__destroy(&obj->maps[i]);
7651
7652         zfree(&obj->kconfig);
7653         zfree(&obj->externs);
7654         obj->nr_extern = 0;
7655
7656         zfree(&obj->maps);
7657         obj->nr_maps = 0;
7658
7659         if (obj->programs && obj->nr_programs) {
7660                 for (i = 0; i < obj->nr_programs; i++)
7661                         bpf_program__exit(&obj->programs[i]);
7662         }
7663         zfree(&obj->programs);
7664
7665         list_del(&obj->list);
7666         free(obj);
7667 }
7668
7669 struct bpf_object *
7670 bpf_object__next(struct bpf_object *prev)
7671 {
7672         struct bpf_object *next;
7673
7674         if (!prev)
7675                 next = list_first_entry(&bpf_objects_list,
7676                                         struct bpf_object,
7677                                         list);
7678         else
7679                 next = list_next_entry(prev, list);
7680
7681         /* An empty list is detected here, so no check is needed on entry. */
7682         if (&next->list == &bpf_objects_list)
7683                 return NULL;
7684
7685         return next;
7686 }
7687
7688 const char *bpf_object__name(const struct bpf_object *obj)
7689 {
7690         return obj ? obj->name : ERR_PTR(-EINVAL);
7691 }
7692
7693 unsigned int bpf_object__kversion(const struct bpf_object *obj)
7694 {
7695         return obj ? obj->kern_version : 0;
7696 }
7697
7698 struct btf *bpf_object__btf(const struct bpf_object *obj)
7699 {
7700         return obj ? obj->btf : NULL;
7701 }
7702
7703 int bpf_object__btf_fd(const struct bpf_object *obj)
7704 {
7705         return obj->btf ? btf__fd(obj->btf) : -1;
7706 }
7707
7708 int bpf_object__set_priv(struct bpf_object *obj, void *priv,
7709                          bpf_object_clear_priv_t clear_priv)
7710 {
7711         if (obj->priv && obj->clear_priv)
7712                 obj->clear_priv(obj, obj->priv);
7713
7714         obj->priv = priv;
7715         obj->clear_priv = clear_priv;
7716         return 0;
7717 }
7718
7719 void *bpf_object__priv(const struct bpf_object *obj)
7720 {
7721         return obj ? obj->priv : ERR_PTR(-EINVAL);
7722 }
7723
7724 static struct bpf_program *
7725 __bpf_program__iter(const struct bpf_program *p, const struct bpf_object *obj,
7726                     bool forward)
7727 {
7728         size_t nr_programs = obj->nr_programs;
7729         ssize_t idx;
7730
7731         if (!nr_programs)
7732                 return NULL;
7733
7734         if (!p)
7735                 /* Iterate from the beginning */
7736                 return forward ? &obj->programs[0] :
7737                         &obj->programs[nr_programs - 1];
7738
7739         if (p->obj != obj) {
7740                 pr_warn("error: program handle doesn't match object\n");
7741                 return NULL;
7742         }
7743
7744         idx = (p - obj->programs) + (forward ? 1 : -1);
7745         if (idx >= obj->nr_programs || idx < 0)
7746                 return NULL;
7747         return &obj->programs[idx];
7748 }
7749
7750 struct bpf_program *
7751 bpf_program__next(struct bpf_program *prev, const struct bpf_object *obj)
7752 {
7753         struct bpf_program *prog = prev;
7754
7755         do {
7756                 prog = __bpf_program__iter(prog, obj, true);
7757         } while (prog && prog_is_subprog(obj, prog));
7758
7759         return prog;
7760 }
7761
7762 struct bpf_program *
7763 bpf_program__prev(struct bpf_program *next, const struct bpf_object *obj)
7764 {
7765         struct bpf_program *prog = next;
7766
7767         do {
7768                 prog = __bpf_program__iter(prog, obj, false);
7769         } while (prog && prog_is_subprog(obj, prog));
7770
7771         return prog;
7772 }
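
/*
 * Editor's note: the iterators above skip subprograms, so only
 * entry-point programs are visited. Typical use is the convenience
 * macro built on bpf_program__next():
 *
 *     struct bpf_program *prog;
 *
 *     bpf_object__for_each_program(prog, obj)
 *             printf("%s (section %s)\n", bpf_program__name(prog),
 *                    bpf_program__section_name(prog));
 */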
7773
7774 int bpf_program__set_priv(struct bpf_program *prog, void *priv,
7775                           bpf_program_clear_priv_t clear_priv)
7776 {
7777         if (prog->priv && prog->clear_priv)
7778                 prog->clear_priv(prog, prog->priv);
7779
7780         prog->priv = priv;
7781         prog->clear_priv = clear_priv;
7782         return 0;
7783 }
7784
7785 void *bpf_program__priv(const struct bpf_program *prog)
7786 {
7787         return prog ? prog->priv : ERR_PTR(-EINVAL);
7788 }
7789
7790 void bpf_program__set_ifindex(struct bpf_program *prog, __u32 ifindex)
7791 {
7792         prog->prog_ifindex = ifindex;
7793 }
7794
7795 const char *bpf_program__name(const struct bpf_program *prog)
7796 {
7797         return prog->name;
7798 }
7799
7800 const char *bpf_program__section_name(const struct bpf_program *prog)
7801 {
7802         return prog->sec_name;
7803 }
7804
7805 const char *bpf_program__title(const struct bpf_program *prog, bool needs_copy)
7806 {
7807         const char *title;
7808
7809         title = prog->sec_name;
7810         if (needs_copy) {
7811                 title = strdup(title);
7812                 if (!title) {
7813                         pr_warn("failed to strdup program title\n");
7814                         return ERR_PTR(-ENOMEM);
7815                 }
7816         }
7817
7818         return title;
7819 }
7820
7821 bool bpf_program__autoload(const struct bpf_program *prog)
7822 {
7823         return prog->load;
7824 }
7825
7826 int bpf_program__set_autoload(struct bpf_program *prog, bool autoload)
7827 {
7828         if (prog->obj->loaded)
7829                 return -EINVAL;
7830
7831         prog->load = autoload;
7832         return 0;
7833 }
7834
7835 int bpf_program__fd(const struct bpf_program *prog)
7836 {
7837         return bpf_program__nth_fd(prog, 0);
7838 }
7839
7840 size_t bpf_program__size(const struct bpf_program *prog)
7841 {
7842         return prog->insns_cnt * BPF_INSN_SZ;
7843 }
7844
7845 int bpf_program__set_prep(struct bpf_program *prog, int nr_instances,
7846                           bpf_program_prep_t prep)
7847 {
7848         int *instances_fds;
7849
7850         if (nr_instances <= 0 || !prep)
7851                 return -EINVAL;
7852
7853         if (prog->instances.nr > 0 || prog->instances.fds) {
7854                 pr_warn("Can't set preprocessor after loading\n");
7855                 return -EINVAL;
7856         }
7857
7858         instances_fds = malloc(sizeof(int) * nr_instances);
7859         if (!instances_fds) {
7860                 pr_warn("failed to allocate memory for instance fds\n");
7861                 return -ENOMEM;
7862         }
7863
7864         /* fill all fds with -1 */
7865         memset(instances_fds, -1, sizeof(int) * nr_instances);
7866
7867         prog->instances.nr = nr_instances;
7868         prog->instances.fds = instances_fds;
7869         prog->preprocessor = prep;
7870         return 0;
7871 }
7872
7873 int bpf_program__nth_fd(const struct bpf_program *prog, int n)
7874 {
7875         int fd;
7876
7877         if (!prog)
7878                 return -EINVAL;
7879
7880         if (n >= prog->instances.nr || n < 0) {
7881                 pr_warn("Can't get fd #%d from program %s: only %d instances\n",
7882                         n, prog->name, prog->instances.nr);
7883                 return -EINVAL;
7884         }
7885
7886         fd = prog->instances.fds[n];
7887         if (fd < 0) {
7888                 pr_warn("instance #%d of program '%s' is invalid\n",
7889                         n, prog->name);
7890                 return -ENOENT;
7891         }
7892
7893         return fd;
7894 }
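
/*
 * Editor's note: a minimal sketch of the multi-instance preprocessor API
 * above. The callback can rewrite instructions per instance, or skip an
 * instance by leaving the result empty:
 *
 *     static int prep(struct bpf_program *prog, int n,
 *                     struct bpf_insn *insns, int insns_cnt,
 *                     struct bpf_prog_prep_result *res)
 *     {
 *             res->new_insn_ptr = insns;   // load instructions unmodified
 *             res->new_insn_cnt = insns_cnt;
 *             return 0;
 *     }
 *
 *     bpf_program__set_prep(prog, 2, prep); // before bpf_object__load()
 *     fd = bpf_program__nth_fd(prog, 1);    // fd of the second instance
 */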
7895
7896 enum bpf_prog_type bpf_program__get_type(struct bpf_program *prog)
7897 {
7898         return prog->type;
7899 }
7900
7901 void bpf_program__set_type(struct bpf_program *prog, enum bpf_prog_type type)
7902 {
7903         prog->type = type;
7904 }
7905
7906 static bool bpf_program__is_type(const struct bpf_program *prog,
7907                                  enum bpf_prog_type type)
7908 {
7909         return prog ? (prog->type == type) : false;
7910 }
7911
7912 #define BPF_PROG_TYPE_FNS(NAME, TYPE)                           \
7913 int bpf_program__set_##NAME(struct bpf_program *prog)           \
7914 {                                                               \
7915         if (!prog)                                              \
7916                 return -EINVAL;                                 \
7917         bpf_program__set_type(prog, TYPE);                      \
7918         return 0;                                               \
7919 }                                                               \
7920                                                                 \
7921 bool bpf_program__is_##NAME(const struct bpf_program *prog)     \
7922 {                                                               \
7923         return bpf_program__is_type(prog, TYPE);                \
7924 }                                                               \
7925
7926 BPF_PROG_TYPE_FNS(socket_filter, BPF_PROG_TYPE_SOCKET_FILTER);
7927 BPF_PROG_TYPE_FNS(lsm, BPF_PROG_TYPE_LSM);
7928 BPF_PROG_TYPE_FNS(kprobe, BPF_PROG_TYPE_KPROBE);
7929 BPF_PROG_TYPE_FNS(sched_cls, BPF_PROG_TYPE_SCHED_CLS);
7930 BPF_PROG_TYPE_FNS(sched_act, BPF_PROG_TYPE_SCHED_ACT);
7931 BPF_PROG_TYPE_FNS(tracepoint, BPF_PROG_TYPE_TRACEPOINT);
7932 BPF_PROG_TYPE_FNS(raw_tracepoint, BPF_PROG_TYPE_RAW_TRACEPOINT);
7933 BPF_PROG_TYPE_FNS(xdp, BPF_PROG_TYPE_XDP);
7934 BPF_PROG_TYPE_FNS(perf_event, BPF_PROG_TYPE_PERF_EVENT);
7935 BPF_PROG_TYPE_FNS(tracing, BPF_PROG_TYPE_TRACING);
7936 BPF_PROG_TYPE_FNS(struct_ops, BPF_PROG_TYPE_STRUCT_OPS);
7937 BPF_PROG_TYPE_FNS(extension, BPF_PROG_TYPE_EXT);
7938 BPF_PROG_TYPE_FNS(sk_lookup, BPF_PROG_TYPE_SK_LOOKUP);
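
/*
 * Editor's note: each BPF_PROG_TYPE_FNS() invocation above expands into a
 * setter/predicate pair, e.g.:
 *
 *     bpf_program__set_xdp(prog); // == bpf_program__set_type(prog, BPF_PROG_TYPE_XDP)
 *     bpf_program__is_xdp(prog);  // true after the call above
 */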
7939
7940 enum bpf_attach_type
7941 bpf_program__get_expected_attach_type(struct bpf_program *prog)
7942 {
7943         return prog->expected_attach_type;
7944 }
7945
7946 void bpf_program__set_expected_attach_type(struct bpf_program *prog,
7947                                            enum bpf_attach_type type)
7948 {
7949         prog->expected_attach_type = type;
7950 }
7951
7952 #define BPF_PROG_SEC_IMPL(string, ptype, eatype, eatype_optional,           \
7953                           attachable, attach_btf)                           \
7954         {                                                                   \
7955                 .sec = string,                                              \
7956                 .len = sizeof(string) - 1,                                  \
7957                 .prog_type = ptype,                                         \
7958                 .expected_attach_type = eatype,                             \
7959                 .is_exp_attach_type_optional = eatype_optional,             \
7960                 .is_attachable = attachable,                                \
7961                 .is_attach_btf = attach_btf,                                \
7962         }
7963
7964 /* Programs that can NOT be attached. */
7965 #define BPF_PROG_SEC(string, ptype) BPF_PROG_SEC_IMPL(string, ptype, 0, 0, 0, 0)
7966
7967 /* Programs that can be attached. */
7968 #define BPF_APROG_SEC(string, ptype, atype) \
7969         BPF_PROG_SEC_IMPL(string, ptype, atype, true, 1, 0)
7970
7971 /* Programs that must specify expected attach type at load time. */
7972 #define BPF_EAPROG_SEC(string, ptype, eatype) \
7973         BPF_PROG_SEC_IMPL(string, ptype, eatype, false, 1, 0)
7974
7975 /* Programs that use BTF to identify attach point */
7976 #define BPF_PROG_BTF(string, ptype, eatype) \
7977         BPF_PROG_SEC_IMPL(string, ptype, eatype, false, 0, 1)
7978
7979 /* Programs that can be attached but attach type can't be identified by section
7980  * name. Kept for backward compatibility.
7981  */
7982 #define BPF_APROG_COMPAT(string, ptype) BPF_PROG_SEC(string, ptype)
7983
7984 #define SEC_DEF(sec_pfx, ptype, ...) {                                      \
7985         .sec = sec_pfx,                                                     \
7986         .len = sizeof(sec_pfx) - 1,                                         \
7987         .prog_type = BPF_PROG_TYPE_##ptype,                                 \
7988         __VA_ARGS__                                                         \
7989 }
7990
7991 static struct bpf_link *attach_kprobe(const struct bpf_sec_def *sec,
7992                                       struct bpf_program *prog);
7993 static struct bpf_link *attach_tp(const struct bpf_sec_def *sec,
7994                                   struct bpf_program *prog);
7995 static struct bpf_link *attach_raw_tp(const struct bpf_sec_def *sec,
7996                                       struct bpf_program *prog);
7997 static struct bpf_link *attach_trace(const struct bpf_sec_def *sec,
7998                                      struct bpf_program *prog);
7999 static struct bpf_link *attach_lsm(const struct bpf_sec_def *sec,
8000                                    struct bpf_program *prog);
8001 static struct bpf_link *attach_iter(const struct bpf_sec_def *sec,
8002                                     struct bpf_program *prog);
8003
8004 static const struct bpf_sec_def section_defs[] = {
8005         BPF_PROG_SEC("socket",                  BPF_PROG_TYPE_SOCKET_FILTER),
8006         BPF_PROG_SEC("sk_reuseport",            BPF_PROG_TYPE_SK_REUSEPORT),
8007         SEC_DEF("kprobe/", KPROBE,
8008                 .attach_fn = attach_kprobe),
8009         BPF_PROG_SEC("uprobe/",                 BPF_PROG_TYPE_KPROBE),
8010         SEC_DEF("kretprobe/", KPROBE,
8011                 .attach_fn = attach_kprobe),
8012         BPF_PROG_SEC("uretprobe/",              BPF_PROG_TYPE_KPROBE),
8013         BPF_PROG_SEC("classifier",              BPF_PROG_TYPE_SCHED_CLS),
8014         BPF_PROG_SEC("action",                  BPF_PROG_TYPE_SCHED_ACT),
8015         SEC_DEF("tracepoint/", TRACEPOINT,
8016                 .attach_fn = attach_tp),
8017         SEC_DEF("tp/", TRACEPOINT,
8018                 .attach_fn = attach_tp),
8019         SEC_DEF("raw_tracepoint/", RAW_TRACEPOINT,
8020                 .attach_fn = attach_raw_tp),
8021         SEC_DEF("raw_tp/", RAW_TRACEPOINT,
8022                 .attach_fn = attach_raw_tp),
8023         SEC_DEF("tp_btf/", TRACING,
8024                 .expected_attach_type = BPF_TRACE_RAW_TP,
8025                 .is_attach_btf = true,
8026                 .attach_fn = attach_trace),
8027         SEC_DEF("fentry/", TRACING,
8028                 .expected_attach_type = BPF_TRACE_FENTRY,
8029                 .is_attach_btf = true,
8030                 .attach_fn = attach_trace),
8031         SEC_DEF("fmod_ret/", TRACING,
8032                 .expected_attach_type = BPF_MODIFY_RETURN,
8033                 .is_attach_btf = true,
8034                 .attach_fn = attach_trace),
8035         SEC_DEF("fexit/", TRACING,
8036                 .expected_attach_type = BPF_TRACE_FEXIT,
8037                 .is_attach_btf = true,
8038                 .attach_fn = attach_trace),
8039         SEC_DEF("fentry.s/", TRACING,
8040                 .expected_attach_type = BPF_TRACE_FENTRY,
8041                 .is_attach_btf = true,
8042                 .is_sleepable = true,
8043                 .attach_fn = attach_trace),
8044         SEC_DEF("fmod_ret.s/", TRACING,
8045                 .expected_attach_type = BPF_MODIFY_RETURN,
8046                 .is_attach_btf = true,
8047                 .is_sleepable = true,
8048                 .attach_fn = attach_trace),
8049         SEC_DEF("fexit.s/", TRACING,
8050                 .expected_attach_type = BPF_TRACE_FEXIT,
8051                 .is_attach_btf = true,
8052                 .is_sleepable = true,
8053                 .attach_fn = attach_trace),
8054         SEC_DEF("freplace/", EXT,
8055                 .is_attach_btf = true,
8056                 .attach_fn = attach_trace),
8057         SEC_DEF("lsm/", LSM,
8058                 .is_attach_btf = true,
8059                 .expected_attach_type = BPF_LSM_MAC,
8060                 .attach_fn = attach_lsm),
8061         SEC_DEF("lsm.s/", LSM,
8062                 .is_attach_btf = true,
8063                 .is_sleepable = true,
8064                 .expected_attach_type = BPF_LSM_MAC,
8065                 .attach_fn = attach_lsm),
8066         SEC_DEF("iter/", TRACING,
8067                 .expected_attach_type = BPF_TRACE_ITER,
8068                 .is_attach_btf = true,
8069                 .attach_fn = attach_iter),
8070         BPF_EAPROG_SEC("xdp_devmap/",           BPF_PROG_TYPE_XDP,
8071                                                 BPF_XDP_DEVMAP),
8072         BPF_EAPROG_SEC("xdp_cpumap/",           BPF_PROG_TYPE_XDP,
8073                                                 BPF_XDP_CPUMAP),
8074         BPF_APROG_SEC("xdp",                    BPF_PROG_TYPE_XDP,
8075                                                 BPF_XDP),
8076         BPF_PROG_SEC("perf_event",              BPF_PROG_TYPE_PERF_EVENT),
8077         BPF_PROG_SEC("lwt_in",                  BPF_PROG_TYPE_LWT_IN),
8078         BPF_PROG_SEC("lwt_out",                 BPF_PROG_TYPE_LWT_OUT),
8079         BPF_PROG_SEC("lwt_xmit",                BPF_PROG_TYPE_LWT_XMIT),
8080         BPF_PROG_SEC("lwt_seg6local",           BPF_PROG_TYPE_LWT_SEG6LOCAL),
8081         BPF_APROG_SEC("cgroup_skb/ingress",     BPF_PROG_TYPE_CGROUP_SKB,
8082                                                 BPF_CGROUP_INET_INGRESS),
8083         BPF_APROG_SEC("cgroup_skb/egress",      BPF_PROG_TYPE_CGROUP_SKB,
8084                                                 BPF_CGROUP_INET_EGRESS),
8085         BPF_APROG_COMPAT("cgroup/skb",          BPF_PROG_TYPE_CGROUP_SKB),
8086         BPF_EAPROG_SEC("cgroup/sock_create",    BPF_PROG_TYPE_CGROUP_SOCK,
8087                                                 BPF_CGROUP_INET_SOCK_CREATE),
8088         BPF_EAPROG_SEC("cgroup/sock_release",   BPF_PROG_TYPE_CGROUP_SOCK,
8089                                                 BPF_CGROUP_INET_SOCK_RELEASE),
8090         BPF_APROG_SEC("cgroup/sock",            BPF_PROG_TYPE_CGROUP_SOCK,
8091                                                 BPF_CGROUP_INET_SOCK_CREATE),
8092         BPF_EAPROG_SEC("cgroup/post_bind4",     BPF_PROG_TYPE_CGROUP_SOCK,
8093                                                 BPF_CGROUP_INET4_POST_BIND),
8094         BPF_EAPROG_SEC("cgroup/post_bind6",     BPF_PROG_TYPE_CGROUP_SOCK,
8095                                                 BPF_CGROUP_INET6_POST_BIND),
8096         BPF_APROG_SEC("cgroup/dev",             BPF_PROG_TYPE_CGROUP_DEVICE,
8097                                                 BPF_CGROUP_DEVICE),
8098         BPF_APROG_SEC("sockops",                BPF_PROG_TYPE_SOCK_OPS,
8099                                                 BPF_CGROUP_SOCK_OPS),
8100         BPF_APROG_SEC("sk_skb/stream_parser",   BPF_PROG_TYPE_SK_SKB,
8101                                                 BPF_SK_SKB_STREAM_PARSER),
8102         BPF_APROG_SEC("sk_skb/stream_verdict",  BPF_PROG_TYPE_SK_SKB,
8103                                                 BPF_SK_SKB_STREAM_VERDICT),
8104         BPF_APROG_COMPAT("sk_skb",              BPF_PROG_TYPE_SK_SKB),
8105         BPF_APROG_SEC("sk_msg",                 BPF_PROG_TYPE_SK_MSG,
8106                                                 BPF_SK_MSG_VERDICT),
8107         BPF_APROG_SEC("lirc_mode2",             BPF_PROG_TYPE_LIRC_MODE2,
8108                                                 BPF_LIRC_MODE2),
8109         BPF_APROG_SEC("flow_dissector",         BPF_PROG_TYPE_FLOW_DISSECTOR,
8110                                                 BPF_FLOW_DISSECTOR),
8111         BPF_EAPROG_SEC("cgroup/bind4",          BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
8112                                                 BPF_CGROUP_INET4_BIND),
8113         BPF_EAPROG_SEC("cgroup/bind6",          BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
8114                                                 BPF_CGROUP_INET6_BIND),
8115         BPF_EAPROG_SEC("cgroup/connect4",       BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
8116                                                 BPF_CGROUP_INET4_CONNECT),
8117         BPF_EAPROG_SEC("cgroup/connect6",       BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
8118                                                 BPF_CGROUP_INET6_CONNECT),
8119         BPF_EAPROG_SEC("cgroup/sendmsg4",       BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
8120                                                 BPF_CGROUP_UDP4_SENDMSG),
8121         BPF_EAPROG_SEC("cgroup/sendmsg6",       BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
8122                                                 BPF_CGROUP_UDP6_SENDMSG),
8123         BPF_EAPROG_SEC("cgroup/recvmsg4",       BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
8124                                                 BPF_CGROUP_UDP4_RECVMSG),
8125         BPF_EAPROG_SEC("cgroup/recvmsg6",       BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
8126                                                 BPF_CGROUP_UDP6_RECVMSG),
8127         BPF_EAPROG_SEC("cgroup/getpeername4",   BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
8128                                                 BPF_CGROUP_INET4_GETPEERNAME),
8129         BPF_EAPROG_SEC("cgroup/getpeername6",   BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
8130                                                 BPF_CGROUP_INET6_GETPEERNAME),
8131         BPF_EAPROG_SEC("cgroup/getsockname4",   BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
8132                                                 BPF_CGROUP_INET4_GETSOCKNAME),
8133         BPF_EAPROG_SEC("cgroup/getsockname6",   BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
8134                                                 BPF_CGROUP_INET6_GETSOCKNAME),
8135         BPF_EAPROG_SEC("cgroup/sysctl",         BPF_PROG_TYPE_CGROUP_SYSCTL,
8136                                                 BPF_CGROUP_SYSCTL),
8137         BPF_EAPROG_SEC("cgroup/getsockopt",     BPF_PROG_TYPE_CGROUP_SOCKOPT,
8138                                                 BPF_CGROUP_GETSOCKOPT),
8139         BPF_EAPROG_SEC("cgroup/setsockopt",     BPF_PROG_TYPE_CGROUP_SOCKOPT,
8140                                                 BPF_CGROUP_SETSOCKOPT),
8141         BPF_PROG_SEC("struct_ops",              BPF_PROG_TYPE_STRUCT_OPS),
8142         BPF_EAPROG_SEC("sk_lookup/",            BPF_PROG_TYPE_SK_LOOKUP,
8143                                                 BPF_SK_LOOKUP),
8144 };
8145
8146 #undef BPF_PROG_SEC_IMPL
8147 #undef BPF_PROG_SEC
8148 #undef BPF_APROG_SEC
8149 #undef BPF_EAPROG_SEC
8150 #undef BPF_APROG_COMPAT
8151 #undef SEC_DEF
8152
8153 #define MAX_TYPE_NAME_SIZE 32
8154
8155 static const struct bpf_sec_def *find_sec_def(const char *sec_name)
8156 {
8157         int i, n = ARRAY_SIZE(section_defs);
8158
8159         for (i = 0; i < n; i++) {
8160                 if (strncmp(sec_name,
8161                             section_defs[i].sec, section_defs[i].len))
8162                         continue;
8163                 return &section_defs[i];
8164         }
8165         return NULL;
8166 }
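
/*
 * Editor's note: the match above is by prefix of length 'len', so a
 * section name only needs to start with a table entry. For example,
 * SEC("kprobe/sys_execve") matches the "kprobe/" entry and yields
 * BPF_PROG_TYPE_KPROBE; an unknown name returns NULL and leaves the
 * program type for the user to set manually.
 */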
8167
8168 static char *libbpf_get_type_names(bool attach_type)
8169 {
8170         int i, len = ARRAY_SIZE(section_defs) * MAX_TYPE_NAME_SIZE;
8171         char *buf;
8172
8173         buf = malloc(len);
8174         if (!buf)
8175                 return NULL;
8176
8177         buf[0] = '\0';
8178         /* Build a string buffer containing all available names */
8179         for (i = 0; i < ARRAY_SIZE(section_defs); i++) {
8180                 if (attach_type && !section_defs[i].is_attachable)
8181                         continue;
8182
8183                 if (strlen(buf) + strlen(section_defs[i].sec) + 2 > len) {
8184                         free(buf);
8185                         return NULL;
8186                 }
8187                 strcat(buf, " ");
8188                 strcat(buf, section_defs[i].sec);
8189         }
8190
8191         return buf;
8192 }
8193
8194 int libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type,
8195                              enum bpf_attach_type *expected_attach_type)
8196 {
8197         const struct bpf_sec_def *sec_def;
8198         char *type_names;
8199
8200         if (!name)
8201                 return -EINVAL;
8202
8203         sec_def = find_sec_def(name);
8204         if (sec_def) {
8205                 *prog_type = sec_def->prog_type;
8206                 *expected_attach_type = sec_def->expected_attach_type;
8207                 return 0;
8208         }
8209
8210         pr_debug("failed to guess program type from ELF section '%s'\n", name);
8211         type_names = libbpf_get_type_names(false);
8212         if (type_names != NULL) {
8213                 pr_debug("supported section(type) names are:%s\n", type_names);
8214                 free(type_names);
8215         }
8216
8217         return -ESRCH;
8218 }
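
/*
 * Editor's note: example use of the lookup above; the values shown are
 * what the section_defs table produces:
 *
 *     enum bpf_prog_type ptype;
 *     enum bpf_attach_type atype;
 *
 *     if (!libbpf_prog_type_by_name("cgroup/bind4", &ptype, &atype)) {
 *             // ptype == BPF_PROG_TYPE_CGROUP_SOCK_ADDR
 *             // atype == BPF_CGROUP_INET4_BIND
 *     }
 */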
8219
8220 static struct bpf_map *find_struct_ops_map_by_offset(struct bpf_object *obj,
8221                                                      size_t offset)
8222 {
8223         struct bpf_map *map;
8224         size_t i;
8225
8226         for (i = 0; i < obj->nr_maps; i++) {
8227                 map = &obj->maps[i];
8228                 if (!bpf_map__is_struct_ops(map))
8229                         continue;
8230                 if (map->sec_offset <= offset &&
8231                     offset - map->sec_offset < map->def.value_size)
8232                         return map;
8233         }
8234
8235         return NULL;
8236 }
8237
8238 /* Collect the relocations from the ELF section and populate st_ops->progs[] */
8239 static int bpf_object__collect_st_ops_relos(struct bpf_object *obj,
8240                                             GElf_Shdr *shdr, Elf_Data *data)
8241 {
8242         const struct btf_member *member;
8243         struct bpf_struct_ops *st_ops;
8244         struct bpf_program *prog;
8245         unsigned int shdr_idx;
8246         const struct btf *btf;
8247         struct bpf_map *map;
8248         Elf_Data *symbols;
8249         unsigned int moff, insn_idx;
8250         const char *name;
8251         __u32 member_idx;
8252         GElf_Sym sym;
8253         GElf_Rel rel;
8254         int i, nrels;
8255
8256         symbols = obj->efile.symbols;
8257         btf = obj->btf;
8258         nrels = shdr->sh_size / shdr->sh_entsize;
8259         for (i = 0; i < nrels; i++) {
8260                 if (!gelf_getrel(data, i, &rel)) {
8261                         pr_warn("struct_ops reloc: failed to get %d reloc\n", i);
8262                         return -LIBBPF_ERRNO__FORMAT;
8263                 }
8264
8265                 if (!gelf_getsym(symbols, GELF_R_SYM(rel.r_info), &sym)) {
8266                         pr_warn("struct_ops reloc: symbol %zx not found\n",
8267                                 (size_t)GELF_R_SYM(rel.r_info));
8268                         return -LIBBPF_ERRNO__FORMAT;
8269                 }
8270
8271                 name = elf_sym_str(obj, sym.st_name) ?: "<?>";
8272                 map = find_struct_ops_map_by_offset(obj, rel.r_offset);
8273                 if (!map) {
8274                         pr_warn("struct_ops reloc: cannot find map at rel.r_offset %zu\n",
8275                                 (size_t)rel.r_offset);
8276                         return -EINVAL;
8277                 }
8278
8279                 moff = rel.r_offset - map->sec_offset;
8280                 shdr_idx = sym.st_shndx;
8281                 st_ops = map->st_ops;
8282                 pr_debug("struct_ops reloc %s: for %lld value %lld shdr_idx %u rel.r_offset %zu map->sec_offset %zu name %d (\'%s\')\n",
8283                          map->name,
8284                          (long long)(rel.r_info >> 32),
8285                          (long long)sym.st_value,
8286                          shdr_idx, (size_t)rel.r_offset,
8287                          map->sec_offset, sym.st_name, name);
8288
8289                 if (shdr_idx >= SHN_LORESERVE) {
8290                         pr_warn("struct_ops reloc %s: rel.r_offset %zu shdr_idx %u unsupported non-static function\n",
8291                                 map->name, (size_t)rel.r_offset, shdr_idx);
8292                         return -LIBBPF_ERRNO__RELOC;
8293                 }
8294                 if (sym.st_value % BPF_INSN_SZ) {
8295                         pr_warn("struct_ops reloc %s: invalid target program offset %llu\n",
8296                                 map->name, (unsigned long long)sym.st_value);
8297                         return -LIBBPF_ERRNO__FORMAT;
8298                 }
8299                 insn_idx = sym.st_value / BPF_INSN_SZ;
8300
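                /* moff is in bytes while BTF member offsets are in bits */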
8301                 member = find_member_by_offset(st_ops->type, moff * 8);
8302                 if (!member) {
8303                         pr_warn("struct_ops reloc %s: cannot find member at moff %u\n",
8304                                 map->name, moff);
8305                         return -EINVAL;
8306                 }
8307                 member_idx = member - btf_members(st_ops->type);
8308                 name = btf__name_by_offset(btf, member->name_off);
8309
8310                 if (!resolve_func_ptr(btf, member->type, NULL)) {
8311                         pr_warn("struct_ops reloc %s: cannot relocate non func ptr %s\n",
8312                                 map->name, name);
8313                         return -EINVAL;
8314                 }
8315
8316                 prog = find_prog_by_sec_insn(obj, shdr_idx, insn_idx);
8317                 if (!prog) {
8318                         pr_warn("struct_ops reloc %s: cannot find prog at shdr_idx %u to relocate func ptr %s\n",
8319                                 map->name, shdr_idx, name);
8320                         return -EINVAL;
8321                 }
8322
8323                 if (prog->type == BPF_PROG_TYPE_UNSPEC) {
8324                         const struct bpf_sec_def *sec_def;
8325
8326                         sec_def = find_sec_def(prog->sec_name);
8327                         if (sec_def &&
8328                             sec_def->prog_type != BPF_PROG_TYPE_STRUCT_OPS) {
8329                                 /* for pr_warn */
8330                                 prog->type = sec_def->prog_type;
8331                                 goto invalid_prog;
8332                         }
8333
8334                         prog->type = BPF_PROG_TYPE_STRUCT_OPS;
8335                         prog->attach_btf_id = st_ops->type_id;
8336                         prog->expected_attach_type = member_idx;
8337                 } else if (prog->type != BPF_PROG_TYPE_STRUCT_OPS ||
8338                            prog->attach_btf_id != st_ops->type_id ||
8339                            prog->expected_attach_type != member_idx) {
8340                         goto invalid_prog;
8341                 }
8342                 st_ops->progs[member_idx] = prog;
8343         }
8344
8345         return 0;
8346
8347 invalid_prog:
8348         pr_warn("struct_ops reloc %s: cannot use prog %s in sec %s with type %u attach_btf_id %u expected_attach_type %u for func ptr %s\n",
8349                 map->name, prog->name, prog->sec_name, prog->type,
8350                 prog->attach_btf_id, prog->expected_attach_type, name);
8351         return -EINVAL;
8352 }
8353
8354 #define BTF_TRACE_PREFIX "btf_trace_"
8355 #define BTF_LSM_PREFIX "bpf_lsm_"
8356 #define BTF_ITER_PREFIX "bpf_iter_"
8357 #define BTF_MAX_NAME_SIZE 128
8358
8359 static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix,
8360                                    const char *name, __u32 kind)
8361 {
8362         char btf_type_name[BTF_MAX_NAME_SIZE];
8363         int ret;
8364
8365         ret = snprintf(btf_type_name, sizeof(btf_type_name),
8366                        "%s%s", prefix, name);
8367         /* snprintf returns the number of characters written excluding the
8368          * terminating null. So, if >= BTF_MAX_NAME_SIZE characters are
8369          * written, it indicates truncation.
8370          */
8371         if (ret < 0 || ret >= sizeof(btf_type_name))
8372                 return -ENAMETOOLONG;
8373         return btf__find_by_name_kind(btf, btf_type_name, kind);
8374 }
8375
8376 static inline int __find_vmlinux_btf_id(struct btf *btf, const char *name,
8377                                         enum bpf_attach_type attach_type)
8378 {
8379         int err;
8380
8381         if (attach_type == BPF_TRACE_RAW_TP)
8382                 err = find_btf_by_prefix_kind(btf, BTF_TRACE_PREFIX, name,
8383                                               BTF_KIND_TYPEDEF);
8384         else if (attach_type == BPF_LSM_MAC)
8385                 err = find_btf_by_prefix_kind(btf, BTF_LSM_PREFIX, name,
8386                                               BTF_KIND_FUNC);
8387         else if (attach_type == BPF_TRACE_ITER)
8388                 err = find_btf_by_prefix_kind(btf, BTF_ITER_PREFIX, name,
8389                                               BTF_KIND_FUNC);
8390         else
8391                 err = btf__find_by_name_kind(btf, name, BTF_KIND_FUNC);
8392
8393         if (err <= 0)
8394                 pr_warn("%s is not found in vmlinux BTF\n", name);
8395
8396         return err;
8397 }
8398
8399 int libbpf_find_vmlinux_btf_id(const char *name,
8400                                enum bpf_attach_type attach_type)
8401 {
8402         struct btf *btf;
8403         int err;
8404
8405         btf = libbpf_find_kernel_btf();
8406         if (IS_ERR(btf)) {
8407                 pr_warn("vmlinux BTF is not found\n");
8408                 return -EINVAL;
8409         }
8410
8411         err = __find_vmlinux_btf_id(btf, name, attach_type);
8412         btf__free(btf);
8413         return err;
8414 }
8415
8416 static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd)
8417 {
8418         struct bpf_prog_info_linear *info_linear;
8419         struct bpf_prog_info *info;
8420         struct btf *btf = NULL;
8421         int err = -EINVAL;
8422
8423         info_linear = bpf_program__get_prog_info_linear(attach_prog_fd, 0);
8424         if (IS_ERR_OR_NULL(info_linear)) {
8425                 pr_warn("failed get_prog_info_linear for FD %d\n",
8426                         attach_prog_fd);
8427                 return -EINVAL;
8428         }
8429         info = &info_linear->info;
8430         if (!info->btf_id) {
8431                 pr_warn("The target program doesn't have BTF\n");
8432                 goto out;
8433         }
8434         if (btf__get_from_id(info->btf_id, &btf)) {
8435                 pr_warn("Failed to get BTF of the program\n");
8436                 goto out;
8437         }
8438         err = btf__find_by_name_kind(btf, name, BTF_KIND_FUNC);
8439         btf__free(btf);
8440         if (err <= 0) {
8441                 pr_warn("%s is not found in prog's BTF\n", name);
8442                 goto out;
8443         }
8444 out:
8445         free(info_linear);
8446         return err;
8447 }
8448
8449 static int libbpf_find_attach_btf_id(struct bpf_program *prog)
8450 {
8451         enum bpf_attach_type attach_type = prog->expected_attach_type;
8452         __u32 attach_prog_fd = prog->attach_prog_fd;
8453         const char *name = prog->sec_name;
8454         int i, err;
8455
8456         if (!name)
8457                 return -EINVAL;
8458
8459         for (i = 0; i < ARRAY_SIZE(section_defs); i++) {
8460                 if (!section_defs[i].is_attach_btf)
8461                         continue;
8462                 if (strncmp(name, section_defs[i].sec, section_defs[i].len))
8463                         continue;
8464                 if (attach_prog_fd)
8465                         err = libbpf_find_prog_btf_id(name + section_defs[i].len,
8466                                                       attach_prog_fd);
8467                 else
8468                         err = __find_vmlinux_btf_id(prog->obj->btf_vmlinux,
8469                                                     name + section_defs[i].len,
8470                                                     attach_type);
8471                 return err;
8472         }
8473         pr_warn("failed to identify btf_id based on ELF section name '%s'\n", name);
8474         return -ESRCH;
8475 }
8476
8477 int libbpf_attach_type_by_name(const char *name,
8478                                enum bpf_attach_type *attach_type)
8479 {
8480         char *type_names;
8481         int i;
8482
8483         if (!name)
8484                 return -EINVAL;
8485
8486         for (i = 0; i < ARRAY_SIZE(section_defs); i++) {
8487                 if (strncmp(name, section_defs[i].sec, section_defs[i].len))
8488                         continue;
8489                 if (!section_defs[i].is_attachable)
8490                         return -EINVAL;
8491                 *attach_type = section_defs[i].expected_attach_type;
8492                 return 0;
8493         }
8494         pr_debug("failed to guess attach type based on ELF section name '%s'\n", name);
8495         type_names = libbpf_get_type_names(true);
8496         if (type_names != NULL) {
8497                 pr_debug("attachable section(type) names are:%s\n", type_names);
8498                 free(type_names);
8499         }
8500
8501         return -EINVAL;
8502 }
8503
8504 int bpf_map__fd(const struct bpf_map *map)
8505 {
8506         return map ? map->fd : -EINVAL;
8507 }
8508
8509 const struct bpf_map_def *bpf_map__def(const struct bpf_map *map)
8510 {
8511         return map ? &map->def : ERR_PTR(-EINVAL);
8512 }
8513
8514 const char *bpf_map__name(const struct bpf_map *map)
8515 {
8516         return map ? map->name : NULL;
8517 }
8518
8519 enum bpf_map_type bpf_map__type(const struct bpf_map *map)
8520 {
8521         return map->def.type;
8522 }
8523
8524 int bpf_map__set_type(struct bpf_map *map, enum bpf_map_type type)
8525 {
8526         if (map->fd >= 0)
8527                 return -EBUSY;
8528         map->def.type = type;
8529         return 0;
8530 }
8531
8532 __u32 bpf_map__map_flags(const struct bpf_map *map)
8533 {
8534         return map->def.map_flags;
8535 }
8536
8537 int bpf_map__set_map_flags(struct bpf_map *map, __u32 flags)
8538 {
8539         if (map->fd >= 0)
8540                 return -EBUSY;
8541         map->def.map_flags = flags;
8542         return 0;
8543 }
8544
8545 __u32 bpf_map__numa_node(const struct bpf_map *map)
8546 {
8547         return map->numa_node;
8548 }
8549
8550 int bpf_map__set_numa_node(struct bpf_map *map, __u32 numa_node)
8551 {
8552         if (map->fd >= 0)
8553                 return -EBUSY;
8554         map->numa_node = numa_node;
8555         return 0;
8556 }
8557
8558 __u32 bpf_map__key_size(const struct bpf_map *map)
8559 {
8560         return map->def.key_size;
8561 }
8562
8563 int bpf_map__set_key_size(struct bpf_map *map, __u32 size)
8564 {
8565         if (map->fd >= 0)
8566                 return -EBUSY;
8567         map->def.key_size = size;
8568         return 0;
8569 }
8570
8571 __u32 bpf_map__value_size(const struct bpf_map *map)
8572 {
8573         return map->def.value_size;
8574 }
8575
8576 int bpf_map__set_value_size(struct bpf_map *map, __u32 size)
8577 {
8578         if (map->fd >= 0)
8579                 return -EBUSY;
8580         map->def.value_size = size;
8581         return 0;
8582 }
8583
8584 __u32 bpf_map__btf_key_type_id(const struct bpf_map *map)
8585 {
8586         return map ? map->btf_key_type_id : 0;
8587 }
8588
8589 __u32 bpf_map__btf_value_type_id(const struct bpf_map *map)
8590 {
8591         return map ? map->btf_value_type_id : 0;
8592 }
8593
8594 int bpf_map__set_priv(struct bpf_map *map, void *priv,
8595                      bpf_map_clear_priv_t clear_priv)
8596 {
8597         if (!map)
8598                 return -EINVAL;
8599
8600         if (map->priv) {
8601                 if (map->clear_priv)
8602                         map->clear_priv(map, map->priv);
8603         }
8604
8605         map->priv = priv;
8606         map->clear_priv = clear_priv;
8607         return 0;
8608 }
8609
8610 void *bpf_map__priv(const struct bpf_map *map)
8611 {
8612         return map ? map->priv : ERR_PTR(-EINVAL);
8613 }
8614
8615 int bpf_map__set_initial_value(struct bpf_map *map,
8616                                const void *data, size_t size)
8617 {
8618         if (!map->mmaped || map->libbpf_type == LIBBPF_MAP_KCONFIG ||
8619             size != map->def.value_size || map->fd >= 0)
8620                 return -EINVAL;
8621
8622         memcpy(map->mmaped, data, size);
8623         return 0;
8624 }
8625
8626 bool bpf_map__is_offload_neutral(const struct bpf_map *map)
8627 {
8628         return map->def.type == BPF_MAP_TYPE_PERF_EVENT_ARRAY;
8629 }
8630
8631 bool bpf_map__is_internal(const struct bpf_map *map)
8632 {
8633         return map->libbpf_type != LIBBPF_MAP_UNSPEC;
8634 }
8635
8636 __u32 bpf_map__ifindex(const struct bpf_map *map)
8637 {
8638         return map->map_ifindex;
8639 }
8640
8641 int bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex)
8642 {
8643         if (map->fd >= 0)
8644                 return -EBUSY;
8645         map->map_ifindex = ifindex;
8646         return 0;
8647 }
8648
8649 int bpf_map__set_inner_map_fd(struct bpf_map *map, int fd)
8650 {
8651         if (!bpf_map_type__is_map_in_map(map->def.type)) {
8652                 pr_warn("error: unsupported map type\n");
8653                 return -EINVAL;
8654         }
8655         if (map->inner_map_fd != -1) {
8656                 pr_warn("error: inner_map_fd already specified\n");
8657                 return -EINVAL;
8658         }
8659         map->inner_map_fd = fd;
8660         return 0;
8661 }
8662
8663 static struct bpf_map *
8664 __bpf_map__iter(const struct bpf_map *m, const struct bpf_object *obj, int i)
8665 {
8666         ssize_t idx;
8667         struct bpf_map *s, *e;
8668
8669         if (!obj || !obj->maps)
8670                 return NULL;
8671
8672         s = obj->maps;
8673         e = obj->maps + obj->nr_maps;
8674
8675         if ((m < s) || (m >= e)) {
8676                 pr_warn("error in %s: map handle doesn't belong to object\n",
8677                          __func__);
8678                 return NULL;
8679         }
8680
8681         idx = (m - obj->maps) + i;
8682         if (idx >= obj->nr_maps || idx < 0)
8683                 return NULL;
8684         return &obj->maps[idx];
8685 }
8686
8687 struct bpf_map *
8688 bpf_map__next(const struct bpf_map *prev, const struct bpf_object *obj)
8689 {
8690         if (prev == NULL)
8691                 return obj->maps;
8692
8693         return __bpf_map__iter(prev, obj, 1);
8694 }
8695
8696 struct bpf_map *
8697 bpf_map__prev(const struct bpf_map *next, const struct bpf_object *obj)
8698 {
8699         if (next == NULL) {
8700                 if (!obj->nr_maps)
8701                         return NULL;
8702                 return obj->maps + obj->nr_maps - 1;
8703         }
8704
8705         return __bpf_map__iter(next, obj, -1);
8706 }
8707
8708 struct bpf_map *
8709 bpf_object__find_map_by_name(const struct bpf_object *obj, const char *name)
8710 {
8711         struct bpf_map *pos;
8712
8713         bpf_object__for_each_map(pos, obj) {
8714                 if (pos->name && !strcmp(pos->name, name))
8715                         return pos;
8716         }
8717         return NULL;
8718 }
8719
8720 int
8721 bpf_object__find_map_fd_by_name(const struct bpf_object *obj, const char *name)
8722 {
8723         return bpf_map__fd(bpf_object__find_map_by_name(obj, name));
8724 }
8725
8726 struct bpf_map *
8727 bpf_object__find_map_by_offset(struct bpf_object *obj, size_t offset)
8728 {
8729         return ERR_PTR(-ENOTSUP);
8730 }
8731
8732 long libbpf_get_error(const void *ptr)
8733 {
8734         return PTR_ERR_OR_ZERO(ptr);
8735 }
8736
8737 int bpf_prog_load(const char *file, enum bpf_prog_type type,
8738                   struct bpf_object **pobj, int *prog_fd)
8739 {
8740         struct bpf_prog_load_attr attr;
8741
8742         memset(&attr, 0, sizeof(struct bpf_prog_load_attr));
8743         attr.file = file;
8744         attr.prog_type = type;
8745         attr.expected_attach_type = 0;
8746
8747         return bpf_prog_load_xattr(&attr, pobj, prog_fd);
8748 }
8749
8750 int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
8751                         struct bpf_object **pobj, int *prog_fd)
8752 {
8753         struct bpf_object_open_attr open_attr = {};
8754         struct bpf_program *prog, *first_prog = NULL;
8755         struct bpf_object *obj;
8756         struct bpf_map *map;
8757         int err;
8758
8759         if (!attr)
8760                 return -EINVAL;
8761         if (!attr->file)
8762                 return -EINVAL;
8763
8764         open_attr.file = attr->file;
8765         open_attr.prog_type = attr->prog_type;
8766
8767         obj = bpf_object__open_xattr(&open_attr);
8768         if (IS_ERR_OR_NULL(obj))
8769                 return -ENOENT;
8770
8771         bpf_object__for_each_program(prog, obj) {
8772                 enum bpf_attach_type attach_type = attr->expected_attach_type;
8773                 /*
8774                  * to preserve backwards compatibility, bpf_prog_load treats
8775                  * attr->prog_type, if specified, as an override to whatever
8776                  * bpf_object__open guessed
8777                  */
8778                 if (attr->prog_type != BPF_PROG_TYPE_UNSPEC) {
8779                         bpf_program__set_type(prog, attr->prog_type);
8780                         bpf_program__set_expected_attach_type(prog,
8781                                                               attach_type);
8782                 }
8783                 if (bpf_program__get_type(prog) == BPF_PROG_TYPE_UNSPEC) {
8784                         /*
8785                          * we couldn't guess the type from the section name
8786                          * and the user didn't provide a fallback type, too bad...
8787                          */
8788                         bpf_object__close(obj);
8789                         return -EINVAL;
8790                 }
8791
8792                 prog->prog_ifindex = attr->ifindex;
8793                 prog->log_level = attr->log_level;
8794                 prog->prog_flags |= attr->prog_flags;
8795                 if (!first_prog)
8796                         first_prog = prog;
8797         }
8798
8799         bpf_object__for_each_map(map, obj) {
8800                 if (!bpf_map__is_offload_neutral(map))
8801                         map->map_ifindex = attr->ifindex;
8802         }
8803
8804         if (!first_prog) {
8805                 pr_warn("object file doesn't contain bpf program\n");
8806                 bpf_object__close(obj);
8807                 return -ENOENT;
8808         }
8809
8810         err = bpf_object__load(obj);
8811         if (err) {
8812                 bpf_object__close(obj);
8813                 return err;
8814         }
8815
8816         *pobj = obj;
8817         *prog_fd = bpf_program__fd(first_prog);
8818         return 0;
8819 }
8820
8821 struct bpf_link {
8822         int (*detach)(struct bpf_link *link);
8823         int (*destroy)(struct bpf_link *link);
8824         char *pin_path;         /* NULL, if not pinned */
8825         int fd;                 /* hook FD, -1 if not applicable */
8826         bool disconnected;
8827 };
8828
8829 /* Replace link's underlying BPF program with the new one */
8830 int bpf_link__update_program(struct bpf_link *link, struct bpf_program *prog)
8831 {
8832         return bpf_link_update(bpf_link__fd(link), bpf_program__fd(prog), NULL);
8833 }
8834
8835 /* Release "ownership" of the underlying BPF resource (typically, a BPF
8836  * program attached to some BPF hook, e.g., tracepoint, kprobe, etc). A
8837  * disconnected link, when destroyed through a bpf_link__destroy() call,
8838  * won't attempt to detach/unregister that BPF resource. This is useful in
8839  * situations where, say, the attached BPF program has to outlive the
8840  * userspace program that attached it. Depending on the type of BPF
8841  * program, though, additional steps (like pinning the BPF program in
8842  * BPF FS) might be necessary to ensure that exit of the userspace program
8843  * doesn't trigger automatic detachment and cleanup inside the kernel.
8844  */
8845 void bpf_link__disconnect(struct bpf_link *link)
8846 {
8847         link->disconnected = true;
8848 }
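
/* An illustrative sketch (not part of libbpf): keeping a BPF program
 * attached after the owning process exits by pinning the link in BPF FS
 * (the pin path below is just an example) and then disconnecting it, so
 * that bpf_link__destroy() only frees userspace state:
 *
 *	struct bpf_link *link = bpf_program__attach(prog);
 *
 *	if (!libbpf_get_error(link) &&
 *	    !bpf_link__pin(link, "/sys/fs/bpf/my_link")) {
 *		bpf_link__disconnect(link);
 *		bpf_link__destroy(link); // no detach; pinned link stays alive
 *	}
 */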
8849
8850 int bpf_link__destroy(struct bpf_link *link)
8851 {
8852         int err = 0;
8853
8854         if (IS_ERR_OR_NULL(link))
8855                 return 0;
8856
8857         if (!link->disconnected && link->detach)
8858                 err = link->detach(link);
8859         if (link->destroy)
8860                 link->destroy(link);
8861         if (link->pin_path)
8862                 free(link->pin_path);
8863         free(link);
8864
8865         return err;
8866 }
8867
8868 int bpf_link__fd(const struct bpf_link *link)
8869 {
8870         return link->fd;
8871 }
8872
8873 const char *bpf_link__pin_path(const struct bpf_link *link)
8874 {
8875         return link->pin_path;
8876 }
8877
8878 static int bpf_link__detach_fd(struct bpf_link *link)
8879 {
8880         return close(link->fd);
8881 }
8882
8883 struct bpf_link *bpf_link__open(const char *path)
8884 {
8885         struct bpf_link *link;
8886         int fd;
8887
8888         fd = bpf_obj_get(path);
8889         if (fd < 0) {
8890                 fd = -errno;
8891                 pr_warn("failed to open link at %s: %d\n", path, fd);
8892                 return ERR_PTR(fd);
8893         }
8894
8895         link = calloc(1, sizeof(*link));
8896         if (!link) {
8897                 close(fd);
8898                 return ERR_PTR(-ENOMEM);
8899         }
8900         link->detach = &bpf_link__detach_fd;
8901         link->fd = fd;
8902
8903         link->pin_path = strdup(path);
8904         if (!link->pin_path) {
8905                 bpf_link__destroy(link);
8906                 return ERR_PTR(-ENOMEM);
8907         }
8908
8909         return link;
8910 }
8911
8912 int bpf_link__detach(struct bpf_link *link)
8913 {
8914         return bpf_link_detach(link->fd) ? -errno : 0;
8915 }
8916
8917 int bpf_link__pin(struct bpf_link *link, const char *path)
8918 {
8919         int err;
8920
8921         if (link->pin_path)
8922                 return -EBUSY;
8923         err = make_parent_dir(path);
8924         if (err)
8925                 return err;
8926         err = check_path(path);
8927         if (err)
8928                 return err;
8929
8930         link->pin_path = strdup(path);
8931         if (!link->pin_path)
8932                 return -ENOMEM;
8933
8934         if (bpf_obj_pin(link->fd, link->pin_path)) {
8935                 err = -errno;
8936                 zfree(&link->pin_path);
8937                 return err;
8938         }
8939
8940         pr_debug("link fd=%d: pinned at %s\n", link->fd, link->pin_path);
8941         return 0;
8942 }
8943
8944 int bpf_link__unpin(struct bpf_link *link)
8945 {
8946         int err;
8947
8948         if (!link->pin_path)
8949                 return -EINVAL;
8950
8951         err = unlink(link->pin_path);
8952         if (err != 0)
8953                 return -errno;
8954
8955         pr_debug("link fd=%d: unpinned from %s\n", link->fd, link->pin_path);
8956         zfree(&link->pin_path);
8957         return 0;
8958 }
8959
8960 static int bpf_link__detach_perf_event(struct bpf_link *link)
8961 {
8962         int err;
8963
8964         err = ioctl(link->fd, PERF_EVENT_IOC_DISABLE, 0);
8965         if (err)
8966                 err = -errno;
8967
8968         close(link->fd);
8969         return err;
8970 }
8971
8972 struct bpf_link *bpf_program__attach_perf_event(struct bpf_program *prog,
8973                                                 int pfd)
8974 {
8975         char errmsg[STRERR_BUFSIZE];
8976         struct bpf_link *link;
8977         int prog_fd, err;
8978
8979         if (pfd < 0) {
8980                 pr_warn("prog '%s': invalid perf event FD %d\n",
8981                         prog->name, pfd);
8982                 return ERR_PTR(-EINVAL);
8983         }
8984         prog_fd = bpf_program__fd(prog);
8985         if (prog_fd < 0) {
8986                 pr_warn("prog '%s': can't attach BPF program w/o FD (did you load it?)\n",
8987                         prog->name);
8988                 return ERR_PTR(-EINVAL);
8989         }
8990
8991         link = calloc(1, sizeof(*link));
8992         if (!link)
8993                 return ERR_PTR(-ENOMEM);
8994         link->detach = &bpf_link__detach_perf_event;
8995         link->fd = pfd;
8996
8997         if (ioctl(pfd, PERF_EVENT_IOC_SET_BPF, prog_fd) < 0) {
8998                 err = -errno;
8999                 free(link);
9000                 pr_warn("prog '%s': failed to attach to pfd %d: %s\n",
9001                         prog->name, pfd, libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
9002                 if (err == -EPROTO)
9003                         pr_warn("prog '%s': try adding PERF_SAMPLE_CALLCHAIN to, or removing exclude_callchain_[kernel|user] from, pfd %d\n",
9004                                 prog->name, pfd);
9005                 return ERR_PTR(err);
9006         }
9007         if (ioctl(pfd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
9008                 err = -errno;
9009                 free(link);
9010                 pr_warn("prog '%s': failed to enable pfd %d: %s\n",
9011                         prog->name, pfd, libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
9012                 return ERR_PTR(err);
9013         }
9014         return link;
9015 }
9016
9017 /*
9018  * This function is expected to parse an integer in the range of [0, 2^31-1]
9019  * from the given file using scanf format string fmt. If the actual parsed
9020  * value is negative, the result might be indistinguishable from an error.
9021  */
9022 static int parse_uint_from_file(const char *file, const char *fmt)
9023 {
9024         char buf[STRERR_BUFSIZE];
9025         int err, ret;
9026         FILE *f;
9027
9028         f = fopen(file, "r");
9029         if (!f) {
9030                 err = -errno;
9031                 pr_debug("failed to open '%s': %s\n", file,
9032                          libbpf_strerror_r(err, buf, sizeof(buf)));
9033                 return err;
9034         }
9035         err = fscanf(f, fmt, &ret);
9036         if (err != 1) {
9037                 err = err == EOF ? -EIO : -errno;
9038                 pr_debug("failed to parse '%s': %s\n", file,
9039                         libbpf_strerror_r(err, buf, sizeof(buf)));
9040                 fclose(f);
9041                 return err;
9042         }
9043         fclose(f);
9044         return ret;
9045 }
9046
9047 static int determine_kprobe_perf_type(void)
9048 {
9049         const char *file = "/sys/bus/event_source/devices/kprobe/type";
9050
9051         return parse_uint_from_file(file, "%d\n");
9052 }
9053
9054 static int determine_uprobe_perf_type(void)
9055 {
9056         const char *file = "/sys/bus/event_source/devices/uprobe/type";
9057
9058         return parse_uint_from_file(file, "%d\n");
9059 }
9060
9061 static int determine_kprobe_retprobe_bit(void)
9062 {
9063         const char *file = "/sys/bus/event_source/devices/kprobe/format/retprobe";
9064
9065         return parse_uint_from_file(file, "config:%d\n");
9066 }
9067
9068 static int determine_uprobe_retprobe_bit(void)
9069 {
9070         const char *file = "/sys/bus/event_source/devices/uprobe/format/retprobe";
9071
9072         return parse_uint_from_file(file, "config:%d\n");
9073 }
9074
9075 static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name,
9076                                  uint64_t offset, int pid)
9077 {
9078         struct perf_event_attr attr = {};
9079         char errmsg[STRERR_BUFSIZE];
9080         int type, pfd, err;
9081
9082         type = uprobe ? determine_uprobe_perf_type()
9083                       : determine_kprobe_perf_type();
9084         if (type < 0) {
9085                 pr_warn("failed to determine %s perf type: %s\n",
9086                         uprobe ? "uprobe" : "kprobe",
9087                         libbpf_strerror_r(type, errmsg, sizeof(errmsg)));
9088                 return type;
9089         }
9090         if (retprobe) {
9091                 int bit = uprobe ? determine_uprobe_retprobe_bit()
9092                                  : determine_kprobe_retprobe_bit();
9093
9094                 if (bit < 0) {
9095                         pr_warn("failed to determine %s retprobe bit: %s\n",
9096                                 uprobe ? "uprobe" : "kprobe",
9097                                 libbpf_strerror_r(bit, errmsg, sizeof(errmsg)));
9098                         return bit;
9099                 }
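                /* the retprobe bit position is read from
                 * /sys/bus/event_source/devices/{kprobe,uprobe}/format/retprobe
                 * by the determine_*_retprobe_bit() helpers above
                 */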
9100                 attr.config |= 1 << bit;
9101         }
9102         attr.size = sizeof(attr);
9103         attr.type = type;
9104         attr.config1 = ptr_to_u64(name); /* kprobe_func or uprobe_path */
9105         attr.config2 = offset;           /* kprobe_addr or probe_offset */
9106
9107         /* pid filter is meaningful only for uprobes */
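        /* perf_event_open() rejects pid == -1 && cpu == -1, so cpu 0 is used
         * when no pid filter is requested
         */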
9108         pfd = syscall(__NR_perf_event_open, &attr,
9109                       pid < 0 ? -1 : pid /* pid */,
9110                       pid == -1 ? 0 : -1 /* cpu */,
9111                       -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC);
9112         if (pfd < 0) {
9113                 err = -errno;
9114                 pr_warn("%s perf_event_open() failed: %s\n",
9115                         uprobe ? "uprobe" : "kprobe",
9116                         libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
9117                 return err;
9118         }
9119         return pfd;
9120 }
9121
9122 struct bpf_link *bpf_program__attach_kprobe(struct bpf_program *prog,
9123                                             bool retprobe,
9124                                             const char *func_name)
9125 {
9126         char errmsg[STRERR_BUFSIZE];
9127         struct bpf_link *link;
9128         int pfd, err;
9129
9130         pfd = perf_event_open_probe(false /* uprobe */, retprobe, func_name,
9131                                     0 /* offset */, -1 /* pid */);
9132         if (pfd < 0) {
9133                 pr_warn("prog '%s': failed to create %s '%s' perf event: %s\n",
9134                         prog->name, retprobe ? "kretprobe" : "kprobe", func_name,
9135                         libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
9136                 return ERR_PTR(pfd);
9137         }
9138         link = bpf_program__attach_perf_event(prog, pfd);
9139         if (IS_ERR(link)) {
9140                 close(pfd);
9141                 err = PTR_ERR(link);
9142                 pr_warn("prog '%s': failed to attach to %s '%s': %s\n",
9143                         prog->name, retprobe ? "kretprobe" : "kprobe", func_name,
9144                         libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
9145                 return link;
9146         }
9147         return link;
9148 }
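
/* An illustrative usage sketch (not part of libbpf); "do_sys_open" is just
 * an example kernel symbol:
 *
 *	struct bpf_link *link;
 *
 *	link = bpf_program__attach_kprobe(prog, true, "do_sys_open");
 *	if (libbpf_get_error(link))
 *		return; // an ERR_PTR-encoded error is returned, never NULL
 */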
9149
9150 static struct bpf_link *attach_kprobe(const struct bpf_sec_def *sec,
9151                                       struct bpf_program *prog)
9152 {
9153         const char *func_name;
9154         bool retprobe;
9155
9156         func_name = prog->sec_name + sec->len;
9157         retprobe = strcmp(sec->sec, "kretprobe/") == 0;
9158
9159         return bpf_program__attach_kprobe(prog, retprobe, func_name);
9160 }
9161
9162 struct bpf_link *bpf_program__attach_uprobe(struct bpf_program *prog,
9163                                             bool retprobe, pid_t pid,
9164                                             const char *binary_path,
9165                                             size_t func_offset)
9166 {
9167         char errmsg[STRERR_BUFSIZE];
9168         struct bpf_link *link;
9169         int pfd, err;
9170
9171         pfd = perf_event_open_probe(true /* uprobe */, retprobe,
9172                                     binary_path, func_offset, pid);
9173         if (pfd < 0) {
9174                 pr_warn("prog '%s': failed to create %s '%s:0x%zx' perf event: %s\n",
9175                         prog->name, retprobe ? "uretprobe" : "uprobe",
9176                         binary_path, func_offset,
9177                         libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
9178                 return ERR_PTR(pfd);
9179         }
9180         link = bpf_program__attach_perf_event(prog, pfd);
9181         if (IS_ERR(link)) {
9182                 close(pfd);
9183                 err = PTR_ERR(link);
9184                 pr_warn("prog '%s': failed to attach to %s '%s:0x%zx': %s\n",
9185                         prog->name, retprobe ? "uretprobe" : "uprobe",
9186                         binary_path, func_offset,
9187                         libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
9188                 return link;
9189         }
9190         return link;
9191 }
9192
9193 static int determine_tracepoint_id(const char *tp_category,
9194                                    const char *tp_name)
9195 {
9196         char file[PATH_MAX];
9197         int ret;
9198
9199         ret = snprintf(file, sizeof(file),
9200                        "/sys/kernel/debug/tracing/events/%s/%s/id",
9201                        tp_category, tp_name);
9202         if (ret < 0)
9203                 return -errno;
9204         if (ret >= sizeof(file)) {
9205                 pr_debug("tracepoint %s/%s path is too long\n",
9206                          tp_category, tp_name);
9207                 return -E2BIG;
9208         }
9209         return parse_uint_from_file(file, "%d\n");
9210 }
9211
9212 static int perf_event_open_tracepoint(const char *tp_category,
9213                                       const char *tp_name)
9214 {
9215         struct perf_event_attr attr = {};
9216         char errmsg[STRERR_BUFSIZE];
9217         int tp_id, pfd, err;
9218
9219         tp_id = determine_tracepoint_id(tp_category, tp_name);
9220         if (tp_id < 0) {
9221                 pr_warn("failed to determine tracepoint '%s/%s' perf event ID: %s\n",
9222                         tp_category, tp_name,
9223                         libbpf_strerror_r(tp_id, errmsg, sizeof(errmsg)));
9224                 return tp_id;
9225         }
9226
9227         attr.type = PERF_TYPE_TRACEPOINT;
9228         attr.size = sizeof(attr);
9229         attr.config = tp_id;
9230
9231         pfd = syscall(__NR_perf_event_open, &attr, -1 /* pid */, 0 /* cpu */,
9232                       -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC);
9233         if (pfd < 0) {
9234                 err = -errno;
9235                 pr_warn("tracepoint '%s/%s' perf_event_open() failed: %s\n",
9236                         tp_category, tp_name,
9237                         libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
9238                 return err;
9239         }
9240         return pfd;
9241 }
9242
9243 struct bpf_link *bpf_program__attach_tracepoint(struct bpf_program *prog,
9244                                                 const char *tp_category,
9245                                                 const char *tp_name)
9246 {
9247         char errmsg[STRERR_BUFSIZE];
9248         struct bpf_link *link;
9249         int pfd, err;
9250
9251         pfd = perf_event_open_tracepoint(tp_category, tp_name);
9252         if (pfd < 0) {
9253                 pr_warn("prog '%s': failed to create tracepoint '%s/%s' perf event: %s\n",
9254                         prog->name, tp_category, tp_name,
9255                         libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
9256                 return ERR_PTR(pfd);
9257         }
9258         link = bpf_program__attach_perf_event(prog, pfd);
9259         if (IS_ERR(link)) {
9260                 close(pfd);
9261                 err = PTR_ERR(link);
9262                 pr_warn("prog '%s': failed to attach to tracepoint '%s/%s': %s\n",
9263                         prog->name, tp_category, tp_name,
9264                         libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
9265                 return link;
9266         }
9267         return link;
9268 }
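
/* An illustrative sketch (not part of libbpf); "sched"/"sched_switch" is an
 * example category/name pair under /sys/kernel/debug/tracing/events/:
 *
 *	link = bpf_program__attach_tracepoint(prog, "sched", "sched_switch");
 */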
9269
9270 static struct bpf_link *attach_tp(const struct bpf_sec_def *sec,
9271                                   struct bpf_program *prog)
9272 {
9273         char *sec_name, *tp_cat, *tp_name;
9274         struct bpf_link *link;
9275
9276         sec_name = strdup(prog->sec_name);
9277         if (!sec_name)
9278                 return ERR_PTR(-ENOMEM);
9279
9280         /* extract "tp/<category>/<name>" */
9281         tp_cat = sec_name + sec->len;
9282         tp_name = strchr(tp_cat, '/');
9283         if (!tp_name) {
9284                 link = ERR_PTR(-EINVAL);
9285                 goto out;
9286         }
9287         *tp_name = '\0';
9288         tp_name++;
9289
9290         link = bpf_program__attach_tracepoint(prog, tp_cat, tp_name);
9291 out:
9292         free(sec_name);
9293         return link;
9294 }
9295
9296 struct bpf_link *bpf_program__attach_raw_tracepoint(struct bpf_program *prog,
9297                                                     const char *tp_name)
9298 {
9299         char errmsg[STRERR_BUFSIZE];
9300         struct bpf_link *link;
9301         int prog_fd, pfd;
9302
9303         prog_fd = bpf_program__fd(prog);
9304         if (prog_fd < 0) {
9305                 pr_warn("prog '%s': can't attach before loaded\n", prog->name);
9306                 return ERR_PTR(-EINVAL);
9307         }
9308
9309         link = calloc(1, sizeof(*link));
9310         if (!link)
9311                 return ERR_PTR(-ENOMEM);
9312         link->detach = &bpf_link__detach_fd;
9313
9314         pfd = bpf_raw_tracepoint_open(tp_name, prog_fd);
9315         if (pfd < 0) {
9316                 pfd = -errno;
9317                 free(link);
9318                 pr_warn("prog '%s': failed to attach to raw tracepoint '%s': %s\n",
9319                         prog->name, tp_name, libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
9320                 return ERR_PTR(pfd);
9321         }
9322         link->fd = pfd;
9323         return link;
9324 }
9325
9326 static struct bpf_link *attach_raw_tp(const struct bpf_sec_def *sec,
9327                                       struct bpf_program *prog)
9328 {
9329         const char *tp_name = prog->sec_name + sec->len;
9330
9331         return bpf_program__attach_raw_tracepoint(prog, tp_name);
9332 }
9333
9334 /* Common logic for all BPF program types that attach to a btf_id */
9335 static struct bpf_link *bpf_program__attach_btf_id(struct bpf_program *prog)
9336 {
9337         char errmsg[STRERR_BUFSIZE];
9338         struct bpf_link *link;
9339         int prog_fd, pfd;
9340
9341         prog_fd = bpf_program__fd(prog);
9342         if (prog_fd < 0) {
9343                 pr_warn("prog '%s': can't attach before loaded\n", prog->name);
9344                 return ERR_PTR(-EINVAL);
9345         }
9346
9347         link = calloc(1, sizeof(*link));
9348         if (!link)
9349                 return ERR_PTR(-ENOMEM);
9350         link->detach = &bpf_link__detach_fd;
9351
9352         pfd = bpf_raw_tracepoint_open(NULL, prog_fd);
9353         if (pfd < 0) {
9354                 pfd = -errno;
9355                 free(link);
9356                 pr_warn("prog '%s': failed to attach: %s\n",
9357                         prog->name, libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
9358                 return ERR_PTR(pfd);
9359         }
9360         link->fd = pfd;
9361         return link;
9362 }
9363
9364 struct bpf_link *bpf_program__attach_trace(struct bpf_program *prog)
9365 {
9366         return bpf_program__attach_btf_id(prog);
9367 }
9368
9369 struct bpf_link *bpf_program__attach_lsm(struct bpf_program *prog)
9370 {
9371         return bpf_program__attach_btf_id(prog);
9372 }
9373
9374 static struct bpf_link *attach_trace(const struct bpf_sec_def *sec,
9375                                      struct bpf_program *prog)
9376 {
9377         return bpf_program__attach_trace(prog);
9378 }
9379
9380 static struct bpf_link *attach_lsm(const struct bpf_sec_def *sec,
9381                                    struct bpf_program *prog)
9382 {
9383         return bpf_program__attach_lsm(prog);
9384 }
9385
9386 static struct bpf_link *attach_iter(const struct bpf_sec_def *sec,
9387                                     struct bpf_program *prog)
9388 {
9389         return bpf_program__attach_iter(prog, NULL);
9390 }
9391
9392 static struct bpf_link *
9393 bpf_program__attach_fd(struct bpf_program *prog, int target_fd, int btf_id,
9394                        const char *target_name)
9395 {
9396         DECLARE_LIBBPF_OPTS(bpf_link_create_opts, opts,
9397                             .target_btf_id = btf_id);
9398         enum bpf_attach_type attach_type;
9399         char errmsg[STRERR_BUFSIZE];
9400         struct bpf_link *link;
9401         int prog_fd, link_fd;
9402
9403         prog_fd = bpf_program__fd(prog);
9404         if (prog_fd < 0) {
9405                 pr_warn("prog '%s': can't attach before loaded\n", prog->name);
9406                 return ERR_PTR(-EINVAL);
9407         }
9408
9409         link = calloc(1, sizeof(*link));
9410         if (!link)
9411                 return ERR_PTR(-ENOMEM);
9412         link->detach = &bpf_link__detach_fd;
9413
9414         attach_type = bpf_program__get_expected_attach_type(prog);
9415         link_fd = bpf_link_create(prog_fd, target_fd, attach_type, &opts);
9416         if (link_fd < 0) {
9417                 link_fd = -errno;
9418                 free(link);
9419                 pr_warn("prog '%s': failed to attach to %s: %s\n",
9420                         prog->name, target_name,
9421                         libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg)));
9422                 return ERR_PTR(link_fd);
9423         }
9424         link->fd = link_fd;
9425         return link;
9426 }
9427
9428 struct bpf_link *
9429 bpf_program__attach_cgroup(struct bpf_program *prog, int cgroup_fd)
9430 {
9431         return bpf_program__attach_fd(prog, cgroup_fd, 0, "cgroup");
9432 }
9433
9434 struct bpf_link *
9435 bpf_program__attach_netns(struct bpf_program *prog, int netns_fd)
9436 {
9437         return bpf_program__attach_fd(prog, netns_fd, 0, "netns");
9438 }
9439
9440 struct bpf_link *bpf_program__attach_xdp(struct bpf_program *prog, int ifindex)
9441 {
9442         /* target_fd/target_ifindex use the same field in LINK_CREATE */
9443         return bpf_program__attach_fd(prog, ifindex, 0, "xdp");
9444 }
9445
9446 struct bpf_link *bpf_program__attach_freplace(struct bpf_program *prog,
9447                                               int target_fd,
9448                                               const char *attach_func_name)
9449 {
9450         int btf_id;
9451
9452         if (!!target_fd != !!attach_func_name) {
9453                 pr_warn("prog '%s': supply none or both of target_fd and attach_func_name\n",
9454                         prog->name);
9455                 return ERR_PTR(-EINVAL);
9456         }
9457
9458         if (prog->type != BPF_PROG_TYPE_EXT) {
9459                 pr_warn("prog '%s': only BPF_PROG_TYPE_EXT can attach as freplace\n",
9460                         prog->name);
9461                 return ERR_PTR(-EINVAL);
9462         }
9463
9464         if (target_fd) {
9465                 btf_id = libbpf_find_prog_btf_id(attach_func_name, target_fd);
9466                 if (btf_id < 0)
9467                         return ERR_PTR(btf_id);
9468
9469                 return bpf_program__attach_fd(prog, target_fd, btf_id, "freplace");
9470         } else {
9471                 /* no target, so use raw_tracepoint_open for compatibility
9472                  * with old kernels
9473                  */
9474                 return bpf_program__attach_trace(prog);
9475         }
9476 }
9477
9478 struct bpf_link *
9479 bpf_program__attach_iter(struct bpf_program *prog,
9480                          const struct bpf_iter_attach_opts *opts)
9481 {
9482         DECLARE_LIBBPF_OPTS(bpf_link_create_opts, link_create_opts);
9483         char errmsg[STRERR_BUFSIZE];
9484         struct bpf_link *link;
9485         int prog_fd, link_fd;
9486         __u32 target_fd = 0;
9487
9488         if (!OPTS_VALID(opts, bpf_iter_attach_opts))
9489                 return ERR_PTR(-EINVAL);
9490
9491         link_create_opts.iter_info = OPTS_GET(opts, link_info, (void *)0);
9492         link_create_opts.iter_info_len = OPTS_GET(opts, link_info_len, 0);
9493
9494         prog_fd = bpf_program__fd(prog);
9495         if (prog_fd < 0) {
9496                 pr_warn("prog '%s': can't attach before loaded\n", prog->name);
9497                 return ERR_PTR(-EINVAL);
9498         }
9499
9500         link = calloc(1, sizeof(*link));
9501         if (!link)
9502                 return ERR_PTR(-ENOMEM);
9503         link->detach = &bpf_link__detach_fd;
9504
9505         link_fd = bpf_link_create(prog_fd, target_fd, BPF_TRACE_ITER,
9506                                   &link_create_opts);
9507         if (link_fd < 0) {
9508                 link_fd = -errno;
9509                 free(link);
9510                 pr_warn("prog '%s': failed to attach to iterator: %s\n",
9511                         prog->name, libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg)));
9512                 return ERR_PTR(link_fd);
9513         }
9514         link->fd = link_fd;
9515         return link;
9516 }
9517
9518 struct bpf_link *bpf_program__attach(struct bpf_program *prog)
9519 {
9520         const struct bpf_sec_def *sec_def;
9521
9522         sec_def = find_sec_def(prog->sec_name);
9523         if (!sec_def || !sec_def->attach_fn)
9524                 return ERR_PTR(-ESRCH);
9525
9526         return sec_def->attach_fn(sec_def, prog);
9527 }
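
/* A minimal sketch of generic attachment (illustrative, not part of libbpf):
 * bpf_program__attach() picks the attach method purely from the program's
 * ELF section name, via find_sec_def():
 *
 *	struct bpf_link *link = bpf_program__attach(prog);
 *
 *	if (libbpf_get_error(link))
 *		// -ESRCH: section name unknown or not auto-attachable
 */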
9528
9529 static int bpf_link__detach_struct_ops(struct bpf_link *link)
9530 {
9531         __u32 zero = 0;
9532
9533         if (bpf_map_delete_elem(link->fd, &zero))
9534                 return -errno;
9535
9536         return 0;
9537 }
9538
9539 struct bpf_link *bpf_map__attach_struct_ops(struct bpf_map *map)
9540 {
9541         struct bpf_struct_ops *st_ops;
9542         struct bpf_link *link;
9543         __u32 i, zero = 0;
9544         int err;
9545
9546         if (!bpf_map__is_struct_ops(map) || map->fd == -1)
9547                 return ERR_PTR(-EINVAL);
9548
9549         link = calloc(1, sizeof(*link));
9550         if (!link)
9551                 return ERR_PTR(-ENOMEM);
9552
9553         st_ops = map->st_ops;
9554         for (i = 0; i < btf_vlen(st_ops->type); i++) {
9555                 struct bpf_program *prog = st_ops->progs[i];
9556                 void *kern_data;
9557                 int prog_fd;
9558
9559                 if (!prog)
9560                         continue;
9561
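                /* fill this member's func ptr slot in the kernel-side data
                 * image (kern_vdata) with the prog's FD; the kernel resolves
                 * these FDs to programs when the map is updated below
                 */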
9562                 prog_fd = bpf_program__fd(prog);
9563                 kern_data = st_ops->kern_vdata + st_ops->kern_func_off[i];
9564                 *(unsigned long *)kern_data = prog_fd;
9565         }
9566
9567         err = bpf_map_update_elem(map->fd, &zero, st_ops->kern_vdata, 0);
9568         if (err) {
9569                 err = -errno;
9570                 free(link);
9571                 return ERR_PTR(err);
9572         }
9573
9574         link->detach = bpf_link__detach_struct_ops;
9575         link->fd = map->fd;
9576
9577         return link;
9578 }
9579
9580 enum bpf_perf_event_ret
9581 bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size,
9582                            void **copy_mem, size_t *copy_size,
9583                            bpf_perf_event_print_t fn, void *private_data)
9584 {
9585         struct perf_event_mmap_page *header = mmap_mem;
9586         __u64 data_head = ring_buffer_read_head(header);
9587         __u64 data_tail = header->data_tail;
9588         void *base = ((__u8 *)header) + page_size;
9589         int ret = LIBBPF_PERF_EVENT_CONT;
9590         struct perf_event_header *ehdr;
9591         size_t ehdr_size;
9592
9593         while (data_head != data_tail) {
9594                 ehdr = base + (data_tail & (mmap_size - 1));
9595                 ehdr_size = ehdr->size;
9596
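                /* a record may wrap past the end of the mmap'ed ring; if so,
                 * reassemble it into a contiguous, (re)allocated copy buffer
                 * before handing it to the callback
                 */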
9597                 if (((void *)ehdr) + ehdr_size > base + mmap_size) {
9598                         void *copy_start = ehdr;
9599                         size_t len_first = base + mmap_size - copy_start;
9600                         size_t len_secnd = ehdr_size - len_first;
9601
9602                         if (*copy_size < ehdr_size) {
9603                                 free(*copy_mem);
9604                                 *copy_mem = malloc(ehdr_size);
9605                                 if (!*copy_mem) {
9606                                         *copy_size = 0;
9607                                         ret = LIBBPF_PERF_EVENT_ERROR;
9608                                         break;
9609                                 }
9610                                 *copy_size = ehdr_size;
9611                         }
9612
9613                         memcpy(*copy_mem, copy_start, len_first);
9614                         memcpy(*copy_mem + len_first, base, len_secnd);
9615                         ehdr = *copy_mem;
9616                 }
9617
9618                 ret = fn(ehdr, private_data);
9619                 data_tail += ehdr_size;
9620                 if (ret != LIBBPF_PERF_EVENT_CONT)
9621                         break;
9622         }
9623
9624         ring_buffer_write_tail(header, data_tail);
9625         return ret;
9626 }
9627
9628 struct perf_buffer;
9629
9630 struct perf_buffer_params {
9631         struct perf_event_attr *attr;
9632         /* if event_cb is specified, it takes precedence */
9633         perf_buffer_event_fn event_cb;
9634         /* sample_cb and lost_cb are higher-level common-case callbacks */
9635         perf_buffer_sample_fn sample_cb;
9636         perf_buffer_lost_fn lost_cb;
9637         void *ctx;
9638         int cpu_cnt;
9639         int *cpus;
9640         int *map_keys;
9641 };
9642
9643 struct perf_cpu_buf {
9644         struct perf_buffer *pb;
9645         void *base; /* mmap()'ed memory */
9646         void *buf; /* for reconstructing segmented data */
9647         size_t buf_size;
9648         int fd;
9649         int cpu;
9650         int map_key;
9651 };
9652
9653 struct perf_buffer {
9654         perf_buffer_event_fn event_cb;
9655         perf_buffer_sample_fn sample_cb;
9656         perf_buffer_lost_fn lost_cb;
9657         void *ctx; /* passed into callbacks */
9658
9659         size_t page_size;
9660         size_t mmap_size;
9661         struct perf_cpu_buf **cpu_bufs;
9662         struct epoll_event *events;
9663         int cpu_cnt; /* number of allocated CPU buffers */
9664         int epoll_fd; /* epoll instance FD for polling per-CPU bufs */
9665         int map_fd; /* BPF_MAP_TYPE_PERF_EVENT_ARRAY BPF map FD */
9666 };
9667
9668 static void perf_buffer__free_cpu_buf(struct perf_buffer *pb,
9669                                       struct perf_cpu_buf *cpu_buf)
9670 {
9671         if (!cpu_buf)
9672                 return;
9673         if (cpu_buf->base &&
9674             munmap(cpu_buf->base, pb->mmap_size + pb->page_size))
9675                 pr_warn("failed to munmap cpu_buf #%d\n", cpu_buf->cpu);
9676         if (cpu_buf->fd >= 0) {
9677                 ioctl(cpu_buf->fd, PERF_EVENT_IOC_DISABLE, 0);
9678                 close(cpu_buf->fd);
9679         }
9680         free(cpu_buf->buf);
9681         free(cpu_buf);
9682 }
9683
9684 void perf_buffer__free(struct perf_buffer *pb)
9685 {
9686         int i;
9687
9688         if (IS_ERR_OR_NULL(pb))
9689                 return;
9690         if (pb->cpu_bufs) {
9691                 for (i = 0; i < pb->cpu_cnt; i++) {
9692                         struct perf_cpu_buf *cpu_buf = pb->cpu_bufs[i];
9693
9694                         if (!cpu_buf)
9695                                 continue;
9696
9697                         bpf_map_delete_elem(pb->map_fd, &cpu_buf->map_key);
9698                         perf_buffer__free_cpu_buf(pb, cpu_buf);
9699                 }
9700                 free(pb->cpu_bufs);
9701         }
9702         if (pb->epoll_fd >= 0)
9703                 close(pb->epoll_fd);
9704         free(pb->events);
9705         free(pb);
9706 }
9707
9708 static struct perf_cpu_buf *
9709 perf_buffer__open_cpu_buf(struct perf_buffer *pb, struct perf_event_attr *attr,
9710                           int cpu, int map_key)
9711 {
9712         struct perf_cpu_buf *cpu_buf;
9713         char msg[STRERR_BUFSIZE];
9714         int err;
9715
9716         cpu_buf = calloc(1, sizeof(*cpu_buf));
9717         if (!cpu_buf)
9718                 return ERR_PTR(-ENOMEM);
9719
9720         cpu_buf->pb = pb;
9721         cpu_buf->cpu = cpu;
9722         cpu_buf->map_key = map_key;
9723
9724         cpu_buf->fd = syscall(__NR_perf_event_open, attr, -1 /* pid */, cpu,
9725                               -1, PERF_FLAG_FD_CLOEXEC);
9726         if (cpu_buf->fd < 0) {
9727                 err = -errno;
9728                 pr_warn("failed to open perf buffer event on cpu #%d: %s\n",
9729                         cpu, libbpf_strerror_r(err, msg, sizeof(msg)));
9730                 goto error;
9731         }
9732
9733         cpu_buf->base = mmap(NULL, pb->mmap_size + pb->page_size,
9734                              PROT_READ | PROT_WRITE, MAP_SHARED,
9735                              cpu_buf->fd, 0);
9736         if (cpu_buf->base == MAP_FAILED) {
9737                 cpu_buf->base = NULL;
9738                 err = -errno;
9739                 pr_warn("failed to mmap perf buffer on cpu #%d: %s\n",
9740                         cpu, libbpf_strerror_r(err, msg, sizeof(msg)));
9741                 goto error;
9742         }
9743
9744         if (ioctl(cpu_buf->fd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
9745                 err = -errno;
9746                 pr_warn("failed to enable perf buffer event on cpu #%d: %s\n",
9747                         cpu, libbpf_strerror_r(err, msg, sizeof(msg)));
9748                 goto error;
9749         }
9750
9751         return cpu_buf;
9752
9753 error:
9754         perf_buffer__free_cpu_buf(pb, cpu_buf);
9755         return (struct perf_cpu_buf *)ERR_PTR(err);
9756 }
9757
9758 static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt,
9759                                               struct perf_buffer_params *p);
9760
9761 struct perf_buffer *perf_buffer__new(int map_fd, size_t page_cnt,
9762                                      const struct perf_buffer_opts *opts)
9763 {
9764         struct perf_buffer_params p = {};
9765         struct perf_event_attr attr = { 0, };
9766
9767         attr.config = PERF_COUNT_SW_BPF_OUTPUT;
9768         attr.type = PERF_TYPE_SOFTWARE;
9769         attr.sample_type = PERF_SAMPLE_RAW;
9770         attr.sample_period = 1;
9771         attr.wakeup_events = 1;
9772
9773         p.attr = &attr;
9774         p.sample_cb = opts ? opts->sample_cb : NULL;
9775         p.lost_cb = opts ? opts->lost_cb : NULL;
9776         p.ctx = opts ? opts->ctx : NULL;
9777
9778         return __perf_buffer__new(map_fd, page_cnt, &p);
9779 }
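/* Illustrative usage sketch, not part of libbpf itself. Assumes map_fd is
 * the FD of a BPF_MAP_TYPE_PERF_EVENT_ARRAY map and that on_sample/on_lost
 * are caller-defined callbacks:
 *
 *	static void on_sample(void *ctx, int cpu, void *data, __u32 size)
 *	{
 *		// data/size are the raw bytes from bpf_perf_event_output()
 *	}
 *
 *	static void on_lost(void *ctx, int cpu, __u64 cnt)
 *	{
 *		// cnt samples were dropped on this CPU
 *	}
 *
 *	struct perf_buffer_opts opts = {
 *		.sample_cb = on_sample,
 *		.lost_cb = on_lost,
 *	};
 *	struct perf_buffer *pb = perf_buffer__new(map_fd, 64, &opts);
 *
 *	if (libbpf_get_error(pb))
 *		return -1;
 *	while (perf_buffer__poll(pb, 100) >= 0)
 *		; // callbacks are invoked from within poll
 *	perf_buffer__free(pb);
 */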
9780
9781 struct perf_buffer *
9782 perf_buffer__new_raw(int map_fd, size_t page_cnt,
9783                      const struct perf_buffer_raw_opts *opts)
9784 {
9785         struct perf_buffer_params p = {};
9786
9787         p.attr = opts->attr;
9788         p.event_cb = opts->event_cb;
9789         p.ctx = opts->ctx;
9790         p.cpu_cnt = opts->cpu_cnt;
9791         p.cpus = opts->cpus;
9792         p.map_keys = opts->map_keys;
9793
9794         return __perf_buffer__new(map_fd, page_cnt, &p);
9795 }
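/* Sketch of the "raw" variant (caller-side, illustrative): the caller
 * provides its own perf_event_attr and parses every record in event_cb;
 * the optional cpus/map_keys arrays pin buffers to explicit CPUs and map
 * slots. on_event is an assumed caller-defined perf_buffer_event_fn:
 *
 *	struct perf_event_attr attr = {
 *		.type = PERF_TYPE_SOFTWARE,
 *		.config = PERF_COUNT_SW_BPF_OUTPUT,
 *		.sample_type = PERF_SAMPLE_RAW,
 *		.sample_period = 1,
 *		.wakeup_events = 1,
 *	};
 *	struct perf_buffer_raw_opts opts = {
 *		.attr = &attr,
 *		.event_cb = on_event,
 *		.cpu_cnt = 0, // 0: one buffer per online CPU, keyed by CPU index
 *	};
 *	struct perf_buffer *pb = perf_buffer__new_raw(map_fd, 64, &opts);
 */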
9796
9797 static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt,
9798                                               struct perf_buffer_params *p)
9799 {
9800         const char *online_cpus_file = "/sys/devices/system/cpu/online";
9801         struct bpf_map_info map;
9802         char msg[STRERR_BUFSIZE];
9803         struct perf_buffer *pb;
9804         bool *online = NULL;
9805         __u32 map_info_len;
9806         int err, i, j, n;
9807
9808         if (page_cnt == 0 || (page_cnt & (page_cnt - 1))) {
9809                 pr_warn("page count should be a non-zero power of two, but is %zu\n",
9810                         page_cnt);
9811                 return ERR_PTR(-EINVAL);
9812         }
9813
9814         /* best-effort sanity checks */
9815         memset(&map, 0, sizeof(map));
9816         map_info_len = sizeof(map);
9817         err = bpf_obj_get_info_by_fd(map_fd, &map, &map_info_len);
9818         if (err) {
9819                 err = -errno;
9820                 /* if BPF_OBJ_GET_INFO_BY_FD is supported, it will return
9821                  * -EBADFD, -EFAULT, or -E2BIG on a real error
9822                  */
9823                 if (err != -EINVAL) {
9824                         pr_warn("failed to get map info for map FD %d: %s\n",
9825                                 map_fd, libbpf_strerror_r(err, msg, sizeof(msg)));
9826                         return ERR_PTR(err);
9827                 }
9828                 pr_debug("failed to get map info for FD %d; API not supported? Ignoring...\n",
9829                          map_fd);
9830         } else {
9831                 if (map.type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) {
9832                         pr_warn("map '%s' should be BPF_MAP_TYPE_PERF_EVENT_ARRAY\n",
9833                                 map.name);
9834                         return ERR_PTR(-EINVAL);
9835                 }
9836         }
9837
9838         pb = calloc(1, sizeof(*pb));
9839         if (!pb)
9840                 return ERR_PTR(-ENOMEM);
9841
9842         pb->event_cb = p->event_cb;
9843         pb->sample_cb = p->sample_cb;
9844         pb->lost_cb = p->lost_cb;
9845         pb->ctx = p->ctx;
9846
9847         pb->page_size = getpagesize();
9848         pb->mmap_size = pb->page_size * page_cnt;
9849         pb->map_fd = map_fd;
9850
9851         pb->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
9852         if (pb->epoll_fd < 0) {
9853                 err = -errno;
9854                 pr_warn("failed to create epoll instance: %s\n",
9855                         libbpf_strerror_r(err, msg, sizeof(msg)));
9856                 goto error;
9857         }
9858
9859         if (p->cpu_cnt > 0) {
9860                 pb->cpu_cnt = p->cpu_cnt;
9861         } else {
9862                 pb->cpu_cnt = libbpf_num_possible_cpus();
9863                 if (pb->cpu_cnt < 0) {
9864                         err = pb->cpu_cnt;
9865                         goto error;
9866                 }
9867                 if (map.max_entries && map.max_entries < pb->cpu_cnt)
9868                         pb->cpu_cnt = map.max_entries;
9869         }
9870
9871         pb->events = calloc(pb->cpu_cnt, sizeof(*pb->events));
9872         if (!pb->events) {
9873                 err = -ENOMEM;
9874                 pr_warn("failed to allocate events: out of memory\n");
9875                 goto error;
9876         }
9877         pb->cpu_bufs = calloc(pb->cpu_cnt, sizeof(*pb->cpu_bufs));
9878         if (!pb->cpu_bufs) {
9879                 err = -ENOMEM;
9880                 pr_warn("failed to allocate buffers: out of memory\n");
9881                 goto error;
9882         }
9883
9884         err = parse_cpu_mask_file(online_cpus_file, &online, &n);
9885         if (err) {
9886                 pr_warn("failed to get online CPU mask: %d\n", err);
9887                 goto error;
9888         }
9889
9890         for (i = 0, j = 0; i < pb->cpu_cnt; i++) {
9891                 struct perf_cpu_buf *cpu_buf;
9892                 int cpu, map_key;
9893
9894                 cpu = p->cpu_cnt > 0 ? p->cpus[i] : i;
9895                 map_key = p->cpu_cnt > 0 ? p->map_keys[i] : i;
9896
9897                 /* if the user didn't explicitly request particular CPUs to
9898                  * attach to, skip offline/not-present CPUs
9899                  */
9900                 if (p->cpu_cnt <= 0 && (cpu >= n || !online[cpu]))
9901                         continue;
9902
9903                 cpu_buf = perf_buffer__open_cpu_buf(pb, p->attr, cpu, map_key);
9904                 if (IS_ERR(cpu_buf)) {
9905                         err = PTR_ERR(cpu_buf);
9906                         goto error;
9907                 }
9908
9909                 pb->cpu_bufs[j] = cpu_buf;
9910
9911                 err = bpf_map_update_elem(pb->map_fd, &map_key,
9912                                           &cpu_buf->fd, 0);
9913                 if (err) {
9914                         err = -errno;
9915                         pr_warn("failed to set cpu #%d, key %d -> perf FD %d: %s\n",
9916                                 cpu, map_key, cpu_buf->fd,
9917                                 libbpf_strerror_r(err, msg, sizeof(msg)));
9918                         goto error;
9919                 }
9920
9921                 pb->events[j].events = EPOLLIN;
9922                 pb->events[j].data.ptr = cpu_buf;
9923                 if (epoll_ctl(pb->epoll_fd, EPOLL_CTL_ADD, cpu_buf->fd,
9924                               &pb->events[j]) < 0) {
9925                         err = -errno;
9926                         pr_warn("failed to epoll_ctl cpu #%d perf FD %d: %s\n",
9927                                 cpu, cpu_buf->fd,
9928                                 libbpf_strerror_r(err, msg, sizeof(msg)));
9929                         goto error;
9930                 }
9931                 j++;
9932         }
9933         pb->cpu_cnt = j;
9934         free(online);
9935
9936         return pb;
9937
9938 error:
9939         free(online);
9940         if (pb)
9941                 perf_buffer__free(pb);
9942         return ERR_PTR(err);
9943 }
9944
9945 struct perf_sample_raw {
9946         struct perf_event_header header;
9947         uint32_t size;
9948         char data[];
9949 };
9950
9951 struct perf_sample_lost {
9952         struct perf_event_header header;
9953         uint64_t id;
9954         uint64_t lost;
9955         uint64_t sample_id;
9956 };
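/* These structs mirror the kernel's perf ring buffer record layouts:
 * perf_sample_raw matches PERF_RECORD_SAMPLE with sample_type ==
 * PERF_SAMPLE_RAW (header, 32-bit size, then the raw bytes passed to
 * bpf_perf_event_output()), while perf_sample_lost matches
 * PERF_RECORD_LOST, whose 'lost' field counts dropped samples.
 */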
9957
9958 static enum bpf_perf_event_ret
9959 perf_buffer__process_record(struct perf_event_header *e, void *ctx)
9960 {
9961         struct perf_cpu_buf *cpu_buf = ctx;
9962         struct perf_buffer *pb = cpu_buf->pb;
9963         void *data = e;
9964
9965         /* user wants full control over parsing perf event */
9966         if (pb->event_cb)
9967                 return pb->event_cb(pb->ctx, cpu_buf->cpu, e);
9968
9969         switch (e->type) {
9970         case PERF_RECORD_SAMPLE: {
9971                 struct perf_sample_raw *s = data;
9972
9973                 if (pb->sample_cb)
9974                         pb->sample_cb(pb->ctx, cpu_buf->cpu, s->data, s->size);
9975                 break;
9976         }
9977         case PERF_RECORD_LOST: {
9978                 struct perf_sample_lost *s = data;
9979
9980                 if (pb->lost_cb)
9981                         pb->lost_cb(pb->ctx, cpu_buf->cpu, s->lost);
9982                 break;
9983         }
9984         default:
9985                 pr_warn("unknown perf sample type %d\n", e->type);
9986                 return LIBBPF_PERF_EVENT_ERROR;
9987         }
9988         return LIBBPF_PERF_EVENT_CONT;
9989 }
9990
9991 static int perf_buffer__process_records(struct perf_buffer *pb,
9992                                         struct perf_cpu_buf *cpu_buf)
9993 {
9994         enum bpf_perf_event_ret ret;
9995
9996         ret = bpf_perf_event_read_simple(cpu_buf->base, pb->mmap_size,
9997                                          pb->page_size, &cpu_buf->buf,
9998                                          &cpu_buf->buf_size,
9999                                          perf_buffer__process_record, cpu_buf);
10000         if (ret != LIBBPF_PERF_EVENT_CONT)
10001                 return ret;
10002         return 0;
10003 }
10004
10005 int perf_buffer__epoll_fd(const struct perf_buffer *pb)
10006 {
10007         return pb->epoll_fd;
10008 }
10009
10010 int perf_buffer__poll(struct perf_buffer *pb, int timeout_ms)
10011 {
10012         int i, cnt, err;
10013
10014         cnt = epoll_wait(pb->epoll_fd, pb->events, pb->cpu_cnt, timeout_ms);
10015         for (i = 0; i < cnt; i++) {
10016                 struct perf_cpu_buf *cpu_buf = pb->events[i].data.ptr;
10017
10018                 err = perf_buffer__process_records(pb, cpu_buf);
10019                 if (err) {
10020                         pr_warn("error while processing records: %d\n", err);
10021                         return err;
10022                 }
10023         }
10024         return cnt < 0 ? -errno : cnt;
10025 }
10026
10027 /* Return number of PERF_EVENT_ARRAY map slots set up by this perf_buffer
10028  * manager.
10029  */
10030 size_t perf_buffer__buffer_cnt(const struct perf_buffer *pb)
10031 {
10032         return pb->cpu_cnt;
10033 }
10034
10035 /*
10036  * Return perf_event FD of a ring buffer in *buf_idx* slot of
10037  * PERF_EVENT_ARRAY BPF map. This FD can be polled for new data using
10038  * select()/poll()/epoll() Linux syscalls.
10039  */
10040 int perf_buffer__buffer_fd(const struct perf_buffer *pb, size_t buf_idx)
10041 {
10042         struct perf_cpu_buf *cpu_buf;
10043
10044         if (buf_idx >= pb->cpu_cnt)
10045                 return -EINVAL;
10046
10047         cpu_buf = pb->cpu_bufs[buf_idx];
10048         if (!cpu_buf)
10049                 return -ENOENT;
10050
10051         return cpu_buf->fd;
10052 }
10053
10054 /*
10055  * Consume data from perf ring buffer corresponding to slot *buf_idx* in
10056  * PERF_EVENT_ARRAY BPF map without waiting/polling. If there is no data to
10057  * consume, do nothing and return success.
10058  * Returns:
10059  *   - 0 on success;
10060  *   - <0 on failure.
10061  */
10062 int perf_buffer__consume_buffer(struct perf_buffer *pb, size_t buf_idx)
10063 {
10064         struct perf_cpu_buf *cpu_buf;
10065
10066         if (buf_idx >= pb->cpu_cnt)
10067                 return -EINVAL;
10068
10069         cpu_buf = pb->cpu_bufs[buf_idx];
10070         if (!cpu_buf)
10071                 return -ENOENT;
10072
10073         return perf_buffer__process_records(pb, cpu_buf);
10074 }
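/* Sketch of manual event-loop integration (illustrative): an application
 * with its own epoll/select loop can register each ring's FD and drain
 * only the rings that signal readiness, instead of calling
 * perf_buffer__poll():
 *
 *	size_t i, n = perf_buffer__buffer_cnt(pb);
 *
 *	for (i = 0; i < n; i++) {
 *		int fd = perf_buffer__buffer_fd(pb, i);
 *		// register fd for EPOLLIN in the application's event loop
 *	}
 *	// later, when the FD of slot i becomes readable:
 *	perf_buffer__consume_buffer(pb, i);
 */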
10075
10076 int perf_buffer__consume(struct perf_buffer *pb)
10077 {
10078         int i, err;
10079
10080         for (i = 0; i < pb->cpu_cnt; i++) {
10081                 struct perf_cpu_buf *cpu_buf = pb->cpu_bufs[i];
10082
10083                 if (!cpu_buf)
10084                         continue;
10085
10086                 err = perf_buffer__process_records(pb, cpu_buf);
10087                 if (err) {
10088                         pr_warn("perf_buffer: failed to process records in buffer #%d: %d\n", i, err);
10089                         return err;
10090                 }
10091         }
10092         return 0;
10093 }
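/* Note: perf_buffer__poll() blocks (up to timeout_ms) and then drains only
 * the rings that became ready, while perf_buffer__consume() drains every
 * allocated ring without blocking. The latter is handy right before
 * perf_buffer__free() so that already-written samples aren't lost.
 */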
10094
10095 struct bpf_prog_info_array_desc {
10096         int     array_offset;   /* e.g. offset of jited_prog_insns */
10097         int     count_offset;   /* e.g. offset of jited_prog_len */
10098         int     size_offset;    /* > 0: offset of rec size,
10099                                  * < 0: fixed size of -size_offset
10100                                  */
10101 };
10102
10103 static struct bpf_prog_info_array_desc bpf_prog_info_array_desc[] = {
10104         [BPF_PROG_INFO_JITED_INSNS] = {
10105                 offsetof(struct bpf_prog_info, jited_prog_insns),
10106                 offsetof(struct bpf_prog_info, jited_prog_len),
10107                 -1,
10108         },
10109         [BPF_PROG_INFO_XLATED_INSNS] = {
10110                 offsetof(struct bpf_prog_info, xlated_prog_insns),
10111                 offsetof(struct bpf_prog_info, xlated_prog_len),
10112                 -1,
10113         },
10114         [BPF_PROG_INFO_MAP_IDS] = {
10115                 offsetof(struct bpf_prog_info, map_ids),
10116                 offsetof(struct bpf_prog_info, nr_map_ids),
10117                 -(int)sizeof(__u32),
10118         },
10119         [BPF_PROG_INFO_JITED_KSYMS] = {
10120                 offsetof(struct bpf_prog_info, jited_ksyms),
10121                 offsetof(struct bpf_prog_info, nr_jited_ksyms),
10122                 -(int)sizeof(__u64),
10123         },
10124         [BPF_PROG_INFO_JITED_FUNC_LENS] = {
10125                 offsetof(struct bpf_prog_info, jited_func_lens),
10126                 offsetof(struct bpf_prog_info, nr_jited_func_lens),
10127                 -(int)sizeof(__u32),
10128         },
10129         [BPF_PROG_INFO_FUNC_INFO] = {
10130                 offsetof(struct bpf_prog_info, func_info),
10131                 offsetof(struct bpf_prog_info, nr_func_info),
10132                 offsetof(struct bpf_prog_info, func_info_rec_size),
10133         },
10134         [BPF_PROG_INFO_LINE_INFO] = {
10135                 offsetof(struct bpf_prog_info, line_info),
10136                 offsetof(struct bpf_prog_info, nr_line_info),
10137                 offsetof(struct bpf_prog_info, line_info_rec_size),
10138         },
10139         [BPF_PROG_INFO_JITED_LINE_INFO] = {
10140                 offsetof(struct bpf_prog_info, jited_line_info),
10141                 offsetof(struct bpf_prog_info, nr_jited_line_info),
10142                 offsetof(struct bpf_prog_info, jited_line_info_rec_size),
10143         },
10144         [BPF_PROG_INFO_PROG_TAGS] = {
10145                 offsetof(struct bpf_prog_info, prog_tags),
10146                 offsetof(struct bpf_prog_info, nr_prog_tags),
10147                 -(int)sizeof(__u8) * BPF_TAG_SIZE,
10148         },
10149
10150 };
10151
10152 static __u32 bpf_prog_info_read_offset_u32(struct bpf_prog_info *info,
10153                                            int offset)
10154 {
10155         __u32 *array = (__u32 *)info;
10156
10157         if (offset >= 0)
10158                 return array[offset / sizeof(__u32)];
10159         return -(int)offset;
10160 }
10161
10162 static __u64 bpf_prog_info_read_offset_u64(struct bpf_prog_info *info,
10163                                            int offset)
10164 {
10165         __u64 *array = (__u64 *)info;
10166
10167         if (offset >= 0)
10168                 return array[offset / sizeof(__u64)];
10169         return -(int)offset;
10170 }
10171
10172 static void bpf_prog_info_set_offset_u32(struct bpf_prog_info *info, int offset,
10173                                          __u32 val)
10174 {
10175         __u32 *array = (__u32 *)info;
10176
10177         if (offset >= 0)
10178                 array[offset / sizeof(__u32)] = val;
10179 }
10180
10181 static void bpf_prog_info_set_offset_u64(struct bpf_prog_info *info, int offset,
10182                                          __u64 val)
10183 {
10184         __u64 *array = (__u64 *)info;
10185
10186         if (offset >= 0)
10187                 array[offset / sizeof(__u64)] = val;
10188 }
10189
10190 struct bpf_prog_info_linear *
10191 bpf_program__get_prog_info_linear(int fd, __u64 arrays)
10192 {
10193         struct bpf_prog_info_linear *info_linear;
10194         struct bpf_prog_info info = {};
10195         __u32 info_len = sizeof(info);
10196         __u32 data_len = 0;
10197         int i, err;
10198         void *ptr;
10199
10200         if (arrays >> BPF_PROG_INFO_LAST_ARRAY)
10201                 return ERR_PTR(-EINVAL);
10202
10203         /* step 1: get array dimensions */
10204         err = bpf_obj_get_info_by_fd(fd, &info, &info_len);
10205         if (err) {
10206                 pr_debug("can't get prog info: %s\n", strerror(errno));
10207                 return ERR_PTR(-EFAULT);
10208         }
10209
10210         /* step 2: calculate total size of all arrays */
10211         for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) {
10212                 bool include_array = (arrays & (1UL << i)) > 0;
10213                 struct bpf_prog_info_array_desc *desc;
10214                 __u32 count, size;
10215
10216                 desc = bpf_prog_info_array_desc + i;
10217
10218                 /* kernel is too old to support this field */
10219                 if (info_len < desc->array_offset + sizeof(__u32) ||
10220                     info_len < desc->count_offset + sizeof(__u32) ||
10221                     (desc->size_offset > 0 && info_len < desc->size_offset))
10222                         include_array = false;
10223
10224                 if (!include_array) {
10225                         arrays &= ~(1UL << i);  /* clear the bit */
10226                         continue;
10227                 }
10228
10229                 count = bpf_prog_info_read_offset_u32(&info, desc->count_offset);
10230                 size  = bpf_prog_info_read_offset_u32(&info, desc->size_offset);
10231
10232                 data_len += count * size;
10233         }
10234
10235         /* step 3: allocate contiguous memory */
10236         data_len = roundup(data_len, sizeof(__u64));
10237         info_linear = malloc(sizeof(struct bpf_prog_info_linear) + data_len);
10238         if (!info_linear)
10239                 return ERR_PTR(-ENOMEM);
10240
10241         /* step 4: fill data into info_linear->info */
10242         info_linear->arrays = arrays;
10243         memset(&info_linear->info, 0, sizeof(info));
10244         ptr = info_linear->data;
10245
10246         for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) {
10247                 struct bpf_prog_info_array_desc *desc;
10248                 __u32 count, size;
10249
10250                 if ((arrays & (1UL << i)) == 0)
10251                         continue;
10252
10253                 desc  = bpf_prog_info_array_desc + i;
10254                 count = bpf_prog_info_read_offset_u32(&info, desc->count_offset);
10255                 size  = bpf_prog_info_read_offset_u32(&info, desc->size_offset);
10256                 bpf_prog_info_set_offset_u32(&info_linear->info,
10257                                              desc->count_offset, count);
10258                 bpf_prog_info_set_offset_u32(&info_linear->info,
10259                                              desc->size_offset, size);
10260                 bpf_prog_info_set_offset_u64(&info_linear->info,
10261                                              desc->array_offset,
10262                                              ptr_to_u64(ptr));
10263                 ptr += count * size;
10264         }
10265
10266         /* step 5: call syscall again to get required arrays */
10267         err = bpf_obj_get_info_by_fd(fd, &info_linear->info, &info_len);
10268         if (err) {
10269                 pr_debug("can't get prog info: %s\n", strerror(errno));
10270                 free(info_linear);
10271                 return ERR_PTR(-EFAULT);
10272         }
10273
10274         /* step 6: verify the data */
10275         for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) {
10276                 struct bpf_prog_info_array_desc *desc;
10277                 __u32 v1, v2;
10278
10279                 if ((arrays & (1UL << i)) == 0)
10280                         continue;
10281
10282                 desc = bpf_prog_info_array_desc + i;
10283                 v1 = bpf_prog_info_read_offset_u32(&info, desc->count_offset);
10284                 v2 = bpf_prog_info_read_offset_u32(&info_linear->info,
10285                                                    desc->count_offset);
10286                 if (v1 != v2)
10287                         pr_warn("%s: mismatch in element count\n", __func__);
10288
10289                 v1 = bpf_prog_info_read_offset_u32(&info, desc->size_offset);
10290                 v2 = bpf_prog_info_read_offset_u32(&info_linear->info,
10291                                                    desc->size_offset);
10292                 if (v1 != v2)
10293                         pr_warn("%s: mismatch in rec size\n", __func__);
10294         }
10295
10296         /* step 7: update info_len and data_len */
10297         info_linear->info_len = sizeof(struct bpf_prog_info);
10298         info_linear->data_len = data_len;
10299
10300         return info_linear;
10301 }
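/* Illustrative usage (local names are hypothetical); this is roughly how
 * perf-style tools consume the linearized info. Array pointers inside
 * il->info point into the il->data blob that follows the struct:
 *
 *	__u64 arrays = 1UL << BPF_PROG_INFO_JITED_KSYMS;
 *	struct bpf_prog_info_linear *il;
 *
 *	il = bpf_program__get_prog_info_linear(prog_fd, arrays);
 *	if (libbpf_get_error(il))
 *		return -1;
 *
 *	__u64 *ksyms = (__u64 *)(uintptr_t)il->info.jited_ksyms;
 *	__u32 n = il->info.nr_jited_ksyms;
 *	// ... use ksyms[0..n-1] ...
 *	free(il);
 */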
10302
10303 void bpf_program__bpil_addr_to_offs(struct bpf_prog_info_linear *info_linear)
10304 {
10305         int i;
10306
10307         for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) {
10308                 struct bpf_prog_info_array_desc *desc;
10309                 __u64 addr, offs;
10310
10311                 if ((info_linear->arrays & (1UL << i)) == 0)
10312                         continue;
10313
10314                 desc = bpf_prog_info_array_desc + i;
10315                 addr = bpf_prog_info_read_offset_u64(&info_linear->info,
10316                                                      desc->array_offset);
10317                 offs = addr - ptr_to_u64(info_linear->data);
10318                 bpf_prog_info_set_offset_u64(&info_linear->info,
10319                                              desc->array_offset, offs);
10320         }
10321 }
10322
10323 void bpf_program__bpil_offs_to_addr(struct bpf_prog_info_linear *info_linear)
10324 {
10325         int i;
10326
10327         for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) {
10328                 struct bpf_prog_info_array_desc *desc;
10329                 __u64 addr, offs;
10330
10331                 if ((info_linear->arrays & (1UL << i)) == 0)
10332                         continue;
10333
10334                 desc = bpf_prog_info_array_desc + i;
10335                 offs = bpf_prog_info_read_offset_u64(&info_linear->info,
10336                                                      desc->array_offset);
10337                 addr = offs + ptr_to_u64(info_linear->data);
10338                 bpf_prog_info_set_offset_u64(&info_linear->info,
10339                                              desc->array_offset, addr);
10340         }
10341 }
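/* Together these two helpers make bpf_prog_info_linear relocatable: before
 * serializing the blob (e.g., when writing it out to a file), addr_to_offs()
 * rewrites each embedded array pointer as an offset relative to
 * info_linear->data; once the blob has been loaded at a new address,
 * offs_to_addr() turns the offsets back into valid pointers (il as above):
 *
 *	bpf_program__bpil_addr_to_offs(il);	// pointers -> offsets
 *	// ... write out il, read it back in another process ...
 *	bpf_program__bpil_offs_to_addr(il);	// offsets -> pointers
 */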
10342
10343 int bpf_program__set_attach_target(struct bpf_program *prog,
10344                                    int attach_prog_fd,
10345                                    const char *attach_func_name)
10346 {
10347         int btf_id;
10348
10349         if (!prog || attach_prog_fd < 0 || !attach_func_name)
10350                 return -EINVAL;
10351
10352         if (attach_prog_fd)
10353                 btf_id = libbpf_find_prog_btf_id(attach_func_name,
10354                                                  attach_prog_fd);
10355         else
10356                 btf_id = __find_vmlinux_btf_id(prog->obj->btf_vmlinux,
10357                                                attach_func_name,
10358                                                prog->expected_attach_type);
10359
10360         if (btf_id < 0)
10361                 return btf_id;
10362
10363         prog->attach_btf_id = btf_id;
10364         prog->attach_prog_fd = attach_prog_fd;
10365         return 0;
10366 }
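/* Illustrative usage (program/function names are hypothetical): for
 * fentry/fexit/freplace programs the attach target can be picked at
 * runtime, before the object is loaded. Passing attach_prog_fd == 0
 * resolves attach_func_name against vmlinux BTF instead of another BPF
 * program:
 *
 *	struct bpf_program *prog;
 *
 *	prog = bpf_object__find_program_by_name(obj, "entry_prog");
 *	err = bpf_program__set_attach_target(prog, target_prog_fd,
 *					     "func_to_trace");
 *	if (!err)
 *		err = bpf_object__load(obj);
 */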
10367
10368 int parse_cpu_mask_str(const char *s, bool **mask, int *mask_sz)
10369 {
10370         int err = 0, n, len, start, end = -1;
10371         bool *tmp;
10372
10373         *mask = NULL;
10374         *mask_sz = 0;
10375
10376         /* Each substring separated by ',' has the format \d+-\d+ or \d+ */
10377         while (*s) {
10378                 if (*s == ',' || *s == '\n') {
10379                         s++;
10380                         continue;
10381                 }
10382                 n = sscanf(s, "%d%n-%d%n", &start, &len, &end, &len);
10383                 if (n <= 0 || n > 2) {
10384                         pr_warn("Failed to get CPU range %s: %d\n", s, n);
10385                         err = -EINVAL;
10386                         goto cleanup;
10387                 } else if (n == 1) {
10388                         end = start;
10389                 }
10390                 if (start < 0 || start > end) {
10391                         pr_warn("Invalid CPU range [%d,%d] in %s\n",
10392                                 start, end, s);
10393                         err = -EINVAL;
10394                         goto cleanup;
10395                 }
10396                 tmp = realloc(*mask, end + 1);
10397                 if (!tmp) {
10398                         err = -ENOMEM;
10399                         goto cleanup;
10400                 }
10401                 *mask = tmp;
10402                 memset(tmp + *mask_sz, 0, start - *mask_sz);
10403                 memset(tmp + start, 1, end - start + 1);
10404                 *mask_sz = end + 1;
10405                 s += len;
10406         }
10407         if (!*mask_sz) {
10408                 pr_warn("Empty CPU range\n");
10409                 return -EINVAL;
10410         }
10411         return 0;
10412 cleanup:
10413         free(*mask);
10414         *mask = NULL;
10415         return err;
10416 }
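/* Example for this internal helper (sketch): ranges are expected in
 * ascending, non-overlapping order, as produced by sysfs cpumask files.
 * Parsing "0-2,7\n" yields mask_sz == 8 and mask[i] true for i in
 * {0, 1, 2, 7}; the caller owns the returned array:
 *
 *	bool *mask;
 *	int n;
 *	int err = parse_cpu_mask_str("0-2,7\n", &mask, &n);
 *
 *	if (!err) {
 *		// mask[0..n-1] flags the CPUs named in the string
 *		free(mask);
 *	}
 */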
10417
10418 int parse_cpu_mask_file(const char *fcpu, bool **mask, int *mask_sz)
10419 {
10420         int fd, err = 0, len;
10421         char buf[128];
10422
10423         fd = open(fcpu, O_RDONLY);
10424         if (fd < 0) {
10425                 err = -errno;
10426                 pr_warn("Failed to open cpu mask file %s: %d\n", fcpu, err);
10427                 return err;
10428         }
10429         len = read(fd, buf, sizeof(buf));
10430         close(fd);
10431         if (len <= 0) {
10432                 err = len ? -errno : -EINVAL;
10433                 pr_warn("Failed to read cpu mask from %s: %d\n", fcpu, err);
10434                 return err;
10435         }
10436         if (len >= sizeof(buf)) {
10437                 pr_warn("CPU mask is too big in file %s\n", fcpu);
10438                 return -E2BIG;
10439         }
10440         buf[len] = '\0';
10441
10442         return parse_cpu_mask_str(buf, mask, mask_sz);
10443 }
10444
10445 int libbpf_num_possible_cpus(void)
10446 {
10447         static const char *fcpu = "/sys/devices/system/cpu/possible";
10448         static int cpus;
10449         int err, n, i, tmp_cpus;
10450         bool *mask;
10451
10452         tmp_cpus = READ_ONCE(cpus);
10453         if (tmp_cpus > 0)
10454                 return tmp_cpus;
10455
10456         err = parse_cpu_mask_file(fcpu, &mask, &n);
10457         if (err)
10458                 return err;
10459
10460         tmp_cpus = 0;
10461         for (i = 0; i < n; i++) {
10462                 if (mask[i])
10463                         tmp_cpus++;
10464         }
10465         free(mask);
10466
10467         WRITE_ONCE(cpus, tmp_cpus);
10468         return tmp_cpus;
10469 }
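/* Example usage (sketch; per_cpu_map_fd and key are assumed to exist in
 * the caller): per-CPU map lookups return one value per possible CPU, so
 * the result buffer must be sized with this helper:
 *
 *	int ncpus = libbpf_num_possible_cpus();
 *	if (ncpus < 0)
 *		return ncpus;
 *
 *	long values[ncpus];
 *	bpf_map_lookup_elem(per_cpu_map_fd, &key, values);
 */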
10470
10471 int bpf_object__open_skeleton(struct bpf_object_skeleton *s,
10472                               const struct bpf_object_open_opts *opts)
10473 {
10474         DECLARE_LIBBPF_OPTS(bpf_object_open_opts, skel_opts,
10475                 .object_name = s->name,
10476         );
10477         struct bpf_object *obj;
10478         int i;
10479
10480         /* Attempt to preserve opts->object_name, unless explicitly overridden
10481          * by the user. Overwriting the object name for skeletons is discouraged,
10482          * as it breaks global data maps: they use the object name as their own
10483          * map name prefix. When the skeleton is generated, bpftool assumes that
10484          * this name will stay the same.
10485          */
10486         if (opts) {
10487                 memcpy(&skel_opts, opts, sizeof(*opts));
10488                 if (!opts->object_name)
10489                         skel_opts.object_name = s->name;
10490         }
10491
10492         obj = bpf_object__open_mem(s->data, s->data_sz, &skel_opts);
10493         if (IS_ERR(obj)) {
10494                 pr_warn("failed to initialize skeleton BPF object '%s': %ld\n",
10495                         s->name, PTR_ERR(obj));
10496                 return PTR_ERR(obj);
10497         }
10498
10499         *s->obj = obj;
10500
10501         for (i = 0; i < s->map_cnt; i++) {
10502                 struct bpf_map **map = s->maps[i].map;
10503                 const char *name = s->maps[i].name;
10504                 void **mmaped = s->maps[i].mmaped;
10505
10506                 *map = bpf_object__find_map_by_name(obj, name);
10507                 if (!*map) {
10508                         pr_warn("failed to find skeleton map '%s'\n", name);
10509                         return -ESRCH;
10510                 }
10511
10512                 /* externs shouldn't be pre-set up from user code */
10513                 if (mmaped && (*map)->libbpf_type != LIBBPF_MAP_KCONFIG)
10514                         *mmaped = (*map)->mmaped;
10515         }
10516
10517         for (i = 0; i < s->prog_cnt; i++) {
10518                 struct bpf_program **prog = s->progs[i].prog;
10519                 const char *name = s->progs[i].name;
10520
10521                 *prog = bpf_object__find_program_by_name(obj, name);
10522                 if (!*prog) {
10523                         pr_warn("failed to find skeleton program '%s'\n", name);
10524                         return -ESRCH;
10525                 }
10526         }
10527
10528         return 0;
10529 }
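/* These skeleton helpers (open/load/attach/detach/destroy) are normally
 * invoked from bpftool-generated code ("bpftool gen skeleton") rather than
 * called directly. Sketch of the generated API flow for a hypothetical
 * object "myobj":
 *
 *	struct myobj_bpf *skel = myobj_bpf__open();	// bpf_object__open_skeleton()
 *	if (!skel)
 *		return -1;
 *	if (myobj_bpf__load(skel) ||			// bpf_object__load_skeleton()
 *	    myobj_bpf__attach(skel))			// bpf_object__attach_skeleton()
 *		goto out;
 *	// skel->bss, skel->data, skel->maps, skel->progs are now usable
 * out:
 *	myobj_bpf__destroy(skel);			// bpf_object__destroy_skeleton()
 */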
10530
10531 int bpf_object__load_skeleton(struct bpf_object_skeleton *s)
10532 {
10533         int i, err;
10534
10535         err = bpf_object__load(*s->obj);
10536         if (err) {
10537                 pr_warn("failed to load BPF skeleton '%s': %d\n", s->name, err);
10538                 return err;
10539         }
10540
10541         for (i = 0; i < s->map_cnt; i++) {
10542                 struct bpf_map *map = *s->maps[i].map;
10543                 size_t mmap_sz = bpf_map_mmap_sz(map);
10544                 int prot, map_fd = bpf_map__fd(map);
10545                 void **mmaped = s->maps[i].mmaped;
10546
10547                 if (!mmaped)
10548                         continue;
10549
10550                 if (!(map->def.map_flags & BPF_F_MMAPABLE)) {
10551                         *mmaped = NULL;
10552                         continue;
10553                 }
10554
10555                 if (map->def.map_flags & BPF_F_RDONLY_PROG)
10556                         prot = PROT_READ;
10557                 else
10558                         prot = PROT_READ | PROT_WRITE;
10559
10560                 /* Remap the anonymous mmap()-ed "map initialization image" as
10561                  * BPF map-backed mmap()-ed memory, preserving the same memory
10562                  * address. This causes the kernel to change the process's page
10563                  * tables to point to a different piece of kernel memory, but
10564                  * from the userspace point of view the memory address (and its
10565                  * contents, identical at this point) stays the same. The mapping
10566                  * is released by bpf_object__close() as part of the normal
10567                  * cleanup procedure, so the skeleton code doesn't need to worry
10568                  * about cleaning it up itself.
10569                  */
10570                 *mmaped = mmap(map->mmaped, mmap_sz, prot,
10571                                 MAP_SHARED | MAP_FIXED, map_fd, 0);
10572                 if (*mmaped == MAP_FAILED) {
10573                         err = -errno;
10574                         *mmaped = NULL;
10575                         pr_warn("failed to re-mmap() map '%s': %d\n",
10576                                  bpf_map__name(map), err);
10577                         return err;
10578                 }
10579         }
10580
10581         return 0;
10582 }
10583
10584 int bpf_object__attach_skeleton(struct bpf_object_skeleton *s)
10585 {
10586         int i;
10587
10588         for (i = 0; i < s->prog_cnt; i++) {
10589                 struct bpf_program *prog = *s->progs[i].prog;
10590                 struct bpf_link **link = s->progs[i].link;
10591                 const struct bpf_sec_def *sec_def;
10592
10593                 if (!prog->load)
10594                         continue;
10595
10596                 sec_def = find_sec_def(prog->sec_name);
10597                 if (!sec_def || !sec_def->attach_fn)
10598                         continue;
10599
10600                 *link = sec_def->attach_fn(sec_def, prog);
10601                 if (IS_ERR(*link)) {
10602                         pr_warn("failed to auto-attach program '%s': %ld\n",
10603                                 bpf_program__name(prog), PTR_ERR(*link));
10604                         return PTR_ERR(*link);
10605                 }
10606         }
10607
10608         return 0;
10609 }
10610
10611 void bpf_object__detach_skeleton(struct bpf_object_skeleton *s)
10612 {
10613         int i;
10614
10615         for (i = 0; i < s->prog_cnt; i++) {
10616                 struct bpf_link **link = s->progs[i].link;
10617
10618                 bpf_link__destroy(*link);
10619                 *link = NULL;
10620         }
10621 }
10622
10623 void bpf_object__destroy_skeleton(struct bpf_object_skeleton *s)
10624 {
10625         if (s->progs)
10626                 bpf_object__detach_skeleton(s);
10627         if (s->obj)
10628                 bpf_object__close(*s->obj);
10629         free(s->maps);
10630         free(s->progs);
10631         free(s);
10632 }