1 // SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
2 /* Copyright (c) 2019 Facebook */
7 #include <linux/string.h>
8 #include <linux/bpf_verifier.h>
// Thin wrapper: forwards t to btf_type_str() and returns the string it yields.
11 static const char *btf_kind_str(const struct btf_type *t)
13 return btf_type_str(t);
// Returns true when insn->code is exactly BPF_LD | BPF_IMM | BPF_DW.
// bpf_core_patch_insn() uses this before touching the following slot (insn + 1).
16 static bool is_ldimm64_insn(struct bpf_insn *insn)
18 return insn->code == (BPF_LD | BPF_IMM | BPF_DW);
// Thin wrapper: forwards all three arguments to btf_type_skip_modifiers() and
// returns its result. Callers in this file pass either a pointer or NULL as
// res_id and check the returned type pointer for NULL in several places.
21 static const struct btf_type *
22 skip_mods_and_typedefs(const struct btf *btf, u32 id, u32 *res_id)
24 return btf_type_skip_modifiers(btf, id, res_id);
// Thin wrapper: returns whatever btf_name_by_offset() yields for offset in btf.
27 static const char *btf__name_by_offset(const struct btf *btf, u32 offset)
29 return btf_name_by_offset(btf, offset);
// Looks up the type with id type_id and asks btf_resolve_size() to fill in size.
// NOTE(review): the declaration of size, any error handling and the return
// statement are not visible in this view; callers in this file store the
// result in a variable named sz.
32 static s64 btf__resolve_size(const struct btf *btf, u32 type_id)
34 const struct btf_type *t;
37 t = btf_type_by_id(btf, type_id);
38 t = btf_resolve_size(btf, t, &size);
44 enum libbpf_print_level {
// Logging shims that all expand to a bpf_log() call.
// In pr_warn, pr_info and pr_debug the macro parameter named log binds the
// first argument written after the format string. That argument is cast to a
// void pointer and becomes the first bpf_log() argument, and an empty string
// literal is passed in its place as the first format argument.
// NOTE(review): most call sites in this file pass prog_name in that position,
// so prog_name presumably carries the log handle here - confirm against callers.
53 #define pr_warn(fmt, log, ...) bpf_log((void *)log, fmt, "", ##__VA_ARGS__)
54 #define pr_info(fmt, log, ...) bpf_log((void *)log, fmt, "", ##__VA_ARGS__)
55 #define pr_debug(fmt, log, ...) bpf_log((void *)log, fmt, "", ##__VA_ARGS__)
// libbpf_print ignores level and reads prog_name from the scope it expands in.
56 #define libbpf_print(level, fmt, ...) bpf_log((void *)prog_name, fmt, ##__VA_ARGS__)
62 #include <linux/err.h>
67 #include "str_error.h"
68 #include "libbpf_internal.h"
// Reports whether array arr, reached through accessor acc, counts as a
// flexible array. The visible checks are: acc has a name, arr declares zero
// elements, and acc->idx equals the last member index of the type acc->type_id.
// NOTE(review): the return statement that follows the first check is not
// visible in this view.
71 static bool is_flex_arr(const struct btf *btf,
72 const struct bpf_core_accessor *acc,
73 const struct btf_array *arr)
75 const struct btf_type *t;
77 /* not a flexible array, if not inside a struct or has non-zero size */
78 if (!acc->name || arr->nelems > 0)
81 /* has to be the last member of enclosing struct */
82 t = btf_type_by_id(btf, acc->type_id);
83 return acc->idx == btf_vlen(t) - 1;
// Maps a relocation kind to a short constant name used in log output.
// Kinds not listed below map to the fallback string in the default case.
86 static const char *core_relo_kind_str(enum bpf_core_relo_kind kind)
89 case BPF_CORE_FIELD_BYTE_OFFSET: return "byte_off";
90 case BPF_CORE_FIELD_BYTE_SIZE: return "byte_sz";
91 case BPF_CORE_FIELD_EXISTS: return "field_exists";
92 case BPF_CORE_FIELD_SIGNED: return "signed";
93 case BPF_CORE_FIELD_LSHIFT_U64: return "lshift_u64";
94 case BPF_CORE_FIELD_RSHIFT_U64: return "rshift_u64";
95 case BPF_CORE_TYPE_ID_LOCAL: return "local_type_id";
96 case BPF_CORE_TYPE_ID_TARGET: return "target_type_id";
97 case BPF_CORE_TYPE_EXISTS: return "type_exists";
98 case BPF_CORE_TYPE_SIZE: return "type_size";
99 case BPF_CORE_ENUMVAL_EXISTS: return "enumval_exists";
100 case BPF_CORE_ENUMVAL_VALUE: return "enumval_value";
101 default: return "unknown";
// Classifies kind as a field-based relocation; the six FIELD kinds are listed.
// NOTE(review): the switch header and the return statements are not visible
// in this view - presumably true for the listed kinds and false otherwise.
105 static bool core_relo_is_field_based(enum bpf_core_relo_kind kind)
108 case BPF_CORE_FIELD_BYTE_OFFSET:
109 case BPF_CORE_FIELD_BYTE_SIZE:
110 case BPF_CORE_FIELD_EXISTS:
111 case BPF_CORE_FIELD_SIGNED:
112 case BPF_CORE_FIELD_LSHIFT_U64:
113 case BPF_CORE_FIELD_RSHIFT_U64:
// Classifies kind as a type-based relocation; the four TYPE kinds are listed.
// NOTE(review): the switch header and the return statements are not visible
// in this view - presumably true for the listed kinds and false otherwise.
120 static bool core_relo_is_type_based(enum bpf_core_relo_kind kind)
123 case BPF_CORE_TYPE_ID_LOCAL:
124 case BPF_CORE_TYPE_ID_TARGET:
125 case BPF_CORE_TYPE_EXISTS:
126 case BPF_CORE_TYPE_SIZE:
// Classifies kind as an enum-value relocation; the two ENUMVAL kinds are listed.
// NOTE(review): the switch header and the return statements are not visible
// in this view - presumably true for the listed kinds and false otherwise.
133 static bool core_relo_is_enumval_based(enum bpf_core_relo_kind kind)
136 case BPF_CORE_ENUMVAL_EXISTS:
137 case BPF_CORE_ENUMVAL_VALUE:
// Compares a local type against a target type for CO-RE compatibility.
// Visible flow: both ids are looked up and their kinds checked with
// btf_kind_core_compat(); modifiers and typedefs are then skipped on both
// sides, the kinds are checked again, and a switch on the local kind decides
// how to proceed. Integer types are accepted only when both have a zero
// btf_int_offset(). Two branches replace local_id and targ_id with the
// referenced type or the array element type. Function prototypes need equal
// parameter counts and compare each parameter pair through a recursive call,
// then move on to the return types. Any other kind is logged as unexpected.
// NOTE(review): return values, the loop-back that consumes depth, and the use
// of the level parameter are not visible in this view.
144 int __bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id,
145 const struct btf *targ_btf, __u32 targ_id, int level)
147 const struct btf_type *local_type, *targ_type;
148 int depth = 32; /* max recursion depth */
150 /* caller made sure that names match (ignoring flavor suffix) */
151 local_type = btf_type_by_id(local_btf, local_id);
152 targ_type = btf_type_by_id(targ_btf, targ_id);
153 if (!btf_kind_core_compat(local_type, targ_type))
161 local_type = skip_mods_and_typedefs(local_btf, local_id, &local_id);
162 targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id);
163 if (!local_type || !targ_type)
166 if (!btf_kind_core_compat(local_type, targ_type))
169 switch (btf_kind(local_type)) {
171 case BTF_KIND_STRUCT:
175 case BTF_KIND_ENUM64:
178 /* just reject deprecated bitfield-like integers; all other
179 * integers are by default compatible between each other
181 return btf_int_offset(local_type) == 0 && btf_int_offset(targ_type) == 0;
183 local_id = local_type->type;
184 targ_id = targ_type->type;
187 local_id = btf_array(local_type)->type;
188 targ_id = btf_array(targ_type)->type;
190 case BTF_KIND_FUNC_PROTO: {
191 struct btf_param *local_p = btf_params(local_type);
192 struct btf_param *targ_p = btf_params(targ_type);
193 __u16 local_vlen = btf_vlen(local_type);
194 __u16 targ_vlen = btf_vlen(targ_type);
197 if (local_vlen != targ_vlen)
200 for (i = 0; i < local_vlen; i++, local_p++, targ_p++) {
204 skip_mods_and_typedefs(local_btf, local_p->type, &local_id);
205 skip_mods_and_typedefs(targ_btf, targ_p->type, &targ_id);
206 err = __bpf_core_types_are_compat(local_btf, local_id, targ_btf, targ_id,
212 /* tail recurse for return type check */
213 skip_mods_and_typedefs(local_btf, local_type->type, &local_id);
214 skip_mods_and_typedefs(targ_btf, targ_type->type, &targ_id);
218 pr_warn("unexpected kind %s relocated, local [%d], target [%d]\n",
219 btf_kind_str(local_type), local_id, targ_id);
225 * Turn bpf_core_relo into a low- and high-level spec representation,
226 * validating correctness along the way, as well as calculating resulting
227 * field bit offset, specified by accessor string. Low-level spec captures
228 * every single level of nestedness, including traversing anonymous
229 * struct/union members. High-level one only captures semantically meaningful
230 * "turning points": named fields and array indices.
231 * E.g., for this case:
242 * struct sample *s = ...;
244 * int *x = &s->a[3]; // access string = '0:1:2:3'
246 * Low-level spec has 1:1 mapping with each element of access string (it's
247 * just a parsed access string representation): [0, 1, 2, 3].
249 * High-level spec will capture only 3 points:
250 * - initial zero-index access by pointer (&s->... is the same as &s[0]...);
251 * - field 'a' access (corresponds to '2' in low-level spec);
252 * - array element #3 access (corresponds to '3' in low-level spec).
254 * Type-based relocations (TYPE_EXISTS/TYPE_SIZE,
255 * TYPE_ID_LOCAL/TYPE_ID_TARGET) don't capture any field information. Their
256 * spec and raw_spec are kept empty.
258 * Enum value-based relocations (ENUMVAL_EXISTS/ENUMVAL_VALUE) use access
259 * string to specify enumerator's value index that need to be relocated.
// Parses the access string of relo into spec.
// Visible steps: fetch the string at relo->access_str_off and reject it when
// empty or starting with a colon; zero spec and record the root type id and
// relocation kind; for type-based kinds compare the string against the single
// digit zero; otherwise parse colon-separated integers into spec->raw_spec,
// bounded by BPF_CORE_SPEC_MAX_LEN. The first index fills spec->spec[0]. For
// enum-value kinds the enumerator name is recorded there. For field-based
// kinds the remaining indices are walked through struct, union and array
// types while spec->bit_offset accumulates.
// NOTE(review): return statements, error codes and several declarations
// (id, sz, name_off, bit_offset, flex) are not visible in this view.
261 int bpf_core_parse_spec(const char *prog_name, const struct btf *btf,
262 const struct bpf_core_relo *relo,
263 struct bpf_core_spec *spec)
265 int access_idx, parsed_len, i;
266 struct bpf_core_accessor *acc;
267 const struct btf_type *t;
268 const char *name, *spec_str;
272 spec_str = btf__name_by_offset(btf, relo->access_str_off);
273 if (str_is_empty(spec_str) || *spec_str == ':')
276 memset(spec, 0, sizeof(*spec));
278 spec->root_type_id = relo->type_id;
279 spec->relo_kind = relo->kind;
281 /* type-based relocations don't have a field access string */
282 if (core_relo_is_type_based(relo->kind)) {
283 if (strcmp(spec_str, "0"))
288 /* parse spec_str="0:1:2:3:4" into array raw_spec=[0, 1, 2, 3, 4] */
290 if (*spec_str == ':')
292 if (sscanf(spec_str, "%d%n", &access_idx, &parsed_len) != 1)
294 if (spec->raw_len == BPF_CORE_SPEC_MAX_LEN)
296 spec_str += parsed_len;
297 spec->raw_spec[spec->raw_len++] = access_idx;
300 if (spec->raw_len == 0)
303 t = skip_mods_and_typedefs(btf, relo->type_id, &id);
307 access_idx = spec->raw_spec[0];
308 acc = &spec->spec[0];
310 acc->idx = access_idx;
313 if (core_relo_is_enumval_based(relo->kind)) {
// enum-value specs must name exactly one in-range enumerator of an enum type
314 if (!btf_is_any_enum(t) || spec->raw_len > 1 || access_idx >= btf_vlen(t))
317 /* record enumerator name in a first accessor */
318 name_off = btf_is_enum(t) ? btf_enum(t)[access_idx].name_off
319 : btf_enum64(t)[access_idx].name_off;
320 acc->name = btf__name_by_offset(btf, name_off);
324 if (!core_relo_is_field_based(relo->kind))
327 sz = btf__resolve_size(btf, id);
// the first index is scaled by the size of the root type, in bits
330 spec->bit_offset = access_idx * sz * 8;
332 for (i = 1; i < spec->raw_len; i++) {
333 t = skip_mods_and_typedefs(btf, id, &id);
337 access_idx = spec->raw_spec[i];
338 acc = &spec->spec[spec->len];
340 if (btf_is_composite(t)) {
341 const struct btf_member *m;
344 if (access_idx >= btf_vlen(t))
347 bit_offset = btf_member_bit_offset(t, access_idx);
348 spec->bit_offset += bit_offset;
350 m = btf_members(t) + access_idx;
352 name = btf__name_by_offset(btf, m->name_off);
353 if (str_is_empty(name))
357 acc->idx = access_idx;
363 } else if (btf_is_array(t)) {
364 const struct btf_array *a = btf_array(t);
367 t = skip_mods_and_typedefs(btf, a->type, &id);
// out-of-range indices are tolerated only for flexible arrays
371 flex = is_flex_arr(btf, acc - 1, a);
372 if (!flex && access_idx >= a->nelems)
375 spec->spec[spec->len].type_id = id;
376 spec->spec[spec->len].idx = access_idx;
379 sz = btf__resolve_size(btf, id);
382 spec->bit_offset += access_idx * sz * 8;
384 pr_warn("prog '%s': relo for [%u] %s (at idx %d) captures type [%d] of unexpected kind %s\n",
385 prog_name, relo->type_id, spec_str, i, id, btf_kind_str(t));
393 /* Check two types for compatibility for the purpose of field access
394 * relocation. const/volatile/restrict and typedefs are skipped to ensure we
395 * are relocating semantically compatible entities:
396 * - any two STRUCTs/UNIONs are compatible and can be mixed;
397 * - any two FWDs are compatible, if their names match (modulo flavor suffix);
398 * - any two PTRs are always compatible;
399 * - for ENUMs, names should be the same (ignoring flavor suffix) or at
400 * least one of enums should be anonymous;
401 * - for ENUMs, check sizes, names are ignored;
402 * - for INT, size and signedness are ignored;
403 * - any two FLOATs are always compatible;
404 * - for ARRAY, dimensionality is ignored, element types are checked for
405 * compatibility recursively;
406 * - everything else shouldn't be ever a target of relocation.
407 * These rules are not set in stone and probably will be adjusted as we get
408 * more experience with using BPF CO-RE relocations.
// Field-level compatibility check between a local and a target type.
// Visible flow: modifiers and typedefs are skipped on both sides, a NULL
// result on either side is handled, two composite types are handled before the
// btf_kind_core_compat() check, and a switch on the local kind follows. The
// enum branch compares essential name lengths and the names themselves, and
// accepts when either length is zero. The integer branch requires a zero
// btf_int_offset() on both sides. The array branch continues with the
// element types of both arrays.
// NOTE(review): the remaining parameters (local_id and targ_id are used but
// their declarations are elided), most return values and the loop-back for
// arrays are not visible in this view.
410 static int bpf_core_fields_are_compat(const struct btf *local_btf,
412 const struct btf *targ_btf,
415 const struct btf_type *local_type, *targ_type;
418 local_type = skip_mods_and_typedefs(local_btf, local_id, &local_id);
419 targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id);
420 if (!local_type || !targ_type)
423 if (btf_is_composite(local_type) && btf_is_composite(targ_type))
425 if (!btf_kind_core_compat(local_type, targ_type))
428 switch (btf_kind(local_type)) {
433 case BTF_KIND_ENUM64:
434 case BTF_KIND_ENUM: {
435 const char *local_name, *targ_name;
436 size_t local_len, targ_len;
438 local_name = btf__name_by_offset(local_btf,
439 local_type->name_off);
440 targ_name = btf__name_by_offset(targ_btf, targ_type->name_off);
441 local_len = bpf_core_essential_name_len(local_name);
442 targ_len = bpf_core_essential_name_len(targ_name);
443 /* one of them is anonymous or both w/ same flavor-less names */
444 return local_len == 0 || targ_len == 0 ||
445 (local_len == targ_len &&
446 strncmp(local_name, targ_name, local_len) == 0);
449 /* just reject deprecated bitfield-like integers; all other
450 * integers are by default compatible between each other
452 return btf_int_offset(local_type) == 0 &&
453 btf_int_offset(targ_type) == 0;
455 local_id = btf_array(local_type)->type;
456 targ_id = btf_array(targ_type)->type;
464 * Given single high-level named field accessor in local type, find
465 * corresponding high-level accessor for a target type. Along the way,
466 * maintain low-level spec for target as well. Also keep updating target
469 * Searching is performed through recursive exhaustive enumeration of all
470 * fields of a struct/union. If there are any anonymous (embedded)
471 * structs/unions, they are recursively searched as well. If field with
472 * desired name is found, check compatibility between local and target types,
473 * before returning result.
475 * 1 is returned, if field is found.
476 * 0 is returned if no compatible field is found.
477 * <0 is returned on error.
// Searches the members of the target type for one whose name equals the name
// of the local member selected by local_acc.
// Visible flow: the target type is resolved and must be composite; the local
// member name is read from local_acc->type_id and local_acc->idx; each target
// member is then tried in order. Before testing a member its bit offset is
// added to spec->bit_offset and its index is pushed onto spec->raw_spec. An
// unnamed member triggers a recursive search. A name match fills a new
// accessor, stores the member type in next_targ_id and asks
// bpf_core_fields_are_compat() for the verdict. A member that does not work
// out has its bit offset subtracted again.
// NOTE(review): some parameters, the return statements and the matching
// raw_len decrement are not visible in this view.
479 static int bpf_core_match_member(const struct btf *local_btf,
480 const struct bpf_core_accessor *local_acc,
481 const struct btf *targ_btf,
483 struct bpf_core_spec *spec,
486 const struct btf_type *local_type, *targ_type;
487 const struct btf_member *local_member, *m;
488 const char *local_name, *targ_name;
492 targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id);
495 if (!btf_is_composite(targ_type))
498 local_id = local_acc->type_id;
499 local_type = btf_type_by_id(local_btf, local_id);
500 local_member = btf_members(local_type) + local_acc->idx;
501 local_name = btf__name_by_offset(local_btf, local_member->name_off);
503 n = btf_vlen(targ_type);
504 m = btf_members(targ_type);
505 for (i = 0; i < n; i++, m++) {
508 bit_offset = btf_member_bit_offset(targ_type, i);
510 /* too deep struct/union/array nesting */
511 if (spec->raw_len == BPF_CORE_SPEC_MAX_LEN)
514 /* speculate this member will be the good one */
515 spec->bit_offset += bit_offset;
516 spec->raw_spec[spec->raw_len++] = i;
518 targ_name = btf__name_by_offset(targ_btf, m->name_off);
519 if (str_is_empty(targ_name)) {
520 /* embedded struct/union, we need to go deeper */
521 found = bpf_core_match_member(local_btf, local_acc,
524 if (found) /* either found or error */
526 } else if (strcmp(local_name, targ_name) == 0) {
527 /* matching named field */
528 struct bpf_core_accessor *targ_acc;
530 targ_acc = &spec->spec[spec->len++];
531 targ_acc->type_id = targ_id;
533 targ_acc->name = targ_name;
535 *next_targ_id = m->type;
536 found = bpf_core_fields_are_compat(local_btf,
540 spec->len--; /* pop accessor */
543 /* member turned out not to be what we looked for */
544 spec->bit_offset -= bit_offset;
552 * Try to match local spec to a target type and, if successful, produce full
553 * target spec (high-level, low-level + bit offset).
// Builds targ_spec by matching local_spec against target type targ_id.
// Visible flow: targ_spec is zeroed and seeded with the target btf, root type
// id and the local relocation kind. Type-based kinds are delegated to
// bpf_core_types_are_compat(). Enum-value kinds resolve the target to an enum
// and scan its enumerators for one whose essential name matches the local
// accessor name. Field-based kinds walk the local accessors in order: named
// ones go through bpf_core_match_member(), unnamed ones are array indices
// that are bounds-checked (flexible arrays excepted) and added to the target
// bit offset scaled by the element size.
// NOTE(review): return statements and the declarations of i, sz, name_off,
// matched and flex are not visible in this view.
555 static int bpf_core_spec_match(struct bpf_core_spec *local_spec,
556 const struct btf *targ_btf, __u32 targ_id,
557 struct bpf_core_spec *targ_spec)
559 const struct btf_type *targ_type;
560 const struct bpf_core_accessor *local_acc;
561 struct bpf_core_accessor *targ_acc;
565 memset(targ_spec, 0, sizeof(*targ_spec));
566 targ_spec->btf = targ_btf;
567 targ_spec->root_type_id = targ_id;
568 targ_spec->relo_kind = local_spec->relo_kind;
570 if (core_relo_is_type_based(local_spec->relo_kind)) {
571 return bpf_core_types_are_compat(local_spec->btf,
572 local_spec->root_type_id,
576 local_acc = &local_spec->spec[0];
577 targ_acc = &targ_spec->spec[0];
579 if (core_relo_is_enumval_based(local_spec->relo_kind)) {
580 size_t local_essent_len, targ_essent_len;
581 const char *targ_name;
583 /* has to resolve to an enum */
584 targ_type = skip_mods_and_typedefs(targ_spec->btf, targ_id, &targ_id);
585 if (!btf_is_any_enum(targ_type))
588 local_essent_len = bpf_core_essential_name_len(local_acc->name);
590 for (i = 0; i < btf_vlen(targ_type); i++) {
591 if (btf_is_enum(targ_type))
592 name_off = btf_enum(targ_type)[i].name_off;
594 name_off = btf_enum64(targ_type)[i].name_off;
596 targ_name = btf__name_by_offset(targ_spec->btf, name_off);
597 targ_essent_len = bpf_core_essential_name_len(targ_name);
598 if (targ_essent_len != local_essent_len)
600 if (strncmp(local_acc->name, targ_name, local_essent_len) == 0) {
601 targ_acc->type_id = targ_id;
603 targ_acc->name = targ_name;
605 targ_spec->raw_spec[targ_spec->raw_len] = targ_acc->idx;
606 targ_spec->raw_len++;
613 if (!core_relo_is_field_based(local_spec->relo_kind))
616 for (i = 0; i < local_spec->len; i++, local_acc++, targ_acc++) {
617 targ_type = skip_mods_and_typedefs(targ_spec->btf, targ_id,
622 if (local_acc->name) {
623 matched = bpf_core_match_member(local_spec->btf,
626 targ_spec, &targ_id);
630 /* for i=0, targ_id is already treated as array element
631 * type (because it's the original struct), for others
632 * we should find array element type first
635 const struct btf_array *a;
638 if (!btf_is_array(targ_type))
641 a = btf_array(targ_type);
642 flex = is_flex_arr(targ_btf, targ_acc - 1, a);
643 if (!flex && local_acc->idx >= a->nelems)
645 if (!skip_mods_and_typedefs(targ_btf, a->type,
650 /* too deep struct/union/array nesting */
651 if (targ_spec->raw_len == BPF_CORE_SPEC_MAX_LEN)
654 targ_acc->type_id = targ_id;
655 targ_acc->idx = local_acc->idx;
656 targ_acc->name = NULL;
658 targ_spec->raw_spec[targ_spec->raw_len] = targ_acc->idx;
659 targ_spec->raw_len++;
661 sz = btf__resolve_size(targ_btf, targ_id);
664 targ_spec->bit_offset += local_acc->idx * sz * 8;
// Computes the value of a field-based relocation for one spec.
// Outputs: val receives the relocation value; field_sz and type_id are
// written for byte-offset relocations; validate tells the caller whether the
// value found in the instruction should be checked.
// Visible flow: the FIELD_EXISTS kind is tested first, and one path returns
// -EUCLEAN to request instruction poisoning. The last accessor of spec is then
// examined. In the array-element path only byte offset and byte size are
// computed and other kinds are logged as not applicable. In the member path
// the bitfield size decides between a bitfield load window, which is widened
// in a loop, and a plain field whose size comes from btf__resolve_size().
// NOTE(review): bpf_core_calc_relo() passes NULL as validate for the target
// spec; the guards around the validate writes are not visible in this view,
// nor are several assignments and all break and return statements.
671 static int bpf_core_calc_field_relo(const char *prog_name,
672 const struct bpf_core_relo *relo,
673 const struct bpf_core_spec *spec,
674 __u64 *val, __u32 *field_sz, __u32 *type_id,
677 const struct bpf_core_accessor *acc;
678 const struct btf_type *t;
679 __u32 byte_off, byte_sz, bit_off, bit_sz, field_type_id;
680 const struct btf_member *m;
681 const struct btf_type *mt;
687 if (relo->kind == BPF_CORE_FIELD_EXISTS) {
693 return -EUCLEAN; /* request instruction poisoning */
695 acc = &spec->spec[spec->len - 1];
696 t = btf_type_by_id(spec->btf, acc->type_id);
698 /* a[n] accessor needs special handling */
700 if (relo->kind == BPF_CORE_FIELD_BYTE_OFFSET) {
701 *val = spec->bit_offset / 8;
702 /* remember field size for load/store mem size */
703 sz = btf__resolve_size(spec->btf, acc->type_id);
707 *type_id = acc->type_id;
708 } else if (relo->kind == BPF_CORE_FIELD_BYTE_SIZE) {
709 sz = btf__resolve_size(spec->btf, acc->type_id);
714 pr_warn("prog '%s': relo %d at insn #%d can't be applied to array access\n",
715 prog_name, relo->kind, relo->insn_off / 8);
723 m = btf_members(t) + acc->idx;
724 mt = skip_mods_and_typedefs(spec->btf, m->type, &field_type_id);
725 bit_off = spec->bit_offset;
726 bit_sz = btf_member_bitfield_size(t, acc->idx);
728 bitfield = bit_sz > 0;
// round the byte offset down to a multiple of byte_sz
731 byte_off = bit_off / 8 / byte_sz * byte_sz;
732 /* figure out smallest int size necessary for bitfield load */
733 while (bit_off + bit_sz - byte_off * 8 > byte_sz * 8) {
735 /* bitfield can't be read with 64-bit read */
736 pr_warn("prog '%s': relo %d at insn #%d can't be satisfied for bitfield\n",
737 prog_name, relo->kind, relo->insn_off / 8);
741 byte_off = bit_off / 8 / byte_sz * byte_sz;
744 sz = btf__resolve_size(spec->btf, field_type_id);
748 byte_off = spec->bit_offset / 8;
749 bit_sz = byte_sz * 8;
752 /* for bitfields, all the relocatable aspects are ambiguous and we
753 * might disagree with compiler, so turn off validation of expected
754 * value, except for signedness
757 *validate = !bitfield;
759 switch (relo->kind) {
760 case BPF_CORE_FIELD_BYTE_OFFSET:
764 *type_id = field_type_id;
767 case BPF_CORE_FIELD_BYTE_SIZE:
770 case BPF_CORE_FIELD_SIGNED:
771 *val = (btf_is_any_enum(mt) && BTF_INFO_KFLAG(mt->info)) ||
772 (btf_int_encoding(mt) & BTF_INT_SIGNED);
774 *validate = true; /* signedness is never ambiguous */
776 case BPF_CORE_FIELD_LSHIFT_U64:
777 #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
778 *val = 64 - (bit_off + bit_sz - byte_off * 8);
780 *val = (8 - byte_sz) * 8 + (bit_off - byte_off * 8);
783 case BPF_CORE_FIELD_RSHIFT_U64:
786 *validate = true; /* right shift is never ambiguous */
788 case BPF_CORE_FIELD_EXISTS:
// Computes the value of a type-based relocation for one spec.
// Visible flow: a switch on relo->kind stores spec->root_type_id for
// TYPE_ID_TARGET and resolves the size of the root type for TYPE_SIZE.
// NOTE(review): the handling of a missing spec, the TYPE_EXISTS value, the
// validate writes and all return statements are not visible in this view.
796 static int bpf_core_calc_type_relo(const struct bpf_core_relo *relo,
797 const struct bpf_core_spec *spec,
798 __u64 *val, bool *validate)
802 /* by default, always check expected value in bpf_insn */
806 /* type-based relos return zero when target type is not found */
812 switch (relo->kind) {
813 case BPF_CORE_TYPE_ID_TARGET:
814 *val = spec->root_type_id;
815 /* type ID, embedded in bpf_insn, might change during linking,
816 * so enforcing it is pointless
821 case BPF_CORE_TYPE_EXISTS:
824 case BPF_CORE_TYPE_SIZE:
825 sz = btf__resolve_size(spec->btf, spec->root_type_id);
830 case BPF_CORE_TYPE_ID_LOCAL:
831 /* BPF_CORE_TYPE_ID_LOCAL is handled specially and shouldn't get here */
// Computes the value of an enum-value relocation for one spec.
// For ENUMVAL_VALUE the enumerator selected by spec->spec[0] is read, either
// as the val field of a btf_enum entry or through btf_enum64_value().
// One path returns -EUCLEAN to request instruction poisoning.
// NOTE(review): the conditions guarding the -EUCLEAN return and the choice
// between the two reads, as well as the ENUMVAL_EXISTS value, are not visible
// in this view.
839 static int bpf_core_calc_enumval_relo(const struct bpf_core_relo *relo,
840 const struct bpf_core_spec *spec,
843 const struct btf_type *t;
845 switch (relo->kind) {
846 case BPF_CORE_ENUMVAL_EXISTS:
849 case BPF_CORE_ENUMVAL_VALUE:
851 return -EUCLEAN; /* request instruction poisoning */
852 t = btf_type_by_id(spec->btf, spec->spec[0].type_id);
854 *val = btf_enum(t)[spec->spec[0].idx].val;
856 *val = btf_enum64_value(btf_enum64(t) + spec->spec[0].idx);
865 /* Calculate original and target relocation values, given local and target
866 * specs and relocation kind. These values are calculated for each candidate.
867 * If there are multiple candidates, resulting values should all be consistent
868 * with each other. Otherwise, libbpf will refuse to proceed due to ambiguity.
869 * If instruction has to be poisoned, *poison will be set to true.
// Fills res with the original and new relocation values for one candidate.
// Visible flow: res is reset to defaults, then the relocation kind selects
// one of the three calc helpers. Each helper runs first on local_spec and,
// only if that call returned zero, on targ_spec. For field-based kinds a size
// mismatch between orig_sz and new_sz is examined: pointer to pointer and
// non-signed integer to non-signed integer are the two accepted combinations,
// anything else sets fail_memsz_adjust. The error code -EUCLEAN is treated as
// a poisoning request and -EOPNOTSUPP, the initial value of err, is logged as
// an unrecognized relocation.
// NOTE(review): the relo_idx parameter, the goto or skip statements after the
// accepted combinations and the final return are not visible in this view.
871 static int bpf_core_calc_relo(const char *prog_name,
872 const struct bpf_core_relo *relo,
874 const struct bpf_core_spec *local_spec,
875 const struct bpf_core_spec *targ_spec,
876 struct bpf_core_relo_res *res)
878 int err = -EOPNOTSUPP;
883 res->validate = true;
884 res->fail_memsz_adjust = false;
885 res->orig_sz = res->new_sz = 0;
886 res->orig_type_id = res->new_type_id = 0;
888 if (core_relo_is_field_based(relo->kind)) {
889 err = bpf_core_calc_field_relo(prog_name, relo, local_spec,
890 &res->orig_val, &res->orig_sz,
891 &res->orig_type_id, &res->validate);
892 err = err ?: bpf_core_calc_field_relo(prog_name, relo, targ_spec,
893 &res->new_val, &res->new_sz,
894 &res->new_type_id, NULL);
897 /* Validate if it's safe to adjust load/store memory size.
898 * Adjustments are performed only if original and new memory
901 res->fail_memsz_adjust = false;
902 if (res->orig_sz != res->new_sz) {
903 const struct btf_type *orig_t, *new_t;
905 orig_t = btf_type_by_id(local_spec->btf, res->orig_type_id);
906 new_t = btf_type_by_id(targ_spec->btf, res->new_type_id);
908 /* There are two use cases in which it's safe to
909 * adjust load/store's mem size:
910 * - reading a 32-bit kernel pointer, while on BPF
911 * size pointers are always 64-bit; in this case
912 * it's safe to "downsize" instruction size due to
913 * pointer being treated as unsigned integer with
914 * zero-extended upper 32-bits;
915 * - reading unsigned integers, again due to
916 * zero-extension is preserving the value correctly.
918 * In all other cases it's incorrect to attempt to
919 * load/store field because read value will be
920 * incorrect, so we poison relocated instruction.
922 if (btf_is_ptr(orig_t) && btf_is_ptr(new_t))
924 if (btf_is_int(orig_t) && btf_is_int(new_t) &&
925 btf_int_encoding(orig_t) != BTF_INT_SIGNED &&
926 btf_int_encoding(new_t) != BTF_INT_SIGNED)
929 /* mark as invalid mem size adjustment, but this will
930 * only be checked for LDX/STX/ST insns
932 res->fail_memsz_adjust = true;
934 } else if (core_relo_is_type_based(relo->kind)) {
935 err = bpf_core_calc_type_relo(relo, local_spec, &res->orig_val, &res->validate);
936 err = err ?: bpf_core_calc_type_relo(relo, targ_spec, &res->new_val, NULL);
937 } else if (core_relo_is_enumval_based(relo->kind)) {
938 err = bpf_core_calc_enumval_relo(relo, local_spec, &res->orig_val);
939 err = err ?: bpf_core_calc_enumval_relo(relo, targ_spec, &res->new_val);
943 if (err == -EUCLEAN) {
944 /* EUCLEAN is used to signal instruction poisoning request */
947 } else if (err == -EOPNOTSUPP) {
948 /* EOPNOTSUPP means unknown/unsupported relocation */
949 pr_warn("prog '%s': relo #%d: unrecognized CO-RE relocation %s (%d) at insn #%d\n",
950 prog_name, relo_idx, core_relo_kind_str(relo->kind),
951 relo->kind, relo->insn_off / 8);
958 * Turn instruction for which CO_RE relocation failed into invalid one with
959 * distinct signature.
// Overwrites insn in place so that it becomes a call with a recognizable
// immediate: the opcode is set to BPF_JMP | BPF_CALL and imm to 195896080.
// A debug message naming relo_idx and insn_idx is emitted first.
// NOTE(review): assignments to the other instruction fields are not visible
// in this view.
961 static void bpf_core_poison_insn(const char *prog_name, int relo_idx,
962 int insn_idx, struct bpf_insn *insn)
964 pr_debug("prog '%s': relo #%d: substituting insn #%d w/ invalid insn\n",
965 prog_name, relo_idx, insn_idx);
966 insn->code = BPF_JMP | BPF_CALL;
970 /* if this instruction is reachable (not a dead code),
971 * verifier will complain with the following message:
972 * invalid func unknown#195896080
974 insn->imm = 195896080; /* => 0xbad2310 => "bad relo" */
// Translates the BPF_SIZE() bits of insn->code into a byte count:
// BPF_DW is 8, BPF_W is 4, BPF_H is 2 and BPF_B is 1.
// NOTE(review): the default branch is not visible in this view.
977 static int insn_bpf_size_to_bytes(struct bpf_insn *insn)
979 switch (BPF_SIZE(insn->code)) {
980 case BPF_DW: return 8;
981 case BPF_W: return 4;
982 case BPF_H: return 2;
983 case BPF_B: return 1;
// Inverse of insn_bpf_size_to_bytes(): maps a byte count of 8, 4, 2 or 1 to
// BPF_DW, BPF_W, BPF_H or BPF_B.
// NOTE(review): the default branch is not visible in this view; the caller
// in bpf_core_patch_insn() treats a negative result as an invalid size.
988 static int insn_bytes_to_bpf_size(__u32 sz)
991 case 8: return BPF_DW;
992 case 4: return BPF_W;
993 case 2: return BPF_H;
994 case 1: return BPF_B;
1000 * Patch relocatable BPF instruction.
1002 * Patched value is determined by relocation kind and target specification.
1003 * For existence relocations target spec will be NULL if field/type is not found.
1004 * Expected insn->imm value is determined using relocation kind and local
1005 * spec, and is checked before patching instruction. If actual insn->imm value
1006 * is wrong, bail out with error.
1008 * Currently supported classes of BPF instruction are:
1009 * 1. rX = <imm> (assignment with immediate operand);
1010 * 2. rX += <imm> (arithmetic operations with immediate operand);
1011 * 3. rX = <imm64> (load with 64-bit immediate value);
1012 * 4. rX = *(T *)(rY + <off>), where T is one of {u8, u16, u32, u64};
1013 * 5. *(T *)(rX + <off>) = rY, where T is one of {u8, u16, u32, u64};
1014 * 6. *(T *)(rX + <off>) = <imm>, where T is one of {u8, u16, u32, u64}.
// Applies a computed relocation result res to instruction insn.
// Visible flow: the instruction class is extracted; one path poisons the
// instruction, and also the following slot when insn is an ldimm64. Otherwise
// orig_val and new_val are taken from res and one of three patch paths runs.
// The ALU path requires an immediate source operand, optionally validates
// insn->imm against orig_val and stores new_val into insn->imm.
// The LDX/ST/STX path optionally validates insn->off, rejects new_val above
// SHRT_MAX, rejects results flagged with fail_memsz_adjust, stores new_val
// into insn->off and, when the field size changed, rewrites the size bits of
// insn->code after checking the current and the new memory size.
// The LDIMM64 path checks the shape of both slots, optionally validates the
// combined 64-bit immediate and stores new_val split across insn[0].imm and
// insn[1].imm.
// Any other instruction is logged as unrecognized.
// NOTE(review): the condition that selects poisoning, the case labels, the
// declarations of class and imm and all return statements are not visible in
// this view.
1016 int bpf_core_patch_insn(const char *prog_name, struct bpf_insn *insn,
1017 int insn_idx, const struct bpf_core_relo *relo,
1018 int relo_idx, const struct bpf_core_relo_res *res)
1020 __u64 orig_val, new_val;
1023 class = BPF_CLASS(insn->code);
1027 /* poison second part of ldimm64 to avoid confusing error from
1028 * verifier about "unknown opcode 00"
1030 if (is_ldimm64_insn(insn))
1031 bpf_core_poison_insn(prog_name, relo_idx, insn_idx + 1, insn + 1);
1032 bpf_core_poison_insn(prog_name, relo_idx, insn_idx, insn);
1036 orig_val = res->orig_val;
1037 new_val = res->new_val;
1042 if (BPF_SRC(insn->code) != BPF_K)
1044 if (res->validate && insn->imm != orig_val) {
1045 pr_warn("prog '%s': relo #%d: unexpected insn #%d (ALU/ALU64) value: got %u, exp %llu -> %llu\n",
1046 prog_name, relo_idx,
1047 insn_idx, insn->imm, (unsigned long long)orig_val,
1048 (unsigned long long)new_val);
1051 orig_val = insn->imm;
1052 insn->imm = new_val;
1053 pr_debug("prog '%s': relo #%d: patched insn #%d (ALU/ALU64) imm %llu -> %llu\n",
1054 prog_name, relo_idx, insn_idx,
1055 (unsigned long long)orig_val, (unsigned long long)new_val);
1060 if (res->validate && insn->off != orig_val) {
1061 pr_warn("prog '%s': relo #%d: unexpected insn #%d (LDX/ST/STX) value: got %u, exp %llu -> %llu\n",
1062 prog_name, relo_idx, insn_idx, insn->off, (unsigned long long)orig_val,
1063 (unsigned long long)new_val);
1066 if (new_val > SHRT_MAX) {
1067 pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) value too big: %llu\n",
1068 prog_name, relo_idx, insn_idx, (unsigned long long)new_val);
1071 if (res->fail_memsz_adjust) {
1072 pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) accesses field incorrectly. "
1073 "Make sure you are accessing pointers, unsigned integers, or fields of matching type and size.\n",
1074 prog_name, relo_idx, insn_idx);
1078 orig_val = insn->off;
1079 insn->off = new_val;
1080 pr_debug("prog '%s': relo #%d: patched insn #%d (LDX/ST/STX) off %llu -> %llu\n",
1081 prog_name, relo_idx, insn_idx, (unsigned long long)orig_val,
1082 (unsigned long long)new_val);
1084 if (res->new_sz != res->orig_sz) {
1085 int insn_bytes_sz, insn_bpf_sz;
1087 insn_bytes_sz = insn_bpf_size_to_bytes(insn);
1088 if (insn_bytes_sz != res->orig_sz) {
1089 pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) unexpected mem size: got %d, exp %u\n",
1090 prog_name, relo_idx, insn_idx, insn_bytes_sz, res->orig_sz);
1094 insn_bpf_sz = insn_bytes_to_bpf_size(res->new_sz);
1095 if (insn_bpf_sz < 0) {
1096 pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) invalid new mem size: %u\n",
1097 prog_name, relo_idx, insn_idx, res->new_sz);
1101 insn->code = BPF_MODE(insn->code) | insn_bpf_sz | BPF_CLASS(insn->code);
1102 pr_debug("prog '%s': relo #%d: patched insn #%d (LDX/ST/STX) mem_sz %u -> %u\n",
1103 prog_name, relo_idx, insn_idx, res->orig_sz, res->new_sz);
1109 if (!is_ldimm64_insn(insn) ||
1110 insn[0].src_reg != 0 || insn[0].off != 0 ||
1111 insn[1].code != 0 || insn[1].dst_reg != 0 ||
1112 insn[1].src_reg != 0 || insn[1].off != 0) {
1113 pr_warn("prog '%s': relo #%d: insn #%d (LDIMM64) has unexpected form\n",
1114 prog_name, relo_idx, insn_idx);
1118 imm = (__u32)insn[0].imm | ((__u64)insn[1].imm << 32);
1119 if (res->validate && imm != orig_val) {
1120 pr_warn("prog '%s': relo #%d: unexpected insn #%d (LDIMM64) value: got %llu, exp %llu -> %llu\n",
1121 prog_name, relo_idx,
1122 insn_idx, (unsigned long long)imm,
1123 (unsigned long long)orig_val, (unsigned long long)new_val);
1127 insn[0].imm = new_val;
1128 insn[1].imm = new_val >> 32;
1129 pr_debug("prog '%s': relo #%d: patched insn #%d (LDIMM64) imm64 %llu -> %llu\n",
1130 prog_name, relo_idx, insn_idx,
1131 (unsigned long long)imm, (unsigned long long)new_val);
1135 pr_warn("prog '%s': relo #%d: trying to relocate unrecognized insn #%d, code:0x%x, src:0x%x, dst:0x%x, off:0x%x, imm:0x%x\n",
1136 prog_name, relo_idx, insn_idx, insn->code,
1137 insn->src_reg, insn->dst_reg, insn->off, insn->imm);
1144 /* Output spec definition in the format:
1145 * [<type-id>] (<type-name>) + <raw-spec> => <offset>@<spec>,
1146 * where <spec> is a C-syntax view of recorded field access, e.g.: x.a[3].b
// Renders a human-readable description of spec into buf, which holds buf_sz bytes.
// Output is assembled piecewise through the local append_buf macro, which
// wraps snprintf(). The pieces visible here are: the relocation kind name,
// root type id, kind string and type name (or an anon placeholder when the
// name is empty); for enum-value kinds the enumerator name and value, printed
// signed or unsigned depending on the kflag of the enum type; for field-based
// kinds the accessor chain, using a dot for named accessors and brackets for
// indices; then the raw spec joined by colons and the byte offset, with a bit
// remainder when the bit offset is not a multiple of eight.
// NOTE(review): the rest of the append_buf macro, the early returns and the
// return value are not visible in this view.
1148 int bpf_core_format_spec(char *buf, size_t buf_sz, const struct bpf_core_spec *spec)
1150 const struct btf_type *t;
1155 #define append_buf(fmt, args...) \
1158 r = snprintf(buf, buf_sz, fmt, ##args); \
1166 type_id = spec->root_type_id;
1167 t = btf_type_by_id(spec->btf, type_id);
1168 s = btf__name_by_offset(spec->btf, t->name_off);
1170 append_buf("<%s> [%u] %s %s",
1171 core_relo_kind_str(spec->relo_kind),
1172 type_id, btf_kind_str(t), str_is_empty(s) ? "<anon>" : s);
1174 if (core_relo_is_type_based(spec->relo_kind))
1177 if (core_relo_is_enumval_based(spec->relo_kind)) {
1178 t = skip_mods_and_typedefs(spec->btf, type_id, NULL);
1179 if (btf_is_enum(t)) {
1180 const struct btf_enum *e;
1181 const char *fmt_str;
1183 e = btf_enum(t) + spec->raw_spec[0];
1184 s = btf__name_by_offset(spec->btf, e->name_off);
1185 fmt_str = BTF_INFO_KFLAG(t->info) ? "::%s = %d" : "::%s = %u";
1186 append_buf(fmt_str, s, e->val);
1188 const struct btf_enum64 *e;
1189 const char *fmt_str;
1191 e = btf_enum64(t) + spec->raw_spec[0];
1192 s = btf__name_by_offset(spec->btf, e->name_off);
1193 fmt_str = BTF_INFO_KFLAG(t->info) ? "::%s = %lld" : "::%s = %llu";
1194 append_buf(fmt_str, s, (unsigned long long)btf_enum64_value(e));
1199 if (core_relo_is_field_based(spec->relo_kind)) {
1200 for (i = 0; i < spec->len; i++) {
1201 if (spec->spec[i].name)
1202 append_buf(".%s", spec->spec[i].name);
1203 else if (i > 0 || spec->spec[i].idx > 0)
1204 append_buf("[%u]", spec->spec[i].idx);
1208 for (i = 0; i < spec->raw_len; i++)
1209 append_buf("%s%d", i == 0 ? "" : ":", spec->raw_spec[i]);
1211 if (spec->bit_offset % 8)
1212 append_buf(" @ offset %u.%u)", spec->bit_offset / 8, spec->bit_offset % 8);
1214 append_buf(" @ offset %u)", spec->bit_offset / 8);
1223 * Calculate CO-RE relocation target result.
1225 * The outline and important points of the algorithm:
1226 * 1. For given local type, find corresponding candidate target types.
1227 * Candidate type is a type with the same "essential" name, ignoring
1228 * everything after last triple underscore (___). E.g., `sample`,
1229 * `sample___flavor_one`, `sample___flavor_another_one`, are all candidates
1230 * for each other. Names with triple underscore are referred to as
1231 * "flavors" and are useful, among other things, to allow to
1232 * specify/support incompatible variations of the same kernel struct, which
1233 * might differ between different kernel versions and/or build
1236 * N.B. Struct "flavors" could be generated by bpftool's BTF-to-C
1237 * converter, when deduplicated BTF of a kernel still contains more than
1238 * one distinct type with the same name. In that case, ___2, ___3, etc.
1239 * are appended starting from second name conflict. But struct flavors are
1240 * also useful to be defined "locally", in BPF program, to extract same
1241 * data from incompatible changes between different kernel
1242 * versions/configurations. For instance, to handle field renames between
1243 * kernel versions, one can use two flavors of the struct name with the
1244 * same common name and use conditional relocations to extract that field,
1245 * depending on target kernel version.
1246 * 2. For each candidate type, try to match local specification to this
1247 * candidate target type. Matching involves finding corresponding
1248 * high-level spec accessors, meaning that all named fields should match,
1249 * as well as all array accesses should be within the actual bounds. Also,
1250 * types should be compatible (see bpf_core_fields_are_compat for details).
1251 * 3. It is supported and expected that there might be multiple flavors
1252 * matching the spec. As long as all the specs resolve to the same set of
1253 * offsets across all candidates, there is no error. If there is any
1254 * ambiguity, CO-RE relocation will fail. This is necessary to accommodate
1255 * imperfection of BTF deduplication, which can cause slight duplication of
1256 * the same BTF type, if some directly or indirectly referenced (by
1257 * pointer) type gets resolved to different actual types in different
1258 * object files. If such a situation occurs, deduplicated BTF will end up
1259 * with two (or more) structurally identical types, which differ only in
1260 * types they refer to through pointer. This should be OK in most cases and is not an error.
1262 * 4. Candidate types search is performed by linearly scanning through all
1263 * types in target BTF. It is anticipated that this is overall more
1264 * efficient memory-wise and not significantly worse (if not better)
1265 * CPU-wise compared to prebuilding a map from all local type names to
1266 * a list of candidate type names. It's also sped up by caching resolved
1267 * list of matching candidates per each local "root" type ID, that has at
1268 * least one bpf_core_relo associated with it. This list is shared
1269 * between multiple relocations for the same type ID and is updated as some
1270 * of the candidates are pruned due to structural incompatibility.
1272 int bpf_core_calc_relo_insn(const char *prog_name,
1273 const struct bpf_core_relo *relo,
1275 const struct btf *local_btf,
1276 struct bpf_core_cand_list *cands,
1277 struct bpf_core_spec *specs_scratch,
1278 struct bpf_core_relo_res *targ_res)
1280 struct bpf_core_spec *local_spec = &specs_scratch[0];
1281 struct bpf_core_spec *cand_spec = &specs_scratch[1];
1282 struct bpf_core_spec *targ_spec = &specs_scratch[2];
1283 struct bpf_core_relo_res cand_res;
1284 const struct btf_type *local_type;
1285 const char *local_name;
1290 local_id = relo->type_id;
1291 local_type = btf_type_by_id(local_btf, local_id);
1292 local_name = btf__name_by_offset(local_btf, local_type->name_off);
1296 err = bpf_core_parse_spec(prog_name, local_btf, relo, local_spec);
1298 const char *spec_str;
1300 spec_str = btf__name_by_offset(local_btf, relo->access_str_off);
1301 pr_warn("prog '%s': relo #%d: parsing [%d] %s %s + %s failed: %d\n",
1302 prog_name, relo_idx, local_id, btf_kind_str(local_type),
1303 str_is_empty(local_name) ? "<anon>" : local_name,
1304 spec_str ?: "<?>", err);
1308 bpf_core_format_spec(spec_buf, sizeof(spec_buf), local_spec);
1309 pr_debug("prog '%s': relo #%d: %s\n", prog_name, relo_idx, spec_buf);
1311 /* TYPE_ID_LOCAL relo is special and doesn't need candidate search */
1312 if (relo->kind == BPF_CORE_TYPE_ID_LOCAL) {
1313 /* bpf_insn's imm value could get out of sync during linking */
1314 memset(targ_res, 0, sizeof(*targ_res));
1315 targ_res->validate = false;
1316 targ_res->poison = false;
1317 targ_res->orig_val = local_spec->root_type_id;
1318 targ_res->new_val = local_spec->root_type_id;
1322 /* libbpf doesn't support candidate search for anonymous types */
1323 if (str_is_empty(local_name)) {
1324 pr_warn("prog '%s': relo #%d: <%s> (%d) relocation doesn't support anonymous types\n",
1325 prog_name, relo_idx, core_relo_kind_str(relo->kind), relo->kind);
1329 for (i = 0, j = 0; i < cands->len; i++) {
1330 err = bpf_core_spec_match(local_spec, cands->cands[i].btf,
1331 cands->cands[i].id, cand_spec);
1333 bpf_core_format_spec(spec_buf, sizeof(spec_buf), cand_spec);
1334 pr_warn("prog '%s': relo #%d: error matching candidate #%d %s: %d\n ",
1335 prog_name, relo_idx, i, spec_buf, err);
1339 bpf_core_format_spec(spec_buf, sizeof(spec_buf), cand_spec);
1340 pr_debug("prog '%s': relo #%d: %s candidate #%d %s\n", prog_name,
1341 relo_idx, err == 0 ? "non-matching" : "matching", i, spec_buf);
1346 err = bpf_core_calc_relo(prog_name, relo, relo_idx, local_spec, cand_spec, &cand_res);
1351 *targ_res = cand_res;
1352 *targ_spec = *cand_spec;
1353 } else if (cand_spec->bit_offset != targ_spec->bit_offset) {
1354 /* if there are many field relo candidates, they
1355 * should all resolve to the same bit offset
1357 pr_warn("prog '%s': relo #%d: field offset ambiguity: %u != %u\n",
1358 prog_name, relo_idx, cand_spec->bit_offset,
1359 targ_spec->bit_offset);
1361 } else if (cand_res.poison != targ_res->poison ||
1362 cand_res.new_val != targ_res->new_val) {
1363 /* all candidates should result in the same relocation
1364 * decision and value, otherwise it's dangerous to
1365 * proceed due to ambiguity
1367 pr_warn("prog '%s': relo #%d: relocation decision ambiguity: %s %llu != %s %llu\n",
1368 prog_name, relo_idx,
1369 cand_res.poison ? "failure" : "success",
1370 (unsigned long long)cand_res.new_val,
1371 targ_res->poison ? "failure" : "success",
1372 (unsigned long long)targ_res->new_val);
1376 cands->cands[j++] = cands->cands[i];
1380 * For BPF_CORE_FIELD_EXISTS relo or when used BPF program has field
1381 * existence checks or kernel version/config checks, it's expected
1382 * that we might not find any candidates. In this case, if field
1383 * wasn't found in any candidate, the list of candidates shouldn't
1384 * change at all, we'll just handle relocating appropriately,
1385 * depending on relo's kind.
1391 * If no candidates were found, it might be both a programmer error,
1392 * as well as expected case, depending whether instruction w/
1393 * relocation is guarded in some way that makes it unreachable (dead
1394 * code) if relocation can't be resolved. This is handled in
1395 * bpf_core_patch_insn() uniformly by replacing that instruction with
1396 * BPF helper call insn (using invalid helper ID). If that instruction
1397 * is indeed unreachable, then it will be ignored and eliminated by
1398 * verifier. If it was an error, then verifier will complain and point
1399 * to a specific instruction number in its log.
1402 pr_debug("prog '%s': relo #%d: no matching targets found\n",
1403 prog_name, relo_idx);
1405 /* calculate single target relo result explicitly */
1406 err = bpf_core_calc_relo(prog_name, relo, relo_idx, local_spec, NULL, targ_res);