1 // SPDX-License-Identifier: GPL-2.0
5 #include "demangle-rust.h"
8 * Mangled Rust symbols look like this:
10 * _$LT$std..sys..fd..FileDesc$u20$as$u20$core..ops..Drop$GT$::drop::hc68340e1baa4987a
12 * The original symbol is:
14 * <std::sys::fd::FileDesc as core::ops::Drop>::drop
16 * The last component of the path is a 64-bit hash in lowercase hex, prefixed
17 * with "h". Rust does not have a global namespace between crates, an illusion
18 * which Rust maintains by using the hash to distinguish things that would
19 * otherwise have the same symbol.
21 * Any path component not starting with an XID_Start character is prefixed with "_".
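24 * The following escape sequences are used: $C$ => ",", $SP$ => "@", $BP$ => "*", $RF$ => "&", $LT$ => "<", $GT$ => ">", $LP$ => "(", $RP$ => ")", $u20$ => " ", $u27$ => "'", $u5b$ => "[", $u5d$ => "]", $u7e$ => "~".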
40 * A double ".." means "::" and a single "." means "-".
42 * The only characters allowed in the mangled symbol are a-zA-Z0-9 and _.:$
45 static const char *hash_prefix = "::h";
46 static const size_t hash_prefix_len = 3;
47 static const size_t hash_len = 16;
49 static bool is_prefixed_hash(const char *start);
50 static bool looks_like_rust(const char *sym, size_t len);
51 static bool unescape(const char **in, char **out, const char *seq, char value);
55 * sym: symbol that has been through BFD-demangling
57 * This function looks for the following indicators:
59 * 1. The hash must consist of "h" followed by 16 lowercase hex digits.
61 * 2. As a sanity check, the hash must use between 5 and 15 of the 16 possible
62 * hex digits. This is true of 99.9998% of hashes so once in your life you
63 * may see a false negative. The point is to notice path components that
64 * could be Rust hashes but are probably not, like "haaaaaaaaaaaaaaaa". In
65 * this case a false positive (non-Rust symbol has an important path
66 * component removed because it looks like a Rust hash) is worse than a
67 * false negative (the rare Rust symbol is not demangled) so this sets the
68 * balance in favor of false negatives.
70 * 3. There must be no characters other than a-zA-Z0-9 and _.:$
72 * 4. There must be no unrecognized $-sign sequences.
74 * 5. There must be no sequence of three or more dots in a row ("...").
77 rust_is_mangled(const char *sym)
79 size_t len, len_without_hash;
85 if (len <= hash_prefix_len + hash_len)
86 /* Not long enough to contain "::h" + hash + something else */
89 len_without_hash = len - (hash_prefix_len + hash_len);
90 if (!is_prefixed_hash(sym + len_without_hash))
93 return looks_like_rust(sym, len_without_hash);
97 * A hash is the prefix "::h" followed by 16 lowercase hex digits. The hex
98 * digits must comprise between 5 and 15 (inclusive) distinct digits.
100 static bool is_prefixed_hash(const char *str)
107 if (strncmp(str, hash_prefix, hash_prefix_len))
109 str += hash_prefix_len;
111 memset(seen, false, sizeof(seen));
112 for (end = str + hash_len; str < end; str++)
113 if (*str >= '0' && *str <= '9')
114 seen[*str - '0'] = true;
115 else if (*str >= 'a' && *str <= 'f')
116 seen[*str - 'a' + 10] = true;
120 /* Count how many distinct digits seen */
122 for (i = 0; i < 16; i++)
126 return count >= 5 && count <= 15;
129 static bool looks_like_rust(const char *str, size_t len)
131 const char *end = str + len;
136 if (!strncmp(str, "$C$", 3))
138 else if (!strncmp(str, "$SP$", 4)
139 || !strncmp(str, "$BP$", 4)
140 || !strncmp(str, "$RF$", 4)
141 || !strncmp(str, "$LT$", 4)
142 || !strncmp(str, "$GT$", 4)
143 || !strncmp(str, "$LP$", 4)
144 || !strncmp(str, "$RP$", 4))
146 else if (!strncmp(str, "$u20$", 5)
147 || !strncmp(str, "$u27$", 5)
148 || !strncmp(str, "$u5b$", 5)
149 || !strncmp(str, "$u5d$", 5)
150 || !strncmp(str, "$u7e$", 5))
156 /* Do not allow three or more consecutive dots */
157 if (!strncmp(str, "...", 3))
176 * sym: symbol for which rust_is_mangled(sym) returns true
178 * The input is demangled in-place because the mangled name is always longer
179 * than the demangled one.
182 rust_demangle_sym(char *sym)
193 end = sym + strlen(sym) - (hash_prefix_len + hash_len);
198 if (!(unescape(&in, &out, "$C$", ',')
199 || unescape(&in, &out, "$SP$", '@')
200 || unescape(&in, &out, "$BP$", '*')
201 || unescape(&in, &out, "$RF$", '&')
202 || unescape(&in, &out, "$LT$", '<')
203 || unescape(&in, &out, "$GT$", '>')
204 || unescape(&in, &out, "$LP$", '(')
205 || unescape(&in, &out, "$RP$", ')')
206 || unescape(&in, &out, "$u20$", ' ')
207 || unescape(&in, &out, "$u27$", '\'')
208 || unescape(&in, &out, "$u5b$", '[')
209 || unescape(&in, &out, "$u5d$", ']')
210 || unescape(&in, &out, "$u7e$", '~'))) {
211 pr_err("demangle-rust: unexpected escape sequence");
217 * If this is the start of a path component and the next
218 * character is an escape sequence, ignore the
219 * underscore. The mangler inserts an underscore to make
220 * sure the path component begins with an XID_Start character.
223 if ((in == sym || in[-1] == ':') && in[1] == '$')
230 /* ".." becomes "::" */
235 /* "." becomes "-" */
247 pr_err("demangle-rust: unexpected character '%c' in symbol\n",
256 static bool unescape(const char **in, char **out, const char *seq, char value)
258 size_t len = strlen(seq);
260 if (strncmp(*in, seq, len))