1 /* SPDX-License-Identifier: GPL-2.0-or-later */
3 * Lexical analysis for genksyms.
4 * Copyright 1996, 1997 Linux International.
6 * New implementation contributed by Richard Henderson <rth@tamu.edu>
7 * Based on original work by Bjorn Ekwall <bj0rn@blox.se>
9 * Taken from Linux modutils 2.4.22.
20 #include "parse.tab.h"
22 /* We've got a two-level lexer here. We let flex do basic tokenization
23 and then we categorize those basic tokens in the second stage. */
24 #define YY_DECL static int yylex1(void)
28 IDENT [A-Za-z_\$][A-Za-z0-9_\$]*
32 X_INT 0[Xx][0-9A-Fa-f]+
33 I_SUF [Uu]|[Ll]|[Uu][Ll]|[Ll][Uu]
34 INT ({O_INT}|{D_INT}|{X_INT}){I_SUF}?
36 FRAC ([0-9]*\.[0-9]+)|([0-9]+\.)
39 REAL ({FRAC}{EXP}?{F_SUF}?)|([0-9]+{EXP}{F_SUF}?)
41 STRING L?\"([^\\\"]*\\.)*[^\\\"]*\"
42 CHAR L?\'([^\\\']*\\.)*[^\\\']*\'
44 MC_TOKEN ([~%^&*+=|<>/-]=)|(&&)|("||")|(->)|(<<)|(>>)
46 /* We don't do multiple input files. */
54 /* Keep track of our location in the original source files. */
55 ^#[ \t]+{INT}[ \t]+\"[^\"\n]+\".*\n return FILENAME;
59 /* Ignore all other whitespace. */
63 {STRING} return STRING;
67 /* The Pedant requires that the other C multi-character tokens be
68 recognized as tokens. We don't actually use them since we don't
69 parse expressions, but we do want whitespace to be arranged
70 around them properly. */
71 {MC_TOKEN} return OTHER;
77 /* All other tokens are single characters. */
83 /* Bring in the keyword recognizer. */
88 /* Macros to append to our phrase collection list. */
91 * We mark any token that equals a known enumerator as
92 * SYM_ENUM_CONST. The parser will change this for struct and union tags later,
93 * the only problem is struct and union members:
94 * enum e { a, b }; struct s { int a, b; }
95 * but in this case the only effect will be that the ABI checksums become
96 * more volatile, which is acceptable. Also, such collisions are quite rare,
97 * so far it was only observed in include/linux/telephony.h.
99 #define _APP(T,L) do { \
100 cur_node = next_node; \
101 next_node = xmalloc(sizeof(*next_node)); \
102 next_node->next = cur_node; \
103 cur_node->string = memcpy(xmalloc(L+1), T, L+1); \
105 find_symbol(cur_node->string, SYM_ENUM_CONST, 1)?\
106 SYM_ENUM_CONST : SYM_NORMAL ; \
107 cur_node->in_source_file = in_source_file; \
110 #define APP _APP(yytext, yyleng)
113 /* The second stage lexer. Here we incorporate knowledge of the state
114 of the parser to tailor the tokens that are returned. */
120 ST_NOTSTARTED, ST_NORMAL, ST_ATTRIBUTE, ST_ASM, ST_TYPEOF, ST_TYPEOF_1,
121 ST_BRACKET, ST_BRACE, ST_EXPRESSION, ST_STATIC_ASSERT,
122 ST_TABLE_1, ST_TABLE_2, ST_TABLE_3, ST_TABLE_4,
123 ST_TABLE_5, ST_TABLE_6
124 } lexstate = ST_NOTSTARTED;
126 static int suppress_type_lookup, dont_want_brace_phrase;
127 static struct string_list *next_node;
129 int token, count = 0;
130 struct string_list *cur_node;
132 if (lexstate == ST_NOTSTARTED)
134 next_node = xmalloc(sizeof(*next_node));
135 next_node->next = NULL;
136 lexstate = ST_NORMAL;
144 else if (token == FILENAME)
148 /* Save the filename and line number for later error messages. */
153 file = strchr(yytext, '\"')+1;
154 e = strchr(file, '\"');
156 cur_filename = memcpy(xmalloc(e-file+1), file, e-file+1);
157 cur_line = atoi(yytext+2);
160 source_file = xstrdup(cur_filename);
163 in_source_file = (strcmp(cur_filename, source_file) == 0);
177 int r = is_reserved_word(yytext, yyleng);
183 lexstate = ST_ATTRIBUTE;
191 lexstate = ST_TYPEOF;
198 dont_want_brace_phrase = 3;
199 suppress_type_lookup = 2;
202 case EXPORT_SYMBOL_KEYW:
205 case STATIC_ASSERT_KEYW:
206 lexstate = ST_STATIC_ASSERT;
211 if (!suppress_type_lookup)
213 if (find_symbol(yytext, SYM_TYPEDEF, 1))
221 lexstate = ST_BRACKET;
227 if (dont_want_brace_phrase)
235 lexstate = ST_EXPRESSION;
255 lexstate = ST_NORMAL;
256 token = ATTRIBUTE_PHRASE;
275 lexstate = ST_NORMAL;
288 if (is_reserved_word(yytext, yyleng) >= 0
289 || find_symbol(yytext, SYM_TYPEDEF, 1))
293 lexstate = ST_NORMAL;
299 lexstate = ST_TYPEOF;
307 lexstate = ST_TYPEOF_1;
315 lexstate = ST_NORMAL;
316 token = TYPEOF_PHRASE;
336 lexstate = ST_NORMAL;
337 token = BRACKET_PHRASE;
356 lexstate = ST_NORMAL;
357 token = BRACE_PHRASE;
369 case '(': case '[': case '{':
374 /* is this the last line of an enum declaration? */
377 /* Put back the token we just read so's we can find it again
378 after registering the expression. */
381 lexstate = ST_NORMAL;
382 token = EXPRESSION_PHRASE;
393 /* Put back the token we just read so's we can find it again
394 after registering the expression. */
397 lexstate = ST_NORMAL;
398 token = EXPRESSION_PHRASE;
409 case ST_STATIC_ASSERT:
419 lexstate = ST_NORMAL;
420 token = STATIC_ASSERT_PHRASE;
433 if (token == IDENT && yyleng == 1 && yytext[0] == 'X')
435 token = EXPORT_SYMBOL_KEYW;
436 lexstate = ST_TABLE_5;
440 lexstate = ST_TABLE_6;
446 case '{': case '[': case '(':
449 case '}': case ']': case ')':
454 lexstate = ST_TABLE_2;
464 lexstate = ST_NORMAL;
472 lexstate = ST_TABLE_2;
486 if (suppress_type_lookup > 0)
487 --suppress_type_lookup;
488 if (dont_want_brace_phrase > 0)
489 --dont_want_brace_phrase;
491 yylval = &next_node->next;