12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863 |
- #------------------------------------------------------------------------------
- # pycparser: c_parser.py
- #
- # CParser class: Parser and AST builder for the C language
- #
- # Eli Bendersky [https://eli.thegreenplace.net/]
- # License: BSD
- #------------------------------------------------------------------------------
- import re
- from .ply import yacc
- from . import c_ast
- from .c_lexer import CLexer
- from .plyparser import PLYParser, Coord, ParseError, parameterized, template
- from .ast_transforms import fix_switch_cases
- @template
- class CParser(PLYParser):
def __init__(
        self,
        lex_optimize=True,
        lexer=CLexer,
        lextab='pycparser.lextab',
        yacc_optimize=True,
        yacctab='pycparser.yacctab',
        yacc_debug=False,
        taboutputdir=''):
    """ Build the lexer and the yacc parser used to parse C.

        The optimization/debug switches default to release
        settings. Rule of thumb: turn the *_optimize flags off
        while editing CParser/CLexer, and back on for a stable
        release.

        lex_optimize:
            When True, an existing lextab file is reused instead
            of regenerating the lexer table on each run. Set to
            False while modifying the lexer, otherwise the
            changes won't be picked up.

        lexer:
            Lexer class to instantiate; replace it if you are
            not using the default CLexer.

        lextab:
            Name of the lex table used in optimized mode. Only
            worth changing to a local table (generated earlier
            with lex_optimize=True) when modifying the lexer.

        yacc_optimize:
            When True, an existing parse table is reused instead
            of being regenerated on each run. Set to False while
            modifying the parser.

        yacctab:
            Name of the yacc table used in optimized mode.

        yacc_debug:
            Generate a parser.out file explaining how yacc built
            the parsing table from the grammar.

        taboutputdir:
            Directory in which generated lextab and yacctab
            files are placed.
    """
    # The lexer calls back into the parser for error reporting,
    # scope tracking on braces, and typedef-name lookups.
    lexer_callbacks = dict(
        error_func=self._lex_error_func,
        on_lbrace_func=self._lex_on_lbrace_func,
        on_rbrace_func=self._lex_on_rbrace_func,
        type_lookup_func=self._lex_type_lookup_func)
    self.clex = lexer(**lexer_callbacks)
    self.clex.build(
        optimize=lex_optimize,
        lextab=lextab,
        outputdir=taboutputdir)
    self.tokens = self.clex.tokens

    # Rules that the grammar also references in their '<rule>_opt'
    # form; the optional variants must exist before yacc inspects
    # this object.
    for rule_name in [
            'abstract_declarator',
            'assignment_expression',
            'declaration_list',
            'declaration_specifiers_no_type',
            'designation',
            'expression',
            'identifier_list',
            'init_declarator_list',
            'id_init_declarator_list',
            'initializer_list',
            'parameter_type_list',
            'block_item_list',
            'type_qualifier_list',
            'struct_declarator_list']:
        self._create_opt_rule(rule_name)

    self.cparser = yacc.yacc(
        module=self,
        start='translation_unit_or_empty',
        debug=yacc_debug,
        optimize=yacc_optimize,
        tabmodule=yacctab,
        outputdir=taboutputdir)

    # Scope stack for symbol tracking; the last element is the
    # current (innermost) scope. Each scope maps a name to True
    # when the name is a type in that scope and to False when it
    # is used there as a non-type (e.g. after "int name;"). A
    # name missing from a scope was not declared in it at all.
    self._scope_stack = [{}]

    # Last token handed to yacc (the lookahead token).
    self._last_yielded_token = None
- def parse(self, text, filename='', debuglevel=0):
- """ Parses C code and returns an AST.
- text:
- A string containing the C source code
- filename:
- Name of the file being parsed (for meaningful
- error messages)
- debuglevel:
- Debug level to yacc
- """
- self.clex.filename = filename
- self.clex.reset_lineno()
- self._scope_stack = [dict()]
- self._last_yielded_token = None
- return self.cparser.parse(
- input=text,
- lexer=self.clex,
- debug=debuglevel)
- ######################-- PRIVATE --######################
- def _push_scope(self):
- self._scope_stack.append(dict())
- def _pop_scope(self):
- assert len(self._scope_stack) > 1
- self._scope_stack.pop()
- def _add_typedef_name(self, name, coord):
- """ Add a new typedef name (ie a TYPEID) to the current scope
- """
- if not self._scope_stack[-1].get(name, True):
- self._parse_error(
- "Typedef %r previously declared as non-typedef "
- "in this scope" % name, coord)
- self._scope_stack[-1][name] = True
- def _add_identifier(self, name, coord):
- """ Add a new object, function, or enum member name (ie an ID) to the
- current scope
- """
- if self._scope_stack[-1].get(name, False):
- self._parse_error(
- "Non-typedef %r previously declared as typedef "
- "in this scope" % name, coord)
- self._scope_stack[-1][name] = False
- def _is_type_in_scope(self, name):
- """ Is *name* a typedef-name in the current scope?
- """
- for scope in reversed(self._scope_stack):
- # If name is an identifier in this scope it shadows typedefs in
- # higher scopes.
- in_scope = scope.get(name)
- if in_scope is not None: return in_scope
- return False
- def _lex_error_func(self, msg, line, column):
- self._parse_error(msg, self._coord(line, column))
- def _lex_on_lbrace_func(self):
- self._push_scope()
- def _lex_on_rbrace_func(self):
- self._pop_scope()
- def _lex_type_lookup_func(self, name):
- """ Looks up types that were previously defined with
- typedef.
- Passed to the lexer for recognizing identifiers that
- are types.
- """
- is_type = self._is_type_in_scope(name)
- return is_type
- def _get_yacc_lookahead_token(self):
- """ We need access to yacc's lookahead token in certain cases.
- This is the last token yacc requested from the lexer, so we
- ask the lexer.
- """
- return self.clex.last_token
- # To understand what's going on here, read sections A.8.5 and
- # A.8.6 of K&R2 very carefully.
- #
- # A C type consists of a basic type declaration, with a list
- # of modifiers. For example:
- #
- # int *c[5];
- #
- # The basic declaration here is 'int c', and the pointer and
- # the array are the modifiers.
- #
- # Basic declarations are represented by TypeDecl (from module c_ast) and the
- # modifiers are FuncDecl, PtrDecl and ArrayDecl.
- #
- # The standard states that whenever a new modifier is parsed, it should be
- # added to the end of the list of modifiers. For example:
- #
- # K&R2 A.8.6.2: Array Declarators
- #
- # In a declaration T D where D has the form
- # D1 [constant-expression-opt]
- # and the type of the identifier in the declaration T D1 is
- # "type-modifier T", the type of the
- # identifier of D is "type-modifier array of T"
- #
- # This is what this method does. The declarator it receives
- # can be a list of declarators ending with TypeDecl. It
- # tacks the modifier to the end of this list, just before
- # the TypeDecl.
- #
- # Additionally, the modifier may be a list itself. This is
- # useful for pointers, that can come as a chain from the rule
- # p_pointer. In this case, the whole modifier list is spliced
- # into the new location.
def _type_modify_decl(self, decl, modifier):
    """ Attach a type modifier to a declarator and return the
        resulting declarator.

        *modifier* may itself be a chain of modifiers linked
        through their .type attributes; the whole chain is
        spliced in just before the TypeDecl that ends *decl*.

        Note: both the declarator and the modifier may be
        modified in place.
    """
    # Walk to the last link of the (possibly nested) modifier.
    chain_end = modifier
    while chain_end.type:
        chain_end = chain_end.type

    # A bare basic type simply becomes the target of the chain.
    if isinstance(decl, c_ast.TypeDecl):
        chain_end.type = decl
        return modifier

    # Otherwise decl is a list of modifiers: find the node that
    # points at the underlying TypeDecl and insert the chain
    # between that node and the TypeDecl.
    last_modifier = decl
    while not isinstance(last_modifier.type, c_ast.TypeDecl):
        last_modifier = last_modifier.type

    chain_end.type = last_modifier.type
    last_modifier.type = modifier
    return decl
- # Due to the order in which declarators are constructed,
- # they have to be fixed in order to look like a normal AST.
- #
- # When a declaration arrives from syntax construction, it has
- # these problems:
- # * The innermost TypeDecl has no type (because the basic
- # type is only known at the uppermost declaration level)
- # * The declaration has no variable name, since that is saved
- # in the innermost TypeDecl
- # * The typename of the declaration is a list of type
- # specifiers, and not a node. Here, basic identifier types
- # should be separated from more complex types like enums
- # and structs.
- #
- # This method fixes these problems.
- #
def _fix_decl_name_type(self, decl, typename):
    """ Complete a declaration in place and return it.

        decl:
            Declaration node whose .type chain ends in a TypeDecl.

        typename:
            List of type specifier nodes gathered for the
            declaration.

        The declared name is copied from the innermost TypeDecl
        to *decl*, the qualifiers of *decl* are copied to that
        TypeDecl, and the TypeDecl receives its actual type.
    """
    # Reach the underlying basic type.
    innermost = decl
    while not isinstance(innermost, c_ast.TypeDecl):
        innermost = innermost.type

    decl.name = innermost.declname
    innermost.quals = decl.quals

    # A specifier that is not an IdentifierType (e.g. an enum or
    # struct) must be the only one in the list: it's illegal to
    # declare "int enum ..".
    for spec in typename:
        if isinstance(spec, c_ast.IdentifierType):
            continue
        if len(typename) > 1:
            self._parse_error(
                "Invalid multiple types specified", spec.coord)
        else:
            innermost.type = spec
            return decl

    if typename:
        # Only IdentifierType nodes remain; merge all their names
        # into a single IdentifierType.
        innermost.type = c_ast.IdentifierType(
            [name for ident in typename for name in ident.names],
            coord=typename[0].coord)
    else:
        # No type at all is only accepted for functions, which
        # default to returning int.
        if not isinstance(decl.type, c_ast.FuncDecl):
            self._parse_error(
                "Missing type in declaration", decl.coord)
        innermost.type = c_ast.IdentifierType(
            ['int'],
            coord=decl.coord)
    return decl
- def _add_declaration_specifier(self, declspec, newspec, kind, append=False):
- """ Declaration specifiers are represented by a dictionary
- with the entries:
- * qual: a list of type qualifiers
- * storage: a list of storage type qualifiers
- * type: a list of type specifiers
- * function: a list of function specifiers
- This method is given a declaration specifier, and a
- new specifier of a given kind.
- If `append` is True, the new specifier is added to the end of
- the specifiers list, otherwise it's added at the beginning.
- Returns the declaration specifier, with the new
- specifier incorporated.
- """
- spec = declspec or dict(qual=[], storage=[], type=[], function=[])
- if append:
- spec[kind].append(newspec)
- else:
- spec[kind].insert(0, newspec)
- return spec
def _build_declarations(self, spec, decls, typedef_namespace=False):
    """ Build the list of declarations that share the specifiers
        in *spec*.

        spec:
            Declaration-specifier dictionary (qual / storage /
            type / function lists).

        decls:
            List of dictionaries with a 'decl' entry and optional
            'init' and 'bitsize' entries.

        typedef_namespace:
            When true, every declared name is added to the
            "typedef namespace", which also includes objects,
            functions, and enum constants.

        Note: spec['type'] and decls[0] may be modified.
    """
    is_typedef = 'typedef' in spec['storage']
    unnamed_kinds = (c_ast.Struct, c_ast.Union, c_ast.IdentifierType)
    first = decls[0]
    type_specs = spec['type']

    if first.get('bitsize') is not None:
        # Bit-fields are allowed to be unnamed.
        pass
    elif first['decl'] is None:
        # When a typedef name is redeclared as an identifier in an
        # inner scope, the identifier can end up grouped into
        # spec['type'], leaving no declarator. This can only occur
        # for the first declarator.
        if (len(type_specs) < 2
                or len(type_specs[-1].names) != 1
                or not self._is_type_in_scope(type_specs[-1].names[0])):
            coord = next(
                (t.coord for t in type_specs if hasattr(t, 'coord')),
                '?')
            self._parse_error('Invalid declaration', coord)

        # Make this look as if it came from "direct_declarator:ID"
        first['decl'] = c_ast.TypeDecl(
            declname=type_specs[-1].names[0],
            type=None,
            quals=None,
            coord=type_specs[-1].coord)
        # Remove the "new" type's name from the end of spec['type']
        del type_specs[-1]
    elif not isinstance(first['decl'], unnamed_kinds):
        # A similar problem: the declaration ends up looking like
        # an abstract declarator. Name it after the last type
        # specifier if that is the case.
        tail = first['decl']
        while not isinstance(tail, c_ast.TypeDecl):
            tail = tail.type
        if tail.declname is None:
            tail.declname = type_specs[-1].names[0]
            del type_specs[-1]

    declarations = []
    for entry in decls:
        assert entry['decl'] is not None
        if is_typedef:
            declaration = c_ast.Typedef(
                name=None,
                quals=spec['qual'],
                storage=spec['storage'],
                type=entry['decl'],
                coord=entry['decl'].coord)
        else:
            declaration = c_ast.Decl(
                name=None,
                quals=spec['qual'],
                storage=spec['storage'],
                funcspec=spec['function'],
                type=entry['decl'],
                init=entry.get('init'),
                bitsize=entry.get('bitsize'),
                coord=entry['decl'].coord)

        if isinstance(declaration.type, unnamed_kinds):
            fixed_decl = declaration
        else:
            fixed_decl = self._fix_decl_name_type(declaration, spec['type'])

        # Register the declared name in the symbol table, so the
        # lexer can tell type names from other identifiers.
        if typedef_namespace:
            if is_typedef:
                self._add_typedef_name(fixed_decl.name, fixed_decl.coord)
            else:
                self._add_identifier(fixed_decl.name, fixed_decl.coord)

        declarations.append(fixed_decl)

    return declarations
def _build_function_definition(self, spec, decl, param_decls, body):
    """ Build a FuncDef node.

        spec:
            Declaration specifiers; must not contain 'typedef'.

        decl:
            The function's declarator.

        param_decls:
            Stored as-is on the FuncDef (the declaration list of a
            K&R-style definition).

        body:
            The function body.
    """
    assert 'typedef' not in spec['storage']

    # The function is a single declaration built like any other;
    # its name is registered as an identifier along the way.
    built = self._build_declarations(
        spec=spec,
        decls=[dict(decl=decl, init=None)],
        typedef_namespace=True)
    declaration = built[0]

    return c_ast.FuncDef(
        decl=declaration,
        param_decls=param_decls,
        body=body,
        coord=decl.coord)
def _select_struct_union_class(self, token):
    """ Map a token value to an AST class: 'struct' selects
        c_ast.Struct, anything else selects c_ast.Union.
    """
    return c_ast.Struct if token == 'struct' else c_ast.Union
- ##
- ## Precedence and associativity of operators
- ##
# Operator precedence table for the binary operators; every
# operator listed here is left-associative.
# NOTE(review): per PLY's convention, rows further down bind more
# tightly (LOR loosest, TIMES/DIVIDE/MOD tightest) - confirm
# against the PLY documentation before reordering.
precedence = (
    ('left', 'LOR'),
    ('left', 'LAND'),
    ('left', 'OR'),
    ('left', 'XOR'),
    ('left', 'AND'),
    ('left', 'EQ', 'NE'),
    ('left', 'GT', 'GE', 'LT', 'LE'),
    ('left', 'RSHIFT', 'LSHIFT'),
    ('left', 'PLUS', 'MINUS'),
    ('left', 'TIMES', 'DIVIDE', 'MOD')
)
- ##
- ## Grammar productions
- ## Implementation of the BNF defined in K&R2 A.13
- ##
- # Wrapper around a translation unit, to allow for empty input.
- # Not strictly part of the C99 Grammar, but useful in practice.
- #
def p_translation_unit_or_empty(self, p):
    """ translation_unit_or_empty : translation_unit
                                  | empty
    """
    # A None value on the right-hand side (the `empty` alternative)
    # yields a FileAST without any top-level nodes.
    nodes = [] if p[1] is None else p[1]
    p[0] = c_ast.FileAST(nodes)
def p_translation_unit_1(self, p):
    """ translation_unit : external_declaration
    """
    # external_declaration already produces a list, so it is
    # passed through unchanged.
    p[0] = p[1]
def p_translation_unit_2(self, p):
    """ translation_unit : translation_unit external_declaration
    """
    # Grow the list accumulated so far in place with the newly
    # parsed external declarations.
    accumulated = p[1]
    accumulated.extend(p[2])
    p[0] = accumulated
- # Declarations always come as lists (because they can be
- # several in one line), so we wrap the function definition
- # into a list as well, to make the return value of
- # external_declaration homogeneous.
- #
def p_external_declaration_1(self, p):
    """ external_declaration : function_definition
    """
    # Wrap the single function definition in a list, like every
    # other external_declaration alternative.
    definition = p[1]
    p[0] = [definition]
def p_external_declaration_2(self, p):
    """ external_declaration : declaration
    """
    # A declaration is already a list of nodes.
    p[0] = p[1]
def p_external_declaration_3(self, p):
    """ external_declaration : pp_directive
                             | pppragma_directive
    """
    # A directive is a single node; wrap it in a list.
    directive = p[1]
    p[0] = [directive]
def p_external_declaration_4(self, p):
    """ external_declaration : SEMI
    """
    # A stray semicolon contributes no nodes.
    p[0] = list()
- def p_pp_directive(self, p):
- """ pp_directive : PPHASH
- """
- self._parse_error('Directives not supported yet',
- self._token_coord(p, 1))
def p_pppragma_directive(self, p):
    """ pppragma_directive : PPPRAGMA
                           | PPPRAGMA PPPRAGMASTR
    """
    # A bare pragma gets an empty string and the coordinate of
    # the PPPRAGMA token; otherwise both the string and the
    # coordinate come from the PPPRAGMASTR token.
    has_text = len(p) == 3
    if not has_text:
        p[0] = c_ast.Pragma("", self._token_coord(p, 1))
    else:
        p[0] = c_ast.Pragma(p[2], self._token_coord(p, 2))
- # In function definitions, the declarator can be followed by
- # a declaration list, for old "K&R style" function definitions.
- #
def p_function_definition_1(self, p):
    """ function_definition : id_declarator declaration_list_opt compound_statement
    """
    # No declaration specifiers were written, so the type
    # defaults to 'int'.
    implicit_int = c_ast.IdentifierType(
        ['int'],
        coord=self._token_coord(p, 1))
    spec = {
        'qual': [],
        'storage': [],
        'type': [implicit_int],
        'function': [],
    }

    p[0] = self._build_function_definition(
        spec=spec,
        decl=p[1],
        param_decls=p[2],
        body=p[3])
- def p_function_definition_2(self, p):
- """ function_definition : declaration_specifiers id_declarator declaration_list_opt compound_statement
- """
- spec = p[1]
- p[0] = self._build_function_definition(
- spec=spec,
- decl=p[2],
- param_decls=p[3],
- body=p[4])
def p_statement(self, p):
    """ statement : labeled_statement
                  | expression_statement
                  | compound_statement
                  | selection_statement
                  | iteration_statement
                  | jump_statement
                  | pppragma_directive
    """
    # Every alternative is a single symbol whose value is passed
    # through unchanged.
    p[0] = p[1]
- # A pragma is generally considered a decorator rather than an actual statement.
- # Still, for the purposes of analyzing an abstract syntax tree of C code,
- # pragmas should not be ignored and were previously treated as statements.
- # This presents a problem for constructs that take a statement such as labeled_statements,
- # selection_statements, and iteration_statements, causing a misleading structure
- # in the AST. For example, consider the following C code.
- #
- # for (int i = 0; i < 3; i++)
- # #pragma omp critical
- # sum += 1;
- #
- # This code will compile and execute "sum += 1;" as the body of the for loop.
- # Previous implementations of PyCParser would render the AST for this
- # block of code as follows:
- #
- # For:
- # DeclList:
- # Decl: i, [], [], []
- # TypeDecl: i, []
- # IdentifierType: ['int']
- # Constant: int, 0
- # BinaryOp: <
- # ID: i
- # Constant: int, 3
- # UnaryOp: p++
- # ID: i
- # Pragma: omp critical
- # Assignment: +=
- # ID: sum
- # Constant: int, 1
- #
- # This AST misleadingly takes the Pragma as the body of the loop and the
- # assignment then becomes a sibling of the loop.
- #
- # To solve edge cases like these, the pragmacomp_or_statement rule groups
- # a pragma and its following statement (which would otherwise be orphaned)
- # using a compound block, effectively turning the above code into:
- #
- # for (int i = 0; i < 3; i++) {
- # #pragma omp critical
- # sum += 1;
- # }
def p_pragmacomp_or_statement(self, p):
    """ pragmacomp_or_statement : pppragma_directive statement
                                | statement
    """
    # A pragma followed by a statement is grouped into a compound
    # block holding both; a lone statement is passed through.
    pragma_with_statement = len(p) == 3 and isinstance(p[1], c_ast.Pragma)
    if not pragma_with_statement:
        p[0] = p[1]
        return

    p[0] = c_ast.Compound(
        block_items=[p[1], p[2]],
        coord=self._token_coord(p, 1))
- # In C, declarations can come several in a line:
- # int x, *px, romulo = 5;
- #
- # However, for the AST, we will split them to separate Decl
- # nodes.
- #
- # This rule splits its declarations and always returns a list
- # of Decl nodes, even if it's one element long.
- #
- def p_decl_body(self, p):
- """ decl_body : declaration_specifiers init_declarator_list_opt
- | declaration_specifiers_no_type id_init_declarator_list_opt
- """
- spec = p[1]
- # p[2] (init_declarator_list_opt) is either a list or None
- #
- if p[2] is None:
- # By the standard, you must have at least one declarator unless
- # declaring a structure tag, a union tag, or the members of an
- # enumeration.
- #
- ty = spec['type']
- s_u_or_e = (c_ast.Struct, c_ast.Union, c_ast.Enum)
- if len(ty) == 1 and isinstance(ty[0], s_u_or_e):
- decls = [c_ast.Decl(
- name=None,
- quals=spec['qual'],
- storage=spec['storage'],
- funcspec=spec['function'],
- type=ty[0],
- init=None,
- bitsize=None,
- coord=ty[0].coord)]
- # However, this case can also occur on redeclared identifiers in
- # an inner scope. The trouble is that the redeclared type's name
- # gets grouped into declaration_specifiers; _build_declarations
- # compensates for this.
- #
- else:
- decls = self._build_declarations(
- spec=spec,
- decls=[dict(decl=None, init=None)],
- typedef_namespace=True)
- else:
- decls = self._build_declarations(
- spec=spec,
- decls=p[2],
- typedef_namespace=True)
- p[0] = decls
- # The declaration has been split to a decl_body sub-rule and
- # SEMI, because having them in a single rule created a problem
- # for defining typedefs.
- #
- # If a typedef line was directly followed by a line using the
- # type defined with the typedef, the type would not be
- # recognized. This is because to reduce the declaration rule,
- # the parser's lookahead asked for the token after SEMI, which
- # was the type from the next line, and the lexer had no chance
- # to see the updated type symbol table.
- #
- # Splitting solves this problem, because after seeing SEMI,
- # the parser reduces decl_body, which actually adds the new
- # type into the table to be seen by the lexer before the next
- # line is reached.
def p_declaration(self, p):
    """ declaration : decl_body SEMI
    """
    # The list produced by decl_body is the result; the SEMI token
    # carries no value of interest.
    p[0] = p[1]
- # Since each declaration is a list of declarations, this
- # rule will combine all the declarations and return a single
- # list
- #
def p_declaration_list(self, p):
    """ declaration_list : declaration
                         | declaration_list declaration
    """
    # Each declaration is itself a list; a single one is passed
    # through, two are concatenated into a new list.
    if len(p) == 2:
        p[0] = p[1]
    else:
        p[0] = p[1] + p[2]
- # To know when declaration-specifiers end and declarators begin,
- # we require declaration-specifiers to have at least one
- # type-specifier, and disallow typedef-names after we've seen any
- # type-specifier. These are both required by the spec.
- #
- def p_declaration_specifiers_no_type_1(self, p):
- """ declaration_specifiers_no_type : type_qualifier declaration_specifiers_no_type_opt
- """
- p[0] = self._add_declaration_specifier(p[2], p[1], 'qual')
- def p_declaration_specifiers_no_type_2(self, p):
- """ declaration_specifiers_no_type : storage_class_specifier declaration_specifiers_no_type_opt
- """
- p[0] = self._add_declaration_specifier(p[2], p[1], 'storage')
- def p_declaration_specifiers_no_type_3(self, p):
- """ declaration_specifiers_no_type : function_specifier declaration_specifiers_no_type_opt
- """
- p[0] = self._add_declaration_specifier(p[2], p[1], 'function')
- def p_declaration_specifiers_1(self, p):
- """ declaration_specifiers : declaration_specifiers type_qualifier
- """
- p[0] = self._add_declaration_specifier(p[1], p[2], 'qual', append=True)
- def p_declaration_specifiers_2(self, p):
- """ declaration_specifiers : declaration_specifiers storage_class_specifier
- """
- p[0] = self._add_declaration_specifier(p[1], p[2], 'storage', append=True)
- def p_declaration_specifiers_3(self, p):
- """ declaration_specifiers : declaration_specifiers function_specifier
- """
- p[0] = self._add_declaration_specifier(p[1], p[2], 'function', append=True)
- def p_declaration_specifiers_4(self, p):
- """ declaration_specifiers : declaration_specifiers type_specifier_no_typeid
- """
- p[0] = self._add_declaration_specifier(p[1], p[2], 'type', append=True)
- def p_declaration_specifiers_5(self, p):
- """ declaration_specifiers : type_specifier
- """
- p[0] = self._add_declaration_specifier(None, p[1], 'type')
- def p_declaration_specifiers_6(self, p):
- """ declaration_specifiers : declaration_specifiers_no_type type_specifier
- """
- p[0] = self._add_declaration_specifier(p[1], p[2], 'type', append=True)
def p_storage_class_specifier(self, p):
    """ storage_class_specifier : AUTO
                                | REGISTER
                                | STATIC
                                | EXTERN
                                | TYPEDEF
    """
    # The value of the keyword token is the specifier.
    p[0] = p[1]
def p_function_specifier(self, p):
    """ function_specifier : INLINE
    """
    # The value of the keyword token is the specifier.
    p[0] = p[1]
def p_type_specifier_no_typeid(self, p):
    """ type_specifier_no_typeid : VOID
                                 | _BOOL
                                 | CHAR
                                 | SHORT
                                 | INT
                                 | LONG
                                 | FLOAT
                                 | DOUBLE
                                 | _COMPLEX
                                 | SIGNED
                                 | UNSIGNED
                                 | __INT128
    """
    # A built-in type keyword becomes a one-name IdentifierType
    # located at the keyword token.
    keyword = p[1]
    p[0] = c_ast.IdentifierType([keyword], coord=self._token_coord(p, 1))
def p_type_specifier(self, p):
    """ type_specifier : typedef_name
                       | enum_specifier
                       | struct_or_union_specifier
                       | type_specifier_no_typeid
    """
    # Every alternative already produced its own value; pass it
    # through.
    p[0] = p[1]
def p_type_qualifier(self, p):
    """ type_qualifier : CONST
                       | RESTRICT
                       | VOLATILE
    """
    # The value of the keyword token is the qualifier.
    p[0] = p[1]
def p_init_declarator_list(self, p):
    """ init_declarator_list : init_declarator
                             | init_declarator_list COMMA init_declarator
    """
    # One declarator starts a new list; a comma-separated one is
    # added to a copy of the list built so far.
    if len(p) == 4:
        p[0] = p[1] + [p[3]]
    else:
        p[0] = [p[1]]
- # Returns a {decl=<declarator> : init=<initializer>} dictionary
- # If there's no initializer, uses None
- #
def p_init_declarator(self, p):
    """ init_declarator : declarator
                        | declarator EQUALS initializer
    """
    # Without an initializer the 'init' entry is None.
    initializer = p[3] if len(p) > 2 else None
    p[0] = {'decl': p[1], 'init': initializer}
def p_id_init_declarator_list(self, p):
    """ id_init_declarator_list : id_init_declarator
                                | id_init_declarator_list COMMA init_declarator
    """
    # One declarator starts a new list; a comma-separated one is
    # added to a copy of the list built so far.
    if len(p) == 4:
        p[0] = p[1] + [p[3]]
    else:
        p[0] = [p[1]]
def p_id_init_declarator(self, p):
    """ id_init_declarator : id_declarator
                           | id_declarator EQUALS initializer
    """
    # Without an initializer the 'init' entry is None.
    initializer = p[3] if len(p) > 2 else None
    p[0] = {'decl': p[1], 'init': initializer}
- # Require at least one type specifier in a specifier-qualifier-list
- #
- def p_specifier_qualifier_list_1(self, p):
- """ specifier_qualifier_list : specifier_qualifier_list type_specifier_no_typeid
- """
- p[0] = self._add_declaration_specifier(p[1], p[2], 'type', append=True)
def p_specifier_qualifier_list_2(self, p):
    """ specifier_qualifier_list : specifier_qualifier_list type_qualifier
    """
    # Append the new qualifier to the specifiers collected so far.
    collected, new_qual = p[1], p[2]
    p[0] = self._add_declaration_specifier(
        collected, new_qual, 'qual', append=True)
def p_specifier_qualifier_list_3(self, p):
    """ specifier_qualifier_list : type_specifier
    """
    # No specifiers exist yet, so None is passed as the existing set.
    first_type = p[1]
    p[0] = self._add_declaration_specifier(None, first_type, 'type')
def p_specifier_qualifier_list_4(self, p):
    """ specifier_qualifier_list : type_qualifier_list type_specifier
    """
    # Seed a specifier dict with the leading qualifiers, then add the type.
    seeded = {'qual': p[1], 'storage': [], 'type': [], 'function': []}
    p[0] = self._add_declaration_specifier(seeded, p[2], 'type', append=True)
- # TYPEID is allowed here (and in other struct/enum related tag names), because
- # struct/enum tags reside in their own namespace and can be named the same as types
- #
def p_struct_or_union_specifier_1(self, p):
    """ struct_or_union_specifier : struct_or_union ID
        | struct_or_union TYPEID
    """
    node_cls = self._select_struct_union_class(p[1])
    # decls=None records that no member list was written at all.
    tag_coord = self._token_coord(p, 2)
    p[0] = node_cls(name=p[2], decls=None, coord=tag_coord)
def p_struct_or_union_specifier_2(self, p):
    """ struct_or_union_specifier : struct_or_union brace_open struct_declaration_list brace_close
        | struct_or_union brace_open brace_close
    """
    node_cls = self._select_struct_union_class(p[1])
    # The three-symbol alternative has empty braces: record an empty
    # member list rather than None.
    members = [] if len(p) == 4 else p[3]
    p[0] = node_cls(
        name=None,
        decls=members,
        coord=self._token_coord(p, 2))
def p_struct_or_union_specifier_3(self, p):
    """ struct_or_union_specifier : struct_or_union ID brace_open struct_declaration_list brace_close
        | struct_or_union ID brace_open brace_close
        | struct_or_union TYPEID brace_open struct_declaration_list brace_close
        | struct_or_union TYPEID brace_open brace_close
    """
    node_cls = self._select_struct_union_class(p[1])
    # The four-symbol alternatives have empty braces: record an empty
    # member list rather than None.
    members = [] if len(p) == 5 else p[4]
    p[0] = node_cls(
        name=p[2],
        decls=members,
        coord=self._token_coord(p, 2))
def p_struct_or_union(self, p):
    """ struct_or_union : STRUCT
        | UNION
    """
    # Forward the keyword text; callers use it to pick the node class.
    keyword = p[1]
    p[0] = keyword
- # Combine all declarations into a single list
- #
def p_struct_declaration_list(self, p):
    """ struct_declaration_list : struct_declaration
        | struct_declaration_list struct_declaration
    """
    # A struct_declaration may be None (e.g. a lone ';'); treat that as
    # an empty list of declarations.
    if len(p) != 2:
        latest = p[2] or []
        p[0] = p[1] + latest
    else:
        p[0] = p[1] or []
def p_struct_declaration_1(self, p):
    """ struct_declaration : specifier_qualifier_list struct_declarator_list_opt SEMI
    """
    spec = p[1]
    assert 'typedef' not in spec['storage']

    declarators = p[2]
    if declarators is None:
        if len(spec['type']) == 1:
            # Anonymous struct/union member (gcc extension, later
            # standardized). Other single types are accepted here too,
            # since rejecting invalid code is not this parser's goal.
            node = spec['type'][0]
            if isinstance(node, c_ast.Node):
                decl_type = node
            else:
                decl_type = c_ast.IdentifierType(node)
            declarators = [dict(decl=decl_type)]
        else:
            # A member named like a typedef ends up inside the
            # specifier list; _build_declarations compensates for that.
            declarators = [dict(decl=None, init=None)]

    p[0] = self._build_declarations(spec=spec, decls=declarators)
def p_struct_declaration_2(self, p):
    """ struct_declaration : SEMI
    """
    # A lone semicolon contributes no member declarations.
    p[0] = None
def p_struct_declaration_3(self, p):
    """ struct_declaration : pppragma_directive
    """
    # Wrap the pragma in a list so it has the same shape as the other
    # struct_declaration results.
    pragma = p[1]
    p[0] = [pragma]
def p_struct_declarator_list(self, p):
    """ struct_declarator_list : struct_declarator
        | struct_declarator_list COMMA struct_declarator
    """
    if len(p) == 4:
        # Recursive form: build a new list with the trailing declarator added.
        p[0] = p[1] + [p[3]]
    else:
        # Base form: start a one-element list.
        p[0] = [p[1]]
- # struct_declarator passes up a dict with the keys: decl (for
- # the underlying declarator) and bitsize (for the bitsize)
- #
def p_struct_declarator_1(self, p):
    """ struct_declarator : declarator
    """
    # A plain member: no bit-field width.
    p[0] = dict(decl=p[1], bitsize=None)
def p_struct_declarator_2(self, p):
    """ struct_declarator : declarator COLON constant_expression
        | COLON constant_expression
    """
    if len(p) > 3:
        # Named bit-field: declarator, colon, width.
        declarator, width = p[1], p[3]
    else:
        # Unnamed bit-field: use an empty TypeDecl as the declarator.
        declarator, width = c_ast.TypeDecl(None, None, None), p[2]
    p[0] = {'decl': declarator, 'bitsize': width}
def p_enum_specifier_1(self, p):
    """ enum_specifier : ENUM ID
        | ENUM TYPEID
    """
    # Reference to a named enum with no enumerator list.
    tag = p[2]
    p[0] = c_ast.Enum(tag, None, self._token_coord(p, 1))
def p_enum_specifier_2(self, p):
    """ enum_specifier : ENUM brace_open enumerator_list brace_close
    """
    # Anonymous enum: no tag, only the enumerator list.
    enumerators = p[3]
    p[0] = c_ast.Enum(None, enumerators, self._token_coord(p, 1))
def p_enum_specifier_3(self, p):
    """ enum_specifier : ENUM ID brace_open enumerator_list brace_close
        | ENUM TYPEID brace_open enumerator_list brace_close
    """
    # Named enum with its enumerator list.
    tag, enumerators = p[2], p[4]
    p[0] = c_ast.Enum(tag, enumerators, self._token_coord(p, 1))
def p_enumerator_list(self, p):
    """ enumerator_list : enumerator
        | enumerator_list COMMA
        | enumerator_list COMMA enumerator
    """
    if len(p) == 2:
        # First enumerator: start the list at that enumerator's coord.
        first = p[1]
        p[0] = c_ast.EnumeratorList([first], first.coord)
        return

    enum_list = p[1]
    if len(p) != 3:
        # A new enumerator follows the comma; a trailing comma alone
        # (len(p) == 3) leaves the list unchanged.
        enum_list.enumerators.append(p[3])
    p[0] = enum_list
def p_enumerator(self, p):
    """ enumerator : ID
        | ID EQUALS constant_expression
    """
    # Only the three-symbol alternative supplies an explicit value.
    value = None if len(p) == 2 else p[3]
    enumerator = c_ast.Enumerator(p[1], value, self._token_coord(p, 1))
    # Register the enumerator's name through _add_identifier.
    self._add_identifier(enumerator.name, enumerator.coord)
    p[0] = enumerator
def p_declarator(self, p):
    """ declarator : id_declarator
        | typeid_declarator
    """
    # Either kind of declarator is forwarded unchanged.
    inner = p[1]
    p[0] = inner
@parameterized(('id', 'ID'), ('typeid', 'TYPEID'), ('typeid_noparen', 'TYPEID'))
def p_xxx_declarator_1(self, p):
    """ xxx_declarator : direct_xxx_declarator
    """
    # No pointer prefix: forward the direct declarator unchanged.
    direct = p[1]
    p[0] = direct
@parameterized(('id', 'ID'), ('typeid', 'TYPEID'), ('typeid_noparen', 'TYPEID'))
def p_xxx_declarator_2(self, p):
    """ xxx_declarator : pointer direct_xxx_declarator
    """
    # Apply the pointer chain as a modifier on the direct declarator.
    pointer, direct = p[1], p[2]
    p[0] = self._type_modify_decl(decl=direct, modifier=pointer)
@parameterized(('id', 'ID'), ('typeid', 'TYPEID'), ('typeid_noparen', 'TYPEID'))
def p_direct_xxx_declarator_1(self, p):
    """ direct_xxx_declarator : yyy
    """
    # A bare name: create a TypeDecl whose type and qualifiers are not
    # known yet.
    name_coord = self._token_coord(p, 1)
    p[0] = c_ast.TypeDecl(
        declname=p[1], type=None, quals=None, coord=name_coord)
@parameterized(('id', 'ID'), ('typeid', 'TYPEID'))
def p_direct_xxx_declarator_2(self, p):
    """ direct_xxx_declarator : LPAREN xxx_declarator RPAREN
    """
    # Parentheses only group; the inner declarator is the value.
    inner = p[2]
    p[0] = inner
@parameterized(('id', 'ID'), ('typeid', 'TYPEID'), ('typeid_noparen', 'TYPEID'))
def p_direct_xxx_declarator_3(self, p):
    """ direct_xxx_declarator : direct_xxx_declarator LBRACKET type_qualifier_list_opt assignment_expression_opt RBRACKET
    """
    # This production always has five right-hand symbols, so p[3] is
    # the optional qualifier list and p[4] the optional dimension. The
    # former `len(p) > 5` checks could never be false and were dropped.
    #
    # Accept dimension qualifiers
    # Per C99 6.7.5.3 p7
    arr = c_ast.ArrayDecl(
        type=None,
        dim=p[4],
        dim_quals=p[3] or [],
        coord=p[1].coord)
    p[0] = self._type_modify_decl(decl=p[1], modifier=arr)
@parameterized(('id', 'ID'), ('typeid', 'TYPEID'), ('typeid_noparen', 'TYPEID'))
def p_direct_xxx_declarator_4(self, p):
    """ direct_xxx_declarator : direct_xxx_declarator LBRACKET STATIC type_qualifier_list_opt assignment_expression RBRACKET
        | direct_xxx_declarator LBRACKET type_qualifier_list STATIC assignment_expression RBRACKET
    """
    # p[3] and p[4] hold 'static' and the qualifier list in either order.
    # They are read one by one because slicing a PLY production does not
    # work in Python 3 with the PLY version embedded in pycparser (PLY
    # Google Code issue 30).
    dim_quals = []
    for item in (p[3], p[4]):
        members = item if isinstance(item, list) else [item]
        # An omitted optional qualifier list arrives as None; skip it.
        dim_quals.extend(qual for qual in members if qual is not None)

    arr = c_ast.ArrayDecl(
        type=None,
        dim=p[5],
        dim_quals=dim_quals,
        coord=p[1].coord)
    p[0] = self._type_modify_decl(decl=p[1], modifier=arr)
- # Special for VLAs
- #
@parameterized(('id', 'ID'), ('typeid', 'TYPEID'), ('typeid_noparen', 'TYPEID'))
def p_direct_xxx_declarator_5(self, p):
    """ direct_xxx_declarator : direct_xxx_declarator LBRACKET type_qualifier_list_opt TIMES RBRACKET
    """
    # The '*' dimension is stored as an ID node built from the TIMES
    # token (p[4]).
    quals = p[3]
    arr = c_ast.ArrayDecl(
        type=None,
        dim=c_ast.ID(p[4], self._token_coord(p, 4)),
        # Identity test against None, not `!= None`.
        dim_quals=quals if quals is not None else [],
        coord=p[1].coord)
    p[0] = self._type_modify_decl(decl=p[1], modifier=arr)
@parameterized(('id', 'ID'), ('typeid', 'TYPEID'), ('typeid_noparen', 'TYPEID'))
def p_direct_xxx_declarator_6(self, p):
    """ direct_xxx_declarator : direct_xxx_declarator LPAREN parameter_type_list RPAREN
        | direct_xxx_declarator LPAREN identifier_list_opt RPAREN
    """
    func = c_ast.FuncDecl(
        args=p[3],
        type=None,
        coord=p[1].coord)

    # Why the lookahead token is consulted: consider
    #     typedef char TT;
    #     void foo(int TT) { TT = 10; }
    # Outside the function TT is a typedef, but between the braces it is
    # a parameter. Whether this is a declaration or a definition is only
    # known once LBRACE is seen, and waiting for yacc to reduce a rule on
    # that token would be too late: TT would already have been lexed as
    # TYPEID. So the parameters are added to the scope as soon as the
    # lookahead is LBRACE.
    lookahead = self._get_yacc_lookahead_token()
    if lookahead.type == "LBRACE" and func.args is not None:
        for param in func.args.params:
            # Stop at '...': nothing from there on is registered.
            if isinstance(param, c_ast.EllipsisParam):
                break
            self._add_identifier(param.name, param.coord)

    p[0] = self._type_modify_decl(decl=p[1], modifier=func)
def p_pointer(self, p):
    """ pointer : TIMES type_qualifier_list_opt
        | TIMES type_qualifier_list_opt pointer
    """
    coord = self._token_coord(p, 1)
    # Pointer declarations nest from the inside out, which matters when
    # the levels carry different qualifiers:
    #
    #     char * const * p;   -> pointer to const pointer to char
    #     char ** const p;    -> const pointer to pointer to char
    #
    # So the leftmost '*' becomes the most deeply nested PtrDecl.
    innermost = c_ast.PtrDecl(quals=p[2] or [], type=None, coord=coord)

    if len(p) <= 3:
        p[0] = innermost
        return

    # Walk to the end of the chain already built for the pointers to the
    # right, and hang this level there.
    chain = p[3]
    tail = chain
    while tail.type is not None:
        tail = tail.type
    tail.type = innermost
    p[0] = chain
def p_type_qualifier_list(self, p):
    """ type_qualifier_list : type_qualifier
        | type_qualifier_list type_qualifier
    """
    if len(p) == 2:
        # Base form: start a one-element list.
        p[0] = [p[1]]
    else:
        # Recursive form: build a new list with the qualifier added.
        p[0] = p[1] + [p[2]]
def p_parameter_type_list(self, p):
    """ parameter_type_list : parameter_list
        | parameter_list COMMA ELLIPSIS
    """
    params = p[1]
    if len(p) > 2:
        # A trailing '...' is recorded as an EllipsisParam located at
        # the ELLIPSIS token.
        ellipsis = c_ast.EllipsisParam(self._token_coord(p, 3))
        params.params.append(ellipsis)
    p[0] = params
def p_parameter_list(self, p):
    """ parameter_list : parameter_declaration
        | parameter_list COMMA parameter_declaration
    """
    if len(p) != 2:
        # Recursive form: extend the existing ParamList in place.
        existing = p[1]
        existing.params.append(p[3])
        p[0] = existing
    else:
        # Single parameter: start a ParamList at that parameter's coord.
        first = p[1]
        p[0] = c_ast.ParamList([first], first.coord)
- # From ISO/IEC 9899:TC2, 6.7.5.3.11:
- # "If, in a parameter declaration, an identifier can be treated either
- # as a typedef name or as a parameter name, it shall be taken as a
- # typedef name."
- #
- # Inside a parameter declaration, once we've reduced declaration specifiers,
- # if we shift in an LPAREN and see a TYPEID, it could be either an abstract
- # declarator or a declarator nested inside parens. This rule tells us to
- # always treat it as an abstract declarator. Therefore, we only accept
- # `id_declarator`s and `typeid_noparen_declarator`s.
def p_parameter_declaration_1(self, p):
    """ parameter_declaration : declaration_specifiers id_declarator
        | declaration_specifiers typeid_noparen_declarator
    """
    spec = p[1]
    if not spec['type']:
        # No type specifier was written: default the type to 'int'.
        default_int = c_ast.IdentifierType(
            ['int'], coord=self._token_coord(p, 1))
        spec['type'] = [default_int]

    built = self._build_declarations(spec=spec, decls=[dict(decl=p[2])])
    # Exactly one declarator was passed; its declaration is the value.
    p[0] = built[0]
def p_parameter_declaration_2(self, p):
    """ parameter_declaration : declaration_specifiers abstract_declarator_opt
    """
    spec = p[1]
    if not spec['type']:
        # No type specifier was written: default the type to 'int'.
        spec['type'] = [c_ast.IdentifierType(
            ['int'], coord=self._token_coord(p, 1))]

    # Parameters may be named like typedefs. Such a name gets grouped
    # into declaration_specifiers and looks like an old-style
    # declaration, so detect that case and compensate.
    types = spec['type']
    last = types[-1]
    name_shadows_type = (
        len(types) > 1
        and len(last.names) == 1
        and self._is_type_in_scope(last.names[0]))

    if name_shadows_type:
        decl = self._build_declarations(
            spec=spec,
            decls=[dict(decl=p[2], init=None)])[0]
    else:
        # This truly is an old-style parameter declaration.
        decl = c_ast.Typename(
            name='',
            quals=spec['qual'],
            type=p[2] or c_ast.TypeDecl(None, None, None),
            coord=self._token_coord(p, 2))
        decl = self._fix_decl_name_type(decl, spec['type'])

    p[0] = decl
def p_identifier_list(self, p):
    """ identifier_list : identifier
        | identifier_list COMMA identifier
    """
    if len(p) != 2:
        # Recursive form: extend the existing ParamList in place.
        existing = p[1]
        existing.params.append(p[3])
        p[0] = existing
    else:
        # Single identifier: start a ParamList at that identifier's coord.
        first = p[1]
        p[0] = c_ast.ParamList([first], first.coord)
def p_initializer_1(self, p):
    """ initializer : assignment_expression
    """
    # A scalar initializer is just its expression.
    expr = p[1]
    p[0] = expr
def p_initializer_2(self, p):
    """ initializer : brace_open initializer_list_opt brace_close
        | brace_open initializer_list COMMA brace_close
    """
    init_list = p[2]
    if init_list is not None:
        p[0] = init_list
        return
    # Empty braces: produce an empty InitList located at the open brace.
    p[0] = c_ast.InitList([], self._token_coord(p, 1))
def p_initializer_list(self, p):
    """ initializer_list : designation_opt initializer
        | initializer_list COMMA designation_opt initializer
    """
    is_first = len(p) == 3
    if is_first:
        designation, value = p[1], p[2]
    else:
        designation, value = p[3], p[4]

    # Only a designated initializer is wrapped in a NamedInitializer.
    if designation is None:
        init = value
    else:
        init = c_ast.NamedInitializer(designation, value)

    if is_first:
        p[0] = c_ast.InitList([init], p[2].coord)
    else:
        p[1].exprs.append(init)
        p[0] = p[1]
def p_designation(self, p):
    """ designation : designator_list EQUALS
    """
    # The '=' is dropped; only the designator list is kept.
    designators = p[1]
    p[0] = designators
- # Designators are represented as a list of nodes, in the order in which
- # they're written in the code.
- #
def p_designator_list(self, p):
    """ designator_list : designator
        | designator_list designator
    """
    if len(p) == 2:
        # Base form: start a one-element list.
        p[0] = [p[1]]
    else:
        # Recursive form: build a new list, keeping source order.
        p[0] = p[1] + [p[2]]
def p_designator(self, p):
    """ designator : LBRACKET constant_expression RBRACKET
        | PERIOD identifier
    """
    # In both alternatives the meaningful symbol is the second one.
    target = p[2]
    p[0] = target
def p_type_name(self, p):
    """ type_name : specifier_qualifier_list abstract_declarator_opt
    """
    spec = p[1]
    # With no abstract declarator, fall back to an empty TypeDecl.
    declarator = p[2] or c_ast.TypeDecl(None, None, None)
    typename = c_ast.Typename(
        name='',
        quals=spec['qual'],
        type=declarator,
        coord=self._token_coord(p, 2))
    p[0] = self._fix_decl_name_type(typename, spec['type'])
def p_abstract_declarator_1(self, p):
    """ abstract_declarator : pointer
    """
    # A bare pointer has nothing to modify, so give it an empty TypeDecl.
    placeholder = c_ast.TypeDecl(None, None, None)
    p[0] = self._type_modify_decl(decl=placeholder, modifier=p[1])
def p_abstract_declarator_2(self, p):
    """ abstract_declarator : pointer direct_abstract_declarator
    """
    # Apply the pointer chain as a modifier on the direct declarator.
    pointer, direct = p[1], p[2]
    p[0] = self._type_modify_decl(direct, pointer)
def p_abstract_declarator_3(self, p):
    """ abstract_declarator : direct_abstract_declarator
    """
    # No pointer prefix: forward the direct declarator unchanged.
    direct = p[1]
    p[0] = direct
- # Creating and using direct_abstract_declarator_opt here
- # instead of listing both direct_abstract_declarator and the
- # lack of it in the beginning of _1 and _2 caused two
- # shift/reduce errors.
- #
def p_direct_abstract_declarator_1(self, p):
    """ direct_abstract_declarator : LPAREN abstract_declarator RPAREN """
    # Parentheses only group; the inner declarator is the value.
    inner = p[2]
    p[0] = inner
def p_direct_abstract_declarator_2(self, p):
    """ direct_abstract_declarator : direct_abstract_declarator LBRACKET assignment_expression_opt RBRACKET
    """
    # Array modifier on an existing abstract declarator; the dimension
    # is optional and no dimension qualifiers are recorded.
    base, dimension = p[1], p[3]
    arr = c_ast.ArrayDecl(
        type=None, dim=dimension, dim_quals=[], coord=base.coord)
    p[0] = self._type_modify_decl(decl=base, modifier=arr)
def p_direct_abstract_declarator_3(self, p):
    """ direct_abstract_declarator : LBRACKET type_qualifier_list_opt assignment_expression_opt RBRACKET
    """
    # This production always has four right-hand symbols, so p[2] is
    # the optional qualifier list and p[3] the optional dimension. The
    # former `len(p) > 4` checks could never be false and were dropped.
    p[0] = c_ast.ArrayDecl(
        type=c_ast.TypeDecl(None, None, None),
        dim=p[3],
        dim_quals=p[2] or [],
        coord=self._token_coord(p, 1))
def p_direct_abstract_declarator_4(self, p):
    """ direct_abstract_declarator : direct_abstract_declarator LBRACKET TIMES RBRACKET
    """
    # The '*' dimension is stored as an ID node built from the TIMES
    # token (p[3]).
    base = p[1]
    star = c_ast.ID(p[3], self._token_coord(p, 3))
    arr = c_ast.ArrayDecl(
        type=None, dim=star, dim_quals=[], coord=base.coord)
    p[0] = self._type_modify_decl(decl=base, modifier=arr)
def p_direct_abstract_declarator_5(self, p):
    """ direct_abstract_declarator : LBRACKET TIMES RBRACKET
    """
    # The '*' dimension is stored as an ID node built from the TIMES
    # token. In this production TIMES is the second symbol (p[2]); p[3]
    # is the closing bracket, which the earlier code used by mistake.
    p[0] = c_ast.ArrayDecl(
        type=c_ast.TypeDecl(None, None, None),
        dim=c_ast.ID(p[2], self._token_coord(p, 2)),
        dim_quals=[],
        coord=self._token_coord(p, 1))
def p_direct_abstract_declarator_6(self, p):
    """ direct_abstract_declarator : direct_abstract_declarator LPAREN parameter_type_list_opt RPAREN
    """
    # Function modifier on an existing abstract declarator.
    base, params = p[1], p[3]
    func = c_ast.FuncDecl(args=params, type=None, coord=base.coord)
    p[0] = self._type_modify_decl(decl=base, modifier=func)
def p_direct_abstract_declarator_7(self, p):
    """ direct_abstract_declarator : LPAREN parameter_type_list_opt RPAREN
    """
    # A function declarator with no inner declarator: it wraps an empty
    # TypeDecl and is located at the opening parenthesis.
    params = p[2]
    p[0] = c_ast.FuncDecl(
        args=params,
        type=c_ast.TypeDecl(None, None, None),
        coord=self._token_coord(p, 1))
- # declaration is a list, statement isn't. To make it consistent, block_item
- # will always be a list
- #
def p_block_item(self, p):
    """ block_item : declaration
        | statement
    """
    # Normalize to a list: a declaration already is one, a statement
    # gets wrapped.
    item = p[1]
    if isinstance(item, list):
        p[0] = item
    else:
        p[0] = [item]
- # Since we made block_item a list, this just combines lists
- #
def p_block_item_list(self, p):
    """ block_item_list : block_item
        | block_item_list block_item
    """
    # Empty block items (plain ';') arrive as [None] and are ignored.
    if len(p) == 2 or p[2] == [None]:
        p[0] = p[1]
    else:
        p[0] = p[1] + p[2]
def p_compound_statement_1(self, p):
    """ compound_statement : brace_open block_item_list_opt brace_close """
    # The Compound node is located at the opening brace.
    items = p[2]
    p[0] = c_ast.Compound(block_items=items, coord=self._token_coord(p, 1))
def p_labeled_statement_1(self, p):
    """ labeled_statement : ID COLON pragmacomp_or_statement """
    # Label name plus the statement it marks.
    label_name, body = p[1], p[3]
    p[0] = c_ast.Label(label_name, body, self._token_coord(p, 1))
def p_labeled_statement_2(self, p):
    """ labeled_statement : CASE constant_expression COLON pragmacomp_or_statement """
    # The statement after the colon starts the case's statement list.
    case_value, first_stmt = p[2], p[4]
    p[0] = c_ast.Case(case_value, [first_stmt], self._token_coord(p, 1))
def p_labeled_statement_3(self, p):
    """ labeled_statement : DEFAULT COLON pragmacomp_or_statement """
    # The statement after the colon starts the default's statement list.
    first_stmt = p[3]
    p[0] = c_ast.Default([first_stmt], self._token_coord(p, 1))
def p_selection_statement_1(self, p):
    """ selection_statement : IF LPAREN expression RPAREN pragmacomp_or_statement """
    # 'if' without 'else': the false branch is None.
    cond, if_true = p[3], p[5]
    p[0] = c_ast.If(cond, if_true, None, self._token_coord(p, 1))
def p_selection_statement_2(self, p):
    """ selection_statement : IF LPAREN expression RPAREN statement ELSE pragmacomp_or_statement """
    # 'if' with 'else': both branches are present.
    cond, if_true, if_false = p[3], p[5], p[7]
    p[0] = c_ast.If(cond, if_true, if_false, self._token_coord(p, 1))
def p_selection_statement_3(self, p):
    """ selection_statement : SWITCH LPAREN expression RPAREN pragmacomp_or_statement """
    # Build the Switch node, then hand it to fix_switch_cases; that
    # function's result is this rule's value.
    switch = c_ast.Switch(p[3], p[5], self._token_coord(p, 1))
    p[0] = fix_switch_cases(switch)
def p_iteration_statement_1(self, p):
    """ iteration_statement : WHILE LPAREN expression RPAREN pragmacomp_or_statement """
    # Condition first, then the loop body.
    cond, body = p[3], p[5]
    p[0] = c_ast.While(cond, body, self._token_coord(p, 1))
def p_iteration_statement_2(self, p):
    """ iteration_statement : DO pragmacomp_or_statement WHILE LPAREN expression RPAREN SEMI """
    # The body precedes the condition in the source, but DoWhile takes
    # the condition first.
    body, cond = p[2], p[5]
    p[0] = c_ast.DoWhile(cond, body, self._token_coord(p, 1))
def p_iteration_statement_3(self, p):
    """ iteration_statement : FOR LPAREN expression_opt SEMI expression_opt SEMI expression_opt RPAREN pragmacomp_or_statement """
    # 'for' whose three header slots are optional expressions.
    init, cond, step, body = p[3], p[5], p[7], p[9]
    p[0] = c_ast.For(init, cond, step, body, self._token_coord(p, 1))
def p_iteration_statement_4(self, p):
    """ iteration_statement : FOR LPAREN declaration expression_opt SEMI expression_opt RPAREN pragmacomp_or_statement """
    # 'for' whose init slot is a declaration; the declaration is
    # wrapped in a DeclList located at the FOR token.
    init = c_ast.DeclList(p[3], self._token_coord(p, 1))
    cond, step, body = p[4], p[6], p[8]
    p[0] = c_ast.For(init, cond, step, body, self._token_coord(p, 1))
def p_jump_statement_1(self, p):
    """ jump_statement : GOTO ID SEMI """
    # The Goto node stores the target label's name.
    target = p[2]
    p[0] = c_ast.Goto(target, self._token_coord(p, 1))
def p_jump_statement_2(self, p):
    """ jump_statement : BREAK SEMI """
    # Break carries only its source location.
    coord = self._token_coord(p, 1)
    p[0] = c_ast.Break(coord)
def p_jump_statement_3(self, p):
    """ jump_statement : CONTINUE SEMI """
    # Continue carries only its source location.
    coord = self._token_coord(p, 1)
    p[0] = c_ast.Continue(coord)
def p_jump_statement_4(self, p):
    """ jump_statement : RETURN expression SEMI
        | RETURN SEMI
    """
    # Only the three-symbol alternative has a return value.
    if len(p) == 4:
        value = p[2]
    else:
        value = None
    p[0] = c_ast.Return(value, self._token_coord(p, 1))
def p_expression_statement(self, p):
    """ expression_statement : expression_opt SEMI """
    expr = p[1]
    if expr is not None:
        p[0] = expr
    else:
        # A bare ';' becomes an EmptyStatement located at the semicolon.
        p[0] = c_ast.EmptyStatement(self._token_coord(p, 2))
def p_expression(self, p):
    """ expression : assignment_expression
        | expression COMMA assignment_expression
    """
    if len(p) == 2:
        p[0] = p[1]
        return

    # Comma expression: make sure the left side is an ExprList (wrapping
    # a single expression on first use), then append the right side.
    left = p[1]
    if not isinstance(left, c_ast.ExprList):
        p[1] = c_ast.ExprList([left], left.coord)
    p[1].exprs.append(p[3])
    p[0] = p[1]
def p_typedef_name(self, p):
    """ typedef_name : TYPEID """
    # Wrap the typedef'd name in an IdentifierType located at the token.
    coord = self._token_coord(p, 1)
    p[0] = c_ast.IdentifierType([p[1]], coord=coord)
def p_assignment_expression(self, p):
    """ assignment_expression : conditional_expression
        | unary_expression assignment_operator assignment_expression
    """
    if len(p) != 2:
        # Operator first, then left and right sides; the node is located
        # at the left operand.
        target, op, value = p[1], p[2], p[3]
        p[0] = c_ast.Assignment(op, target, value, target.coord)
    else:
        p[0] = p[1]
- # K&R2 defines these as many separate rules, to encode
- # precedence and associativity. Why work hard ? I'll just use
- # the built in precedence/associativity specification feature
- # of PLY. (see precedence declaration above)
- #
def p_assignment_operator(self, p):
    """ assignment_operator : EQUALS
        | XOREQUAL
        | TIMESEQUAL
        | DIVEQUAL
        | MODEQUAL
        | PLUSEQUAL
        | MINUSEQUAL
        | LSHIFTEQUAL
        | RSHIFTEQUAL
        | ANDEQUAL
        | OREQUAL
    """
    # The operator's text is the rule's value.
    operator = p[1]
    p[0] = operator
def p_constant_expression(self, p):
    """ constant_expression : conditional_expression """
    # Forwarded unchanged; no constness check happens in this action.
    expr = p[1]
    p[0] = expr
def p_conditional_expression(self, p):
    """ conditional_expression : binary_expression
        | binary_expression CONDOP expression COLON conditional_expression
    """
    if len(p) != 2:
        # cond ? if_true : if_false, located at the condition.
        cond, if_true, if_false = p[1], p[3], p[5]
        p[0] = c_ast.TernaryOp(cond, if_true, if_false, cond.coord)
    else:
        p[0] = p[1]
def p_binary_expression(self, p):
    """ binary_expression : cast_expression
        | binary_expression TIMES binary_expression
        | binary_expression DIVIDE binary_expression
        | binary_expression MOD binary_expression
        | binary_expression PLUS binary_expression
        | binary_expression MINUS binary_expression
        | binary_expression RSHIFT binary_expression
        | binary_expression LSHIFT binary_expression
        | binary_expression LT binary_expression
        | binary_expression LE binary_expression
        | binary_expression GE binary_expression
        | binary_expression GT binary_expression
        | binary_expression EQ binary_expression
        | binary_expression NE binary_expression
        | binary_expression AND binary_expression
        | binary_expression OR binary_expression
        | binary_expression XOR binary_expression
        | binary_expression LAND binary_expression
        | binary_expression LOR binary_expression
    """
    if len(p) != 2:
        # Operator first, then operands; located at the left operand.
        left, op, right = p[1], p[2], p[3]
        p[0] = c_ast.BinaryOp(op, left, right, left.coord)
    else:
        p[0] = p[1]
def p_cast_expression_1(self, p):
    """ cast_expression : unary_expression """
    # No cast present: forward the unary expression.
    expr = p[1]
    p[0] = expr
def p_cast_expression_2(self, p):
    """ cast_expression : LPAREN type_name RPAREN cast_expression """
    # Target type, then the operand; located at the opening parenthesis.
    to_type, operand = p[2], p[4]
    p[0] = c_ast.Cast(to_type, operand, self._token_coord(p, 1))
def p_unary_expression_1(self, p):
    """ unary_expression : postfix_expression """
    # No unary operator present: forward the postfix expression.
    expr = p[1]
    p[0] = expr
def p_unary_expression_2(self, p):
    """ unary_expression : PLUSPLUS unary_expression
        | MINUSMINUS unary_expression
        | unary_operator cast_expression
    """
    # Prefix operator; the node takes its location from the operand.
    op, operand = p[1], p[2]
    p[0] = c_ast.UnaryOp(op, operand, operand.coord)
def p_unary_expression_3(self, p):
    """ unary_expression : SIZEOF unary_expression
        | SIZEOF LPAREN type_name RPAREN
    """
    # The operand is the expression (two-symbol form) or the
    # parenthesized type name (four-symbol form).
    if len(p) == 3:
        operand = p[2]
    else:
        operand = p[3]
    p[0] = c_ast.UnaryOp(p[1], operand, self._token_coord(p, 1))
def p_unary_operator(self, p):
    """ unary_operator : AND
        | TIMES
        | PLUS
        | MINUS
        | NOT
        | LNOT
    """
    # The operator's text is the rule's value.
    operator = p[1]
    p[0] = operator
def p_postfix_expression_1(self, p):
    """ postfix_expression : primary_expression """
    # No postfix operator present: forward the primary expression.
    expr = p[1]
    p[0] = expr
def p_postfix_expression_2(self, p):
    """ postfix_expression : postfix_expression LBRACKET expression RBRACKET """
    # Subscript: array expression and index, located at the array.
    array, index = p[1], p[3]
    p[0] = c_ast.ArrayRef(array, index, array.coord)
def p_postfix_expression_3(self, p):
    """ postfix_expression : postfix_expression LPAREN argument_expression_list RPAREN
        | postfix_expression LPAREN RPAREN
    """
    # Call with no arguments records None instead of an argument list.
    callee = p[1]
    if len(p) == 5:
        args = p[3]
    else:
        args = None
    p[0] = c_ast.FuncCall(callee, args, callee.coord)
def p_postfix_expression_4(self, p):
    """ postfix_expression : postfix_expression PERIOD ID
        | postfix_expression PERIOD TYPEID
        | postfix_expression ARROW ID
        | postfix_expression ARROW TYPEID
    """
    # The member name becomes an ID node; p[2] is the '.' or '->' text.
    base, access = p[1], p[2]
    field = c_ast.ID(p[3], self._token_coord(p, 3))
    p[0] = c_ast.StructRef(base, access, field, base.coord)
def p_postfix_expression_5(self, p):
    """ postfix_expression : postfix_expression PLUSPLUS
        | postfix_expression MINUSMINUS
    """
    # The postfix form is marked by prefixing the operator text with 'p'.
    operand = p[1]
    postfix_op = 'p' + p[2]
    p[0] = c_ast.UnaryOp(postfix_op, operand, operand.coord)
def p_postfix_expression_6(self, p):
    """ postfix_expression : LPAREN type_name RPAREN brace_open initializer_list brace_close
        | LPAREN type_name RPAREN brace_open initializer_list COMMA brace_close
    """
    # Compound literal: the type name and the braced initializer list.
    # No coord is passed to the node here.
    literal_type, init_list = p[2], p[5]
    p[0] = c_ast.CompoundLiteral(literal_type, init_list)
def p_primary_expression_1(self, p):
    """ primary_expression : identifier """
    # Forward the identifier node.
    node = p[1]
    p[0] = node
def p_primary_expression_2(self, p):
    """ primary_expression : constant """
    # Forward the constant node.
    node = p[1]
    p[0] = node
def p_primary_expression_3(self, p):
    """ primary_expression : unified_string_literal
        | unified_wstring_literal
    """
    # Forward the (already concatenated) string literal node.
    node = p[1]
    p[0] = node
def p_primary_expression_4(self, p):
    """ primary_expression : LPAREN expression RPAREN """
    # Parentheses only group; the inner expression is the value.
    inner = p[2]
    p[0] = inner
def p_primary_expression_5(self, p):
    """ primary_expression : OFFSETOF LPAREN type_name COMMA offsetof_member_designator RPAREN
    """
    # offsetof is represented as a function call whose two arguments are
    # the type name and the member designator. All three nodes share the
    # OFFSETOF token's location.
    coord = self._token_coord(p, 1)
    callee = c_ast.ID(p[1], coord)
    arguments = c_ast.ExprList([p[3], p[5]], coord)
    p[0] = c_ast.FuncCall(callee, arguments, coord)
def p_offsetof_member_designator(self, p):
    """ offsetof_member_designator : identifier
        | offsetof_member_designator PERIOD identifier
        | offsetof_member_designator LBRACKET expression RBRACKET
    """
    symbol_count = len(p)
    if symbol_count == 2:
        # Plain identifier.
        p[0] = p[1]
    elif symbol_count == 4:
        # designator '.' identifier
        p[0] = c_ast.StructRef(p[1], p[2], p[3], p[1].coord)
    elif symbol_count == 5:
        # designator '[' expression ']'
        p[0] = c_ast.ArrayRef(p[1], p[3], p[1].coord)
    else:
        # None of the productions above has any other length.
        raise NotImplementedError("Unexpected parsing state. len(p): %u" % symbol_count)
def p_argument_expression_list(self, p):
    """ argument_expression_list : assignment_expression
        | argument_expression_list COMMA assignment_expression
    """
    if len(p) != 2:
        # Recursive form: extend the existing ExprList in place.
        existing = p[1]
        existing.exprs.append(p[3])
        p[0] = existing
    else:
        # Single argument: start an ExprList at that argument's coord.
        first = p[1]
        p[0] = c_ast.ExprList([first], first.coord)
def p_identifier(self, p):
    """ identifier : ID """
    # Wrap the name in an ID node located at the token.
    name = p[1]
    p[0] = c_ast.ID(name, self._token_coord(p, 1))
def p_constant_1(self, p):
    """ constant : INT_CONST_DEC
        | INT_CONST_OCT
        | INT_CONST_HEX
        | INT_CONST_BIN
        | INT_CONST_CHAR
    """
    # Derive the constant's type name from its u/U and l/L suffix
    # letters, which can only occupy the last three characters.
    #
    # A quoted constant (INT_CONST_CHAR) has no suffix: its trailing
    # letters are part of the value. Scanning them used to misread e.g.
    # 'al' as long, and made 'uu' raise ValueError. Numeric constants
    # never end with a quote, so that is used to skip the scan.
    #
    # Raises ValueError if a numeric constant carries more than one
    # u/U or more than two l/L suffix letters.
    text = p[1]
    suffix = '' if text.endswith("'") else text[-3:]

    uCount = 0
    lCount = 0
    for x in suffix:
        if x in ('l', 'L'):
            lCount += 1
        elif x in ('u', 'U'):
            uCount += 1

    if uCount > 1:
        raise ValueError('Constant cannot have more than one u/U suffix.')
    elif lCount > 2:
        raise ValueError('Constant cannot have more than two l/L suffix.')

    prefix = 'unsigned ' * uCount + 'long ' * lCount
    p[0] = c_ast.Constant(
        prefix + 'int', text, self._token_coord(p, 1))
def p_constant_2(self, p):
    """ constant : FLOAT_CONST
        | HEX_FLOAT_CONST
    """
    text = p[1]
    # Any constant containing 'x' (hexadecimal form) is typed 'float';
    # otherwise the last character selects float / long double / double.
    # NOTE(review): hexadecimal constants get 'float' regardless of their
    # suffix - confirm this is intended before relying on it.
    if 'x' in text.lower():
        type_name = 'float'
    elif text[-1] in ('f', 'F'):
        type_name = 'float'
    elif text[-1] in ('l', 'L'):
        type_name = 'long double'
    else:
        type_name = 'double'

    p[0] = c_ast.Constant(type_name, text, self._token_coord(p, 1))
def p_constant_3(self, p):
    """ constant : CHAR_CONST
        | WCHAR_CONST
    """
    # Both narrow and wide character constants are typed 'char' here.
    text = p[1]
    p[0] = c_ast.Constant('char', text, self._token_coord(p, 1))
- # The "unified" string and wstring literal rules are for supporting
- # concatenation of adjacent string literals.
- # I.e. "hello " "world" is seen by the C compiler as a single string literal
- # with the value "hello world"
- #
def p_unified_string_literal(self, p):
    """ unified_string_literal : STRING_LITERAL
        | unified_string_literal STRING_LITERAL
    """
    if len(p) != 2:
        # Concatenate adjacent literals: drop the last character of the
        # accumulated text and the first character of the new literal
        # (the quotes that meet in the middle), then join.
        merged = p[1]
        merged.value = merged.value[:-1] + p[2][1:]
        p[0] = merged
    else:
        # Single literal.
        p[0] = c_ast.Constant('string', p[1], self._token_coord(p, 1))
def p_unified_wstring_literal(self, p):
    """ unified_wstring_literal : WSTRING_LITERAL
        | unified_wstring_literal WSTRING_LITERAL
    """
    if len(p) != 2:
        # Concatenate adjacent wide literals: strip trailing whitespace
        # and the last character from the accumulated text, drop the
        # first two characters of the new literal, then join.
        merged = p[1]
        merged.value = merged.value.rstrip()[:-1] + p[2][2:]
        p[0] = merged
    else:
        # Single literal; wide strings are also typed 'string' here.
        p[0] = c_ast.Constant('string', p[1], self._token_coord(p, 1))
def p_brace_open(self, p):
    """ brace_open : LBRACE
    """
    # Copy the token's line number onto this rule's result, then pass
    # the brace text up.
    brace_line = p.lineno(1)
    p[0] = p[1]
    p.set_lineno(0, brace_line)
def p_brace_close(self, p):
    """ brace_close : RBRACE
    """
    # Copy the token's line number onto this rule's result, then pass
    # the brace text up.
    brace_line = p.lineno(1)
    p[0] = p[1]
    p.set_lineno(0, brace_line)
def p_empty(self, p):
    'empty : '
    # The empty production carries no semantic value.
    p[0] = None
def p_error(self, p):
    # If error recovery is added here in the future, make sure
    # _get_yacc_lookahead_token still works!
    #
    if not p:
        # No offending token: the input ended unexpectedly.
        self._parse_error('At end of input', self.clex.filename)
    else:
        # Report the offending token's text with its line and column.
        location = self._coord(
            lineno=p.lineno,
            column=self.clex.find_tok_column(p))
        self._parse_error('before: %s' % p.value, location)
|