Add .explain trigger to explain cdecls

This commit is contained in:
melonhead@grimace 2009-11-12 18:05:07 -05:00
parent c7f6451459
commit de9203c904
17 changed files with 9051 additions and 0 deletions

14
plugins/explain.py Executable file
View File

@ -0,0 +1,14 @@
from pycparser.cdecl import explain_c_declaration
@hook.command('explain')
def explain(inp):
'''.explain char *(*(**foo[][8])())[]; -- returns :
foo is a array of array[8] of pointer to pointer to function() returning pointer
to array of pointer to char
'''
if not inp:
return None
result = explain_c_declaration(inp)
if result: return result
else: return None

106
plugins/pycparser/cdecl.py Normal file
View File

@ -0,0 +1,106 @@
#-----------------------------------------------------------------
# pycparser: cdecl.py
#
# Example of the CDECL tool using pycparser. CDECL "explains"
# C type declarations in plain English.
#
# The AST generated by pycparser from the given declaration is
# traversed recursively to build the explanation.
# Note that the declaration must be a valid external declaration
# in C. All the types used in it must be defined with typedef,
# or parsing will fail. The definition can be arbitrary, it isn't
# really used - by pycparser must know which tokens are types.
#
# For example:
#
# 'typedef int Node; const Node* (*ar)[10];'
# =>
# ar is a pointer to array[10] of pointer to const Node
#
# Copyright (C) 2008, Eli Bendersky
# License: LGPL
#-----------------------------------------------------------------
import sys
# This is not required if you've installed pycparser into
# your site-packages/ with setup.py
#
sys.path.insert(0, '..')
from pycparser import c_parser, c_ast
def explain_c_declaration(c_decl):
""" Parses the declaration in c_decl and returns a text
explanation as a string.
The last external node of the string is used, to allow
earlier typedefs for used types.
"""
parser = c_parser.CParser()
try:
node = parser.parse(c_decl, filename='<stdin>')
except c_parser.ParseError, e:
return None
if ( not isinstance(node, c_ast.FileAST) or
not isinstance(node.ext[-1], c_ast.Decl)):
return None
return _explain_decl_node(node.ext[-1])
def _explain_decl_node(decl_node):
""" Receives a c_ast.Decl note and returns its explanation in
English.
"""
#~ print decl_node.show()
storage = ' '.join(decl_node.storage) + ' ' if decl_node.storage else ''
return (decl_node.name +
" is a " +
storage +
_explain_type(decl_node.type))
def _explain_type(decl):
""" Recursively explains a type decl node
"""
typ = type(decl)
if typ == c_ast.TypeDecl:
quals = ' '.join(decl.quals) + ' ' if decl.quals else ''
return quals + _explain_type(decl.type)
elif typ == c_ast.Typename or typ == c_ast.Decl:
return _explain_type(decl.type)
elif typ == c_ast.IdentifierType:
return ' '.join(decl.names)
elif typ == c_ast.PtrDecl:
quals = ' '.join(decl.quals) + ' ' if decl.quals else ''
return quals + 'pointer to ' + _explain_type(decl.type)
elif typ == c_ast.ArrayDecl:
arr = 'array'
if decl.dim: arr += '[%s]' % decl.dim.value
return arr + " of " + _explain_type(decl.type)
elif typ == c_ast.FuncDecl:
if decl.args:
params = [_explain_type(param) for param in decl.args.params]
args = ', '.join(params)
else:
args = ''
return ('function(%s) returning ' % (args) +
_explain_type(decl.type))
if __name__ == "__main__":
if len(sys.argv) > 1:
c_decl = sys.argv[1]
else:
c_decl = "char *(*(**foo[][8])())[];"
print "Explaining the declaration:", c_decl
print "\n", explain_c_declaration(c_decl)

View File

@ -0,0 +1,9 @@
# pycparser.lextab.py. This file automatically created by PLY (version 3.3). Don't edit!
_tabversion = '3.3'
_lextokens = {'VOID': 1, 'LBRACKET': 1, 'WCHAR_CONST': 1, 'FLOAT_CONST': 1, 'MINUS': 1, 'RPAREN': 1, 'LONG': 1, 'PLUS': 1, 'ELLIPSIS': 1, 'GT': 1, 'GOTO': 1, 'ENUM': 1, 'PERIOD': 1, 'GE': 1, 'INT_CONST_DEC': 1, 'ARROW': 1, 'DOUBLE': 1, 'MINUSEQUAL': 1, 'INT_CONST_OCT': 1, 'TIMESEQUAL': 1, 'OR': 1, 'SHORT': 1, 'RETURN': 1, 'RSHIFTEQUAL': 1, 'STATIC': 1, 'SIZEOF': 1, 'UNSIGNED': 1, 'UNION': 1, 'COLON': 1, 'WSTRING_LITERAL': 1, 'DIVIDE': 1, 'FOR': 1, 'PLUSPLUS': 1, 'EQUALS': 1, 'ELSE': 1, 'EQ': 1, 'AND': 1, 'TYPEID': 1, 'LBRACE': 1, 'PPHASH': 1, 'INT': 1, 'SIGNED': 1, 'CONTINUE': 1, 'NOT': 1, 'OREQUAL': 1, 'MOD': 1, 'RSHIFT': 1, 'DEFAULT': 1, 'CHAR': 1, 'WHILE': 1, 'DIVEQUAL': 1, 'EXTERN': 1, 'CASE': 1, 'LAND': 1, 'REGISTER': 1, 'MODEQUAL': 1, 'NE': 1, 'SWITCH': 1, 'INT_CONST_HEX': 1, 'PLUSEQUAL': 1, 'STRUCT': 1, 'CONDOP': 1, 'BREAK': 1, 'VOLATILE': 1, 'ANDEQUAL': 1, 'DO': 1, 'LNOT': 1, 'CONST': 1, 'LOR': 1, 'CHAR_CONST': 1, 'LSHIFT': 1, 'RBRACE': 1, 'LE': 1, 'SEMI': 1, 'LT': 1, 'COMMA': 1, 'TYPEDEF': 1, 'XOR': 1, 'AUTO': 1, 'TIMES': 1, 'LPAREN': 1, 'MINUSMINUS': 1, 'ID': 1, 'IF': 1, 'STRING_LITERAL': 1, 'FLOAT': 1, 'XOREQUAL': 1, 'LSHIFTEQUAL': 1, 'RBRACKET': 1}
_lexreflags = 0
_lexliterals = ''
_lexstateinfo = {'ppline': 'exclusive', 'INITIAL': 'inclusive'}
_lexstatere = {'ppline': [('(?P<t_ppline_FILENAME>"([^"\\\\\\n]|(\\\\(([a-zA-Z\\\\?\'"])|([0-7]{1,3})|(x[0-9a-fA-F]+))))*")|(?P<t_ppline_LINE_NUMBER>(0(([uU][lL])|([lL][uU])|[uU]|[lL])?)|([1-9][0-9]*(([uU][lL])|([lL][uU])|[uU]|[lL])?))|(?P<t_ppline_NEWLINE>\\n)|(?P<t_ppline_PPLINE>line)', [None, ('t_ppline_FILENAME', 'FILENAME'), None, None, None, None, None, None, ('t_ppline_LINE_NUMBER', 'LINE_NUMBER'), None, None, None, None, None, None, None, None, ('t_ppline_NEWLINE', 'NEWLINE'), ('t_ppline_PPLINE', 'PPLINE')])], 'INITIAL': [('(?P<t_PPHASH>[ \\t]*\\#)|(?P<t_NEWLINE>\\n+)|(?P<t_FLOAT_CONST>((((([0-9]*\\.[0-9]+)|([0-9]+\\.))([eE][-+]?[0-9]+)?)|([0-9]+([eE][-+]?[0-9]+)))[FfLl]?))|(?P<t_INT_CONST_HEX>0[xX][0-9a-fA-F]+(([uU][lL])|([lL][uU])|[uU]|[lL])?)|(?P<t_BAD_CONST_OCT>0[0-7]*[89])|(?P<t_INT_CONST_OCT>0[0-7]*(([uU][lL])|([lL][uU])|[uU]|[lL])?)|(?P<t_INT_CONST_DEC>(0(([uU][lL])|([lL][uU])|[uU]|[lL])?)|([1-9][0-9]*(([uU][lL])|([lL][uU])|[uU]|[lL])?))|(?P<t_CHAR_CONST>\'([^\'\\\\\\n]|(\\\\(([a-zA-Z\\\\?\'"])|([0-7]{1,3})|(x[0-9a-fA-F]+))))\')|(?P<t_WCHAR_CONST>L\'([^\'\\\\\\n]|(\\\\(([a-zA-Z\\\\?\'"])|([0-7]{1,3})|(x[0-9a-fA-F]+))))\')|(?P<t_UNMATCHED_QUOTE>(\'([^\'\\\\\\n]|(\\\\(([a-zA-Z\\\\?\'"])|([0-7]{1,3})|(x[0-9a-fA-F]+))))*\\n)|(\'([^\'\\\\\\n]|(\\\\(([a-zA-Z\\\\?\'"])|([0-7]{1,3})|(x[0-9a-fA-F]+))))*$))|(?P<t_BAD_CHAR_CONST>(\'([^\'\\\\\\n]|(\\\\(([a-zA-Z\\\\?\'"])|([0-7]{1,3})|(x[0-9a-fA-F]+))))[^\'\n]+\')|(\'\')|(\'([\\\\][^a-zA-Z\\\\?\'"x0-7])[^\'\\n]*\'))|(?P<t_WSTRING_LITERAL>L"([^"\\\\\\n]|(\\\\(([a-zA-Z\\\\?\'"])|([0-7]{1,3})|(x[0-9a-fA-F]+))))*")|(?P<t_BAD_STRING_LITERAL>"([^"\\\\\\n]|(\\\\(([a-zA-Z\\\\?\'"])|([0-7]{1,3})|(x[0-9a-fA-F]+))))*([\\\\][^a-zA-Z\\\\?\'"x0-7])([^"\\\\\\n]|(\\\\(([a-zA-Z\\\\?\'"])|([0-7]{1,3})|(x[0-9a-fA-F]+))))*")|(?P<t_ID>[a-zA-Z_][0-9a-zA-Z_]*)|(?P<t_STRING_LITERAL>"([^"\\\\\\n]|(\\\\(([a-zA-Z\\\\?\'"])|([0-7]{1,3})|(x[0-9a-fA-F]+))))*")', [None, ('t_PPHASH', 'PPHASH'), ('t_NEWLINE', 'NEWLINE'), ('t_FLOAT_CONST', 'FLOAT_CONST'), None, None, None, None, None, None, None, None, None, ('t_INT_CONST_HEX', 'INT_CONST_HEX'), None, None, None, ('t_BAD_CONST_OCT', 'BAD_CONST_OCT'), ('t_INT_CONST_OCT', 'INT_CONST_OCT'), None, None, None, ('t_INT_CONST_DEC', 'INT_CONST_DEC'), None, None, None, None, None, None, None, None, ('t_CHAR_CONST', 'CHAR_CONST'), None, None, None, None, None, None, ('t_WCHAR_CONST', 'WCHAR_CONST'), None, None, None, None, None, None, ('t_UNMATCHED_QUOTE', 'UNMATCHED_QUOTE'), None, None, None, None, None, None, None, None, None, None, None, None, None, None, ('t_BAD_CHAR_CONST', 'BAD_CHAR_CONST'), None, None, None, None, None, None, None, None, None, None, ('t_WSTRING_LITERAL', 'WSTRING_LITERAL'), None, None, None, None, None, None, ('t_BAD_STRING_LITERAL', 'BAD_STRING_LITERAL'), None, None, None, None, None, None, None, None, None, None, None, None, None, ('t_ID', 'ID'), (None, 'STRING_LITERAL')]), ('(?P<t_ELLIPSIS>\\.\\.\\.)|(?P<t_PLUSPLUS>\\+\\+)|(?P<t_LOR>\\|\\|)|(?P<t_OREQUAL>\\|=)|(?P<t_LSHIFTEQUAL><<=)|(?P<t_RSHIFTEQUAL>>>=)|(?P<t_TIMESEQUAL>\\*=)|(?P<t_PLUSEQUAL>\\+=)|(?P<t_XOREQUAL>^=)|(?P<t_PLUS>\\+)|(?P<t_MODEQUAL>%=)|(?P<t_LBRACE>\\{)|(?P<t_DIVEQUAL>/=)|(?P<t_RBRACKET>\\])|(?P<t_CONDOP>\\?)', [None, (None, 'ELLIPSIS'), (None, 'PLUSPLUS'), (None, 'LOR'), (None, 'OREQUAL'), (None, 'LSHIFTEQUAL'), (None, 'RSHIFTEQUAL'), (None, 'TIMESEQUAL'), (None, 'PLUSEQUAL'), (None, 'XOREQUAL'), (None, 'PLUS'), (None, 'MODEQUAL'), (None, 'LBRACE'), (None, 'DIVEQUAL'), (None, 'RBRACKET'), (None, 'CONDOP')]), ('(?P<t_XOR>\\^)|(?P<t_LSHIFT><<)|(?P<t_LE><=)|(?P<t_LPAREN>\\()|(?P<t_ARROW>->)|(?P<t_EQ>==)|(?P<t_RBRACE>\\})|(?P<t_NE>!=)|(?P<t_MINUSMINUS>--)|(?P<t_OR>\\|)|(?P<t_TIMES>\\*)|(?P<t_LBRACKET>\\[)|(?P<t_GE>>=)|(?P<t_RPAREN>\\))|(?P<t_LAND>&&)|(?P<t_RSHIFT>>>)|(?P<t_ANDEQUAL>&=)|(?P<t_MINUSEQUAL>-=)|(?P<t_PERIOD>\\.)|(?P<t_EQUALS>=)|(?P<t_LT><)|(?P<t_COMMA>,)|(?P<t_DIVIDE>/)|(?P<t_AND>&)|(?P<t_MOD>%)|(?P<t_SEMI>;)|(?P<t_MINUS>-)|(?P<t_GT>>)|(?P<t_COLON>:)|(?P<t_NOT>~)|(?P<t_LNOT>!)', [None, (None, 'XOR'), (None, 'LSHIFT'), (None, 'LE'), (None, 'LPAREN'), (None, 'ARROW'), (None, 'EQ'), (None, 'RBRACE'), (None, 'NE'), (None, 'MINUSMINUS'), (None, 'OR'), (None, 'TIMES'), (None, 'LBRACKET'), (None, 'GE'), (None, 'RPAREN'), (None, 'LAND'), (None, 'RSHIFT'), (None, 'ANDEQUAL'), (None, 'MINUSEQUAL'), (None, 'PERIOD'), (None, 'EQUALS'), (None, 'LT'), (None, 'COMMA'), (None, 'DIVIDE'), (None, 'AND'), (None, 'MOD'), (None, 'SEMI'), (None, 'MINUS'), (None, 'GT'), (None, 'COLON'), (None, 'NOT'), (None, 'LNOT')])]}
_lexstateignore = {'ppline': ' \t', 'INITIAL': ' \t'}
_lexstateerrorf = {'ppline': 't_ppline_error', 'INITIAL': 't_error'}

View File

@ -0,0 +1,4 @@
# PLY package
# Author: David Beazley (dave@dabeaz.com)
__all__ = ['lex','yacc']

View File

@ -0,0 +1,898 @@
# -----------------------------------------------------------------------------
# cpp.py
#
# Author: David Beazley (http://www.dabeaz.com)
# Copyright (C) 2007
# All rights reserved
#
# This module implements an ANSI-C style lexical preprocessor for PLY.
# -----------------------------------------------------------------------------
from __future__ import generators
# -----------------------------------------------------------------------------
# Default preprocessor lexer definitions. These tokens are enough to get
# a basic preprocessor working. Other modules may import these if they want
# -----------------------------------------------------------------------------
tokens = (
'CPP_ID','CPP_INTEGER', 'CPP_FLOAT', 'CPP_STRING', 'CPP_CHAR', 'CPP_WS', 'CPP_COMMENT', 'CPP_POUND','CPP_DPOUND'
)
literals = "+-*/%|&~^<>=!?()[]{}.,;:\\\'\""
# Whitespace
def t_CPP_WS(t):
r'\s+'
t.lexer.lineno += t.value.count("\n")
return t
t_CPP_POUND = r'\#'
t_CPP_DPOUND = r'\#\#'
# Identifier
t_CPP_ID = r'[A-Za-z_][\w_]*'
# Integer literal
def CPP_INTEGER(t):
r'(((((0x)|(0X))[0-9a-fA-F]+)|(\d+))([uU]|[lL]|[uU][lL]|[lL][uU])?)'
return t
t_CPP_INTEGER = CPP_INTEGER
# Floating literal
t_CPP_FLOAT = r'((\d+)(\.\d+)(e(\+|-)?(\d+))? | (\d+)e(\+|-)?(\d+))([lL]|[fF])?'
# String literal
def t_CPP_STRING(t):
r'\"([^\\\n]|(\\(.|\n)))*?\"'
t.lexer.lineno += t.value.count("\n")
return t
# Character constant 'c' or L'c'
def t_CPP_CHAR(t):
r'(L)?\'([^\\\n]|(\\(.|\n)))*?\''
t.lexer.lineno += t.value.count("\n")
return t
# Comment
def t_CPP_COMMENT(t):
r'(/\*(.|\n)*?\*/)|(//.*?\n)'
t.lexer.lineno += t.value.count("\n")
return t
def t_error(t):
t.type = t.value[0]
t.value = t.value[0]
t.lexer.skip(1)
return t
import re
import copy
import time
import os.path
# -----------------------------------------------------------------------------
# trigraph()
#
# Given an input string, this function replaces all trigraph sequences.
# The following mapping is used:
#
# ??= #
# ??/ \
# ??' ^
# ??( [
# ??) ]
# ??! |
# ??< {
# ??> }
# ??- ~
# -----------------------------------------------------------------------------
_trigraph_pat = re.compile(r'''\?\?[=/\'\(\)\!<>\-]''')
_trigraph_rep = {
'=':'#',
'/':'\\',
"'":'^',
'(':'[',
')':']',
'!':'|',
'<':'{',
'>':'}',
'-':'~'
}
def trigraph(input):
return _trigraph_pat.sub(lambda g: _trigraph_rep[g.group()[-1]],input)
# ------------------------------------------------------------------
# Macro object
#
# This object holds information about preprocessor macros
#
# .name - Macro name (string)
# .value - Macro value (a list of tokens)
# .arglist - List of argument names
# .variadic - Boolean indicating whether or not variadic macro
# .vararg - Name of the variadic parameter
#
# When a macro is created, the macro replacement token sequence is
# pre-scanned and used to create patch lists that are later used
# during macro expansion
# ------------------------------------------------------------------
class Macro(object):
def __init__(self,name,value,arglist=None,variadic=False):
self.name = name
self.value = value
self.arglist = arglist
self.variadic = variadic
if variadic:
self.vararg = arglist[-1]
self.source = None
# ------------------------------------------------------------------
# Preprocessor object
#
# Object representing a preprocessor. Contains macro definitions,
# include directories, and other information
# ------------------------------------------------------------------
class Preprocessor(object):
def __init__(self,lexer=None):
if lexer is None:
lexer = lex.lexer
self.lexer = lexer
self.macros = { }
self.path = []
self.temp_path = []
# Probe the lexer for selected tokens
self.lexprobe()
tm = time.localtime()
self.define("__DATE__ \"%s\"" % time.strftime("%b %d %Y",tm))
self.define("__TIME__ \"%s\"" % time.strftime("%H:%M:%S",tm))
self.parser = None
# -----------------------------------------------------------------------------
# tokenize()
#
# Utility function. Given a string of text, tokenize into a list of tokens
# -----------------------------------------------------------------------------
def tokenize(self,text):
tokens = []
self.lexer.input(text)
while True:
tok = self.lexer.token()
if not tok: break
tokens.append(tok)
return tokens
# ---------------------------------------------------------------------
# error()
#
# Report a preprocessor error/warning of some kind
# ----------------------------------------------------------------------
def error(self,file,line,msg):
print >>sys.stderr,"%s:%d %s" % (file,line,msg)
# ----------------------------------------------------------------------
# lexprobe()
#
# This method probes the preprocessor lexer object to discover
# the token types of symbols that are important to the preprocessor.
# If this works right, the preprocessor will simply "work"
# with any suitable lexer regardless of how tokens have been named.
# ----------------------------------------------------------------------
def lexprobe(self):
# Determine the token type for identifiers
self.lexer.input("identifier")
tok = self.lexer.token()
if not tok or tok.value != "identifier":
print "Couldn't determine identifier type"
else:
self.t_ID = tok.type
# Determine the token type for integers
self.lexer.input("12345")
tok = self.lexer.token()
if not tok or int(tok.value) != 12345:
print "Couldn't determine integer type"
else:
self.t_INTEGER = tok.type
self.t_INTEGER_TYPE = type(tok.value)
# Determine the token type for strings enclosed in double quotes
self.lexer.input("\"filename\"")
tok = self.lexer.token()
if not tok or tok.value != "\"filename\"":
print "Couldn't determine string type"
else:
self.t_STRING = tok.type
# Determine the token type for whitespace--if any
self.lexer.input(" ")
tok = self.lexer.token()
if not tok or tok.value != " ":
self.t_SPACE = None
else:
self.t_SPACE = tok.type
# Determine the token type for newlines
self.lexer.input("\n")
tok = self.lexer.token()
if not tok or tok.value != "\n":
self.t_NEWLINE = None
print "Couldn't determine token for newlines"
else:
self.t_NEWLINE = tok.type
self.t_WS = (self.t_SPACE, self.t_NEWLINE)
# Check for other characters used by the preprocessor
chars = [ '<','>','#','##','\\','(',')',',','.']
for c in chars:
self.lexer.input(c)
tok = self.lexer.token()
if not tok or tok.value != c:
print "Unable to lex '%s' required for preprocessor" % c
# ----------------------------------------------------------------------
# add_path()
#
# Adds a search path to the preprocessor.
# ----------------------------------------------------------------------
def add_path(self,path):
self.path.append(path)
# ----------------------------------------------------------------------
# group_lines()
#
# Given an input string, this function splits it into lines. Trailing whitespace
# is removed. Any line ending with \ is grouped with the next line. This
# function forms the lowest level of the preprocessor---grouping into text into
# a line-by-line format.
# ----------------------------------------------------------------------
def group_lines(self,input):
lex = self.lexer.clone()
lines = [x.rstrip() for x in input.splitlines()]
for i in xrange(len(lines)):
j = i+1
while lines[i].endswith('\\') and (j < len(lines)):
lines[i] = lines[i][:-1]+lines[j]
lines[j] = ""
j += 1
input = "\n".join(lines)
lex.input(input)
lex.lineno = 1
current_line = []
while True:
tok = lex.token()
if not tok:
break
current_line.append(tok)
if tok.type in self.t_WS and '\n' in tok.value:
yield current_line
current_line = []
if current_line:
yield current_line
# ----------------------------------------------------------------------
# tokenstrip()
#
# Remove leading/trailing whitespace tokens from a token list
# ----------------------------------------------------------------------
def tokenstrip(self,tokens):
i = 0
while i < len(tokens) and tokens[i].type in self.t_WS:
i += 1
del tokens[:i]
i = len(tokens)-1
while i >= 0 and tokens[i].type in self.t_WS:
i -= 1
del tokens[i+1:]
return tokens
# ----------------------------------------------------------------------
# collect_args()
#
# Collects comma separated arguments from a list of tokens. The arguments
# must be enclosed in parenthesis. Returns a tuple (tokencount,args,positions)
# where tokencount is the number of tokens consumed, args is a list of arguments,
# and positions is a list of integers containing the starting index of each
# argument. Each argument is represented by a list of tokens.
#
# When collecting arguments, leading and trailing whitespace is removed
# from each argument.
#
# This function properly handles nested parenthesis and commas---these do not
# define new arguments.
# ----------------------------------------------------------------------
def collect_args(self,tokenlist):
args = []
positions = []
current_arg = []
nesting = 1
tokenlen = len(tokenlist)
# Search for the opening '('.
i = 0
while (i < tokenlen) and (tokenlist[i].type in self.t_WS):
i += 1
if (i < tokenlen) and (tokenlist[i].value == '('):
positions.append(i+1)
else:
self.error(self.source,tokenlist[0].lineno,"Missing '(' in macro arguments")
return 0, [], []
i += 1
while i < tokenlen:
t = tokenlist[i]
if t.value == '(':
current_arg.append(t)
nesting += 1
elif t.value == ')':
nesting -= 1
if nesting == 0:
if current_arg:
args.append(self.tokenstrip(current_arg))
positions.append(i)
return i+1,args,positions
current_arg.append(t)
elif t.value == ',' and nesting == 1:
args.append(self.tokenstrip(current_arg))
positions.append(i+1)
current_arg = []
else:
current_arg.append(t)
i += 1
# Missing end argument
self.error(self.source,tokenlist[-1].lineno,"Missing ')' in macro arguments")
return 0, [],[]
# ----------------------------------------------------------------------
# macro_prescan()
#
# Examine the macro value (token sequence) and identify patch points
# This is used to speed up macro expansion later on---we'll know
# right away where to apply patches to the value to form the expansion
# ----------------------------------------------------------------------
def macro_prescan(self,macro):
macro.patch = [] # Standard macro arguments
macro.str_patch = [] # String conversion expansion
macro.var_comma_patch = [] # Variadic macro comma patch
i = 0
while i < len(macro.value):
if macro.value[i].type == self.t_ID and macro.value[i].value in macro.arglist:
argnum = macro.arglist.index(macro.value[i].value)
# Conversion of argument to a string
if i > 0 and macro.value[i-1].value == '#':
macro.value[i] = copy.copy(macro.value[i])
macro.value[i].type = self.t_STRING
del macro.value[i-1]
macro.str_patch.append((argnum,i-1))
continue
# Concatenation
elif (i > 0 and macro.value[i-1].value == '##'):
macro.patch.append(('c',argnum,i-1))
del macro.value[i-1]
continue
elif ((i+1) < len(macro.value) and macro.value[i+1].value == '##'):
macro.patch.append(('c',argnum,i))
i += 1
continue
# Standard expansion
else:
macro.patch.append(('e',argnum,i))
elif macro.value[i].value == '##':
if macro.variadic and (i > 0) and (macro.value[i-1].value == ',') and \
((i+1) < len(macro.value)) and (macro.value[i+1].type == self.t_ID) and \
(macro.value[i+1].value == macro.vararg):
macro.var_comma_patch.append(i-1)
i += 1
macro.patch.sort(key=lambda x: x[2],reverse=True)
# ----------------------------------------------------------------------
# macro_expand_args()
#
# Given a Macro and list of arguments (each a token list), this method
# returns an expanded version of a macro. The return value is a token sequence
# representing the replacement macro tokens
# ----------------------------------------------------------------------
def macro_expand_args(self,macro,args):
# Make a copy of the macro token sequence
rep = [copy.copy(_x) for _x in macro.value]
# Make string expansion patches. These do not alter the length of the replacement sequence
str_expansion = {}
for argnum, i in macro.str_patch:
if argnum not in str_expansion:
str_expansion[argnum] = ('"%s"' % "".join([x.value for x in args[argnum]])).replace("\\","\\\\")
rep[i] = copy.copy(rep[i])
rep[i].value = str_expansion[argnum]
# Make the variadic macro comma patch. If the variadic macro argument is empty, we get rid
comma_patch = False
if macro.variadic and not args[-1]:
for i in macro.var_comma_patch:
rep[i] = None
comma_patch = True
# Make all other patches. The order of these matters. It is assumed that the patch list
# has been sorted in reverse order of patch location since replacements will cause the
# size of the replacement sequence to expand from the patch point.
expanded = { }
for ptype, argnum, i in macro.patch:
# Concatenation. Argument is left unexpanded
if ptype == 'c':
rep[i:i+1] = args[argnum]
# Normal expansion. Argument is macro expanded first
elif ptype == 'e':
if argnum not in expanded:
expanded[argnum] = self.expand_macros(args[argnum])
rep[i:i+1] = expanded[argnum]
# Get rid of removed comma if necessary
if comma_patch:
rep = [_i for _i in rep if _i]
return rep
# ----------------------------------------------------------------------
# expand_macros()
#
# Given a list of tokens, this function performs macro expansion.
# The expanded argument is a dictionary that contains macros already
# expanded. This is used to prevent infinite recursion.
# ----------------------------------------------------------------------
def expand_macros(self,tokens,expanded=None):
if expanded is None:
expanded = {}
i = 0
while i < len(tokens):
t = tokens[i]
if t.type == self.t_ID:
if t.value in self.macros and t.value not in expanded:
# Yes, we found a macro match
expanded[t.value] = True
m = self.macros[t.value]
if not m.arglist:
# A simple macro
ex = self.expand_macros([copy.copy(_x) for _x in m.value],expanded)
for e in ex:
e.lineno = t.lineno
tokens[i:i+1] = ex
i += len(ex)
else:
# A macro with arguments
j = i + 1
while j < len(tokens) and tokens[j].type in self.t_WS:
j += 1
if tokens[j].value == '(':
tokcount,args,positions = self.collect_args(tokens[j:])
if not m.variadic and len(args) != len(m.arglist):
self.error(self.source,t.lineno,"Macro %s requires %d arguments" % (t.value,len(m.arglist)))
i = j + tokcount
elif m.variadic and len(args) < len(m.arglist)-1:
if len(m.arglist) > 2:
self.error(self.source,t.lineno,"Macro %s must have at least %d arguments" % (t.value, len(m.arglist)-1))
else:
self.error(self.source,t.lineno,"Macro %s must have at least %d argument" % (t.value, len(m.arglist)-1))
i = j + tokcount
else:
if m.variadic:
if len(args) == len(m.arglist)-1:
args.append([])
else:
args[len(m.arglist)-1] = tokens[j+positions[len(m.arglist)-1]:j+tokcount-1]
del args[len(m.arglist):]
# Get macro replacement text
rep = self.macro_expand_args(m,args)
rep = self.expand_macros(rep,expanded)
for r in rep:
r.lineno = t.lineno
tokens[i:j+tokcount] = rep
i += len(rep)
del expanded[t.value]
continue
elif t.value == '__LINE__':
t.type = self.t_INTEGER
t.value = self.t_INTEGER_TYPE(t.lineno)
i += 1
return tokens
# ----------------------------------------------------------------------
# evalexpr()
#
# Evaluate an expression token sequence for the purposes of evaluating
# integral expressions.
# ----------------------------------------------------------------------
def evalexpr(self,tokens):
# tokens = tokenize(line)
# Search for defined macros
i = 0
while i < len(tokens):
if tokens[i].type == self.t_ID and tokens[i].value == 'defined':
j = i + 1
needparen = False
result = "0L"
while j < len(tokens):
if tokens[j].type in self.t_WS:
j += 1
continue
elif tokens[j].type == self.t_ID:
if tokens[j].value in self.macros:
result = "1L"
else:
result = "0L"
if not needparen: break
elif tokens[j].value == '(':
needparen = True
elif tokens[j].value == ')':
break
else:
self.error(self.source,tokens[i].lineno,"Malformed defined()")
j += 1
tokens[i].type = self.t_INTEGER
tokens[i].value = self.t_INTEGER_TYPE(result)
del tokens[i+1:j+1]
i += 1
tokens = self.expand_macros(tokens)
for i,t in enumerate(tokens):
if t.type == self.t_ID:
tokens[i] = copy.copy(t)
tokens[i].type = self.t_INTEGER
tokens[i].value = self.t_INTEGER_TYPE("0L")
elif t.type == self.t_INTEGER:
tokens[i] = copy.copy(t)
# Strip off any trailing suffixes
tokens[i].value = str(tokens[i].value)
while tokens[i].value[-1] not in "0123456789abcdefABCDEF":
tokens[i].value = tokens[i].value[:-1]
expr = "".join([str(x.value) for x in tokens])
expr = expr.replace("&&"," and ")
expr = expr.replace("||"," or ")
expr = expr.replace("!"," not ")
try:
result = eval(expr)
except StandardError:
self.error(self.source,tokens[0].lineno,"Couldn't evaluate expression")
result = 0
return result
# ----------------------------------------------------------------------
# parsegen()
#
# Parse an input string/
# ----------------------------------------------------------------------
def parsegen(self,input,source=None):
# Replace trigraph sequences
t = trigraph(input)
lines = self.group_lines(t)
if not source:
source = ""
self.define("__FILE__ \"%s\"" % source)
self.source = source
chunk = []
enable = True
iftrigger = False
ifstack = []
for x in lines:
for i,tok in enumerate(x):
if tok.type not in self.t_WS: break
if tok.value == '#':
# Preprocessor directive
for tok in x:
if tok in self.t_WS and '\n' in tok.value:
chunk.append(tok)
dirtokens = self.tokenstrip(x[i+1:])
if dirtokens:
name = dirtokens[0].value
args = self.tokenstrip(dirtokens[1:])
else:
name = ""
args = []
if name == 'define':
if enable:
for tok in self.expand_macros(chunk):
yield tok
chunk = []
self.define(args)
elif name == 'include':
if enable:
for tok in self.expand_macros(chunk):
yield tok
chunk = []
oldfile = self.macros['__FILE__']
for tok in self.include(args):
yield tok
self.macros['__FILE__'] = oldfile
self.source = source
elif name == 'undef':
if enable:
for tok in self.expand_macros(chunk):
yield tok
chunk = []
self.undef(args)
elif name == 'ifdef':
ifstack.append((enable,iftrigger))
if enable:
if not args[0].value in self.macros:
enable = False
iftrigger = False
else:
iftrigger = True
elif name == 'ifndef':
ifstack.append((enable,iftrigger))
if enable:
if args[0].value in self.macros:
enable = False
iftrigger = False
else:
iftrigger = True
elif name == 'if':
ifstack.append((enable,iftrigger))
if enable:
result = self.evalexpr(args)
if not result:
enable = False
iftrigger = False
else:
iftrigger = True
elif name == 'elif':
if ifstack:
if ifstack[-1][0]: # We only pay attention if outer "if" allows this
if enable: # If already true, we flip enable False
enable = False
elif not iftrigger: # If False, but not triggered yet, we'll check expression
result = self.evalexpr(args)
if result:
enable = True
iftrigger = True
else:
self.error(self.source,dirtokens[0].lineno,"Misplaced #elif")
elif name == 'else':
if ifstack:
if ifstack[-1][0]:
if enable:
enable = False
elif not iftrigger:
enable = True
iftrigger = True
else:
self.error(self.source,dirtokens[0].lineno,"Misplaced #else")
elif name == 'endif':
if ifstack:
enable,iftrigger = ifstack.pop()
else:
self.error(self.source,dirtokens[0].lineno,"Misplaced #endif")
else:
# Unknown preprocessor directive
pass
else:
# Normal text
if enable:
chunk.extend(x)
for tok in self.expand_macros(chunk):
yield tok
chunk = []
# ----------------------------------------------------------------------
# include()
#
# Implementation of file-inclusion
# ----------------------------------------------------------------------
def include(self,tokens):
# Try to extract the filename and then process an include file
if not tokens:
return
if tokens:
if tokens[0].value != '<' and tokens[0].type != self.t_STRING:
tokens = self.expand_macros(tokens)
if tokens[0].value == '<':
# Include <...>
i = 1
while i < len(tokens):
if tokens[i].value == '>':
break
i += 1
else:
print "Malformed #include <...>"
return
filename = "".join([x.value for x in tokens[1:i]])
path = self.path + [""] + self.temp_path
elif tokens[0].type == self.t_STRING:
filename = tokens[0].value[1:-1]
path = self.temp_path + [""] + self.path
else:
print "Malformed #include statement"
return
for p in path:
iname = os.path.join(p,filename)
try:
data = open(iname,"r").read()
dname = os.path.dirname(iname)
if dname:
self.temp_path.insert(0,dname)
for tok in self.parsegen(data,filename):
yield tok
if dname:
del self.temp_path[0]
break
except IOError,e:
pass
else:
print "Couldn't find '%s'" % filename
# ----------------------------------------------------------------------
# define()
#
# Define a new macro
# ----------------------------------------------------------------------
def define(self,tokens):
if isinstance(tokens,(str,unicode)):
tokens = self.tokenize(tokens)
linetok = tokens
try:
name = linetok[0]
if len(linetok) > 1:
mtype = linetok[1]
else:
mtype = None
if not mtype:
m = Macro(name.value,[])
self.macros[name.value] = m
elif mtype.type in self.t_WS:
# A normal macro
m = Macro(name.value,self.tokenstrip(linetok[2:]))
self.macros[name.value] = m
elif mtype.value == '(':
# A macro with arguments
tokcount, args, positions = self.collect_args(linetok[1:])
variadic = False
for a in args:
if variadic:
print "No more arguments may follow a variadic argument"
break
astr = "".join([str(_i.value) for _i in a])
if astr == "...":
variadic = True
a[0].type = self.t_ID
a[0].value = '__VA_ARGS__'
variadic = True
del a[1:]
continue
elif astr[-3:] == "..." and a[0].type == self.t_ID:
variadic = True
del a[1:]
# If, for some reason, "." is part of the identifier, strip off the name for the purposes
# of macro expansion
if a[0].value[-3:] == '...':
a[0].value = a[0].value[:-3]
continue
if len(a) > 1 or a[0].type != self.t_ID:
print "Invalid macro argument"
break
else:
mvalue = self.tokenstrip(linetok[1+tokcount:])
i = 0
while i < len(mvalue):
if i+1 < len(mvalue):
if mvalue[i].type in self.t_WS and mvalue[i+1].value == '##':
del mvalue[i]
continue
elif mvalue[i].value == '##' and mvalue[i+1].type in self.t_WS:
del mvalue[i+1]
i += 1
m = Macro(name.value,mvalue,[x[0].value for x in args],variadic)
self.macro_prescan(m)
self.macros[name.value] = m
else:
print "Bad macro definition"
except LookupError:
print "Bad macro definition"
# ----------------------------------------------------------------------
# undef()
#
# Undefine a macro
# ----------------------------------------------------------------------
def undef(self,tokens):
id = tokens[0].value
try:
del self.macros[id]
except LookupError:
pass
# ----------------------------------------------------------------------
# parse()
#
# Parse input text.
# ----------------------------------------------------------------------
def parse(self,input,source=None,ignore={}):
self.ignore = ignore
self.parser = self.parsegen(input,source)
# ----------------------------------------------------------------------
# token()
#
# Method to return individual tokens
# ----------------------------------------------------------------------
def token(self):
try:
while True:
tok = self.parser.next()
if tok.type not in self.ignore: return tok
except StopIteration:
self.parser = None
return None
if __name__ == '__main__':
import ply.lex as lex
lexer = lex.lex()
# Run a preprocessor
import sys
f = open(sys.argv[1])
input = f.read()
p = Preprocessor(lexer)
p.parse(input,sys.argv[1])
while True:
tok = p.token()
if not tok: break
print p.source, tok

View File

@ -0,0 +1,133 @@
# ----------------------------------------------------------------------
# ctokens.py
#
# Token specifications for symbols in ANSI C and C++. This file is
# meant to be used as a library in other tokenizers.
# ----------------------------------------------------------------------
# Reserved words
tokens = [
# Literals (identifier, integer constant, float constant, string constant, char const)
'ID', 'TYPEID', 'ICONST', 'FCONST', 'SCONST', 'CCONST',
# Operators (+,-,*,/,%,|,&,~,^,<<,>>, ||, &&, !, <, <=, >, >=, ==, !=)
'PLUS', 'MINUS', 'TIMES', 'DIVIDE', 'MOD',
'OR', 'AND', 'NOT', 'XOR', 'LSHIFT', 'RSHIFT',
'LOR', 'LAND', 'LNOT',
'LT', 'LE', 'GT', 'GE', 'EQ', 'NE',
# Assignment (=, *=, /=, %=, +=, -=, <<=, >>=, &=, ^=, |=)
'EQUALS', 'TIMESEQUAL', 'DIVEQUAL', 'MODEQUAL', 'PLUSEQUAL', 'MINUSEQUAL',
'LSHIFTEQUAL','RSHIFTEQUAL', 'ANDEQUAL', 'XOREQUAL', 'OREQUAL',
# Increment/decrement (++,--)
'PLUSPLUS', 'MINUSMINUS',
# Structure dereference (->)
'ARROW',
# Ternary operator (?)
'TERNARY',
# Delimeters ( ) [ ] { } , . ; :
'LPAREN', 'RPAREN',
'LBRACKET', 'RBRACKET',
'LBRACE', 'RBRACE',
'COMMA', 'PERIOD', 'SEMI', 'COLON',
# Ellipsis (...)
'ELLIPSIS',
]
# Operators
t_PLUS = r'\+'
t_MINUS = r'-'
t_TIMES = r'\*'
t_DIVIDE = r'/'
t_MODULO = r'%'
t_OR = r'\|'
t_AND = r'&'
t_NOT = r'~'
t_XOR = r'\^'
t_LSHIFT = r'<<'
t_RSHIFT = r'>>'
t_LOR = r'\|\|'
t_LAND = r'&&'
t_LNOT = r'!'
t_LT = r'<'
t_GT = r'>'
t_LE = r'<='
t_GE = r'>='
t_EQ = r'=='
t_NE = r'!='
# Assignment operators
t_EQUALS = r'='
t_TIMESEQUAL = r'\*='
t_DIVEQUAL = r'/='
t_MODEQUAL = r'%='
t_PLUSEQUAL = r'\+='
t_MINUSEQUAL = r'-='
t_LSHIFTEQUAL = r'<<='
t_RSHIFTEQUAL = r'>>='
t_ANDEQUAL = r'&='
t_OREQUAL = r'\|='
t_XOREQUAL = r'^='
# Increment/decrement
t_INCREMENT = r'\+\+'
t_DECREMENT = r'--'
# ->
t_ARROW = r'->'
# ?
t_TERNARY = r'\?'
# Delimeters
t_LPAREN = r'\('
t_RPAREN = r'\)'
t_LBRACKET = r'\['
t_RBRACKET = r'\]'
t_LBRACE = r'\{'
t_RBRACE = r'\}'
t_COMMA = r','
t_PERIOD = r'\.'
t_SEMI = r';'
t_COLON = r':'
t_ELLIPSIS = r'\.\.\.'
# Identifiers
t_ID = r'[A-Za-z_][A-Za-z0-9_]*'
# Integer literal
t_INTEGER = r'\d+([uU]|[lL]|[uU][lL]|[lL][uU])?'
# Floating literal
t_FLOAT = r'((\d+)(\.\d+)(e(\+|-)?(\d+))? | (\d+)e(\+|-)?(\d+))([lL]|[fF])?'
# String literal
t_STRING = r'\"([^\\\n]|(\\.))*?\"'
# Character constant 'c' or L'c'
t_CHARACTER = r'(L)?\'([^\\\n]|(\\.))*?\''
# Comment (C-Style)
def t_COMMENT(t):
r'/\*(.|\n)*?\*/'
t.lexer.lineno += t.value.count('\n')
return t
# Comment (C++-Style)
def t_CPPCOMMENT(t):
r'//.*\n'
t.lexer.lineno += 1
return t

1021
plugins/pycparser/ply/lex.py Normal file

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,75 @@
#-----------------------------------------------------------------
# pycparser: __init__.py
#
# This package file exports some convenience functions for
# interacting with pycparser
#
# Copyright (C) 2008-2009, Eli Bendersky
# License: LGPL
#-----------------------------------------------------------------
__all__ = ['c_lexer', 'c_parser', 'c_ast']
__version__ = '1.05'
from subprocess import Popen, PIPE
from types import ListType
from c_parser import CParser
def parse_file( filename, use_cpp=False,
cpp_path='cpp', cpp_args=''):
""" Parse a C file using pycparser.
filename:
Name of the file you want to parse.
use_cpp:
Set to True if you want to execute the C pre-processor
on the file prior to parsing it.
cpp_path:
If use_cpp is True, this is the path to 'cpp' on your
system. If no path is provided, it attempts to just
execute 'cpp', so it must be in your PATH.
cpp_args:
If use_cpp is True, set this to the command line
arguments strings to cpp. Be careful with quotes -
it's best to pass a raw string (r'') here.
For example:
r'-I../utils/fake_libc_include'
If several arguments are required, pass a list of
strings.
When successful, an AST is returned. ParseError can be
thrown if the file doesn't parse successfully.
Errors from cpp will be printed out.
"""
if use_cpp:
path_list = [cpp_path]
if isinstance(cpp_args, ListType):
path_list += cpp_args
elif cpp_args != '':
path_list += [cpp_args]
path_list += [filename]
# Note the use of universal_newlines to treat all newlines
# as \n for Python's purpose
#
pipe = Popen( path_list,
stdout=PIPE,
universal_newlines=True)
text = pipe.communicate()[0]
else:
text = open(filename).read()
parser = CParser()
return parser.parse(text, filename)
if __name__ == "__main__":
pass

View File

@ -0,0 +1,249 @@
#-----------------------------------------------------------------
# _ast_gen.py
#
# Generates the AST Node classes from a specification given in
# a .yaml file
#
# The design of this module was inspired by astgen.py from the
# Python 2.5 code-base.
#
# Copyright (C) 2008-2009, Eli Bendersky
# License: LGPL
#-----------------------------------------------------------------
import pprint
from string import Template
import yaml
class ASTCodeGenerator(object):
def __init__(self, cfg_filename='_c_ast.yaml'):
""" Initialize the code generator from a configuration
file.
"""
self.cfg_filename = cfg_filename
cfg = yaml.load(open(cfg_filename).read())
self.node_cfg = [NodeCfg(name, cfg[name]) for name in cfg]
#~ pprint.pprint(self.node_cfg)
#~ print ''
def generate(self, file=None):
""" Generates the code into file, an open file buffer.
"""
src = Template(_PROLOGUE_COMMENT).substitute(
cfg_filename=self.cfg_filename)
src += _PROLOGUE_CODE
for node_cfg in self.node_cfg:
src += node_cfg.generate_source() + '\n\n'
file.write(src)
class NodeCfg(object):
def __init__(self, name, contents):
self.name = name
self.all_entries = []
self.attr = []
self.child = []
self.seq_child = []
for entry in contents:
clean_entry = entry.rstrip('*')
self.all_entries.append(clean_entry)
if entry.endswith('**'):
self.seq_child.append(clean_entry)
elif entry.endswith('*'):
self.child.append(clean_entry)
else:
self.attr.append(entry)
def generate_source(self):
src = self._gen_init()
src += '\n' + self._gen_children()
src += '\n' + self._gen_show()
return src
def _gen_init(self):
src = "class %s(Node):\n" % self.name
if self.all_entries:
args = ', '.join(self.all_entries)
arglist = '(self, %s, coord=None)' % args
else:
arglist = '(self, coord=None)'
src += " def __init__%s:\n" % arglist
for name in self.all_entries + ['coord']:
src += " self.%s = %s\n" % (name, name)
return src
def _gen_children(self):
src = ' def children(self):\n'
if self.all_entries:
src += ' nodelist = []\n'
template = ('' +
' if self.%s is not None:' +
' nodelist.%s(self.%s)\n')
for child in self.child:
src += template % (
child, 'append', child)
for seq_child in self.seq_child:
src += template % (
seq_child, 'extend', seq_child)
src += ' return tuple(nodelist)\n'
else:
src += ' return ()\n'
return src
def _gen_show(self):
src = ' def show(self, buf=sys.stdout, offset=0, attrnames=False, showcoord=False):\n'
src += " lead = ' ' * offset\n"
src += " buf.write(lead + '%s: ')\n\n" % self.name
if self.attr:
src += " if attrnames:\n"
src += " attrstr = ', '.join('%s=%s' % nv for nv in ["
src += ', '.join('("%s", repr(%s))' % (nv, 'self.%s' % nv) for nv in self.attr)
src += '])\n'
src += " else:\n"
src += " attrstr = ', '.join('%s' % v for v in ["
src += ', '.join('self.%s' % v for v in self.attr)
src += '])\n'
src += " buf.write(attrstr)\n\n"
src += " if showcoord:\n"
src += " buf.write(' (at %s)' % self.coord)\n"
src += " buf.write('\\n')\n\n"
src += " for c in self.children():\n"
src += " c.show(buf, offset + 2, attrnames, showcoord)\n"
return src
_PROLOGUE_COMMENT = \
r'''#-----------------------------------------------------------------
# ** ATTENTION **
# This code was automatically generated from the file:
# $cfg_filename
#
# Do not modify it directly. Modify the configuration file and
# run the generator again.
# ** ** *** ** **
#
# pycparser: c_ast.py
#
# AST Node classes.
#
# Copyright (C) 2008, Eli Bendersky
# License: LGPL
#-----------------------------------------------------------------
'''
_PROLOGUE_CODE = r'''
import sys
class Node(object):
""" Abstract base class for AST nodes.
"""
def children(self):
""" A sequence of all children that are Nodes
"""
pass
def show(self, buf=sys.stdout, offset=0, attrnames=False, showcoord=False):
""" Pretty print the Node and all its attributes and
children (recursively) to a buffer.
file:
Open IO buffer into which the Node is printed.
offset:
Initial offset (amount of leading spaces)
attrnames:
True if you want to see the attribute names in
name=value pairs. False to only see the values.
showcoord:
Do you want the coordinates of each Node to be
displayed.
"""
pass
class NodeVisitor(object):
""" A base NodeVisitor class for visiting c_ast nodes.
Subclass it and define your own visit_XXX methods, where
XXX is the class name you want to visit with these
methods.
For example:
class ConstantVisitor(NodeVisitor):
def __init__(self):
self.values = []
def visit_Constant(self, node):
self.values.append(node.value)
Creates a list of values of all the constant nodes
encountered below the given node. To use it:
cv = ConstantVisitor()
cv.visit(node)
Notes:
* generic_visit() will be called for AST nodes for which
no visit_XXX method was defined.
* The children of nodes for which a visit_XXX was
defined will not be visited - if you need this, call
generic_visit() on the node.
You can use:
NodeVisitor.generic_visit(self, node)
* Modeled after Python's own AST visiting facilities
(the ast module of Python 3.0)
"""
def visit(self, node):
""" Visit a node.
"""
method = 'visit_' + node.__class__.__name__
visitor = getattr(self, method, self.generic_visit)
return visitor(node)
def generic_visit(self, node):
""" Called if no explicit visitor function exists for a
node. Implements preorder visiting of the node.
"""
for c in node.children():
self.visit(c)
'''
if __name__ == "__main__":
import sys
ast_gen = ASTCodeGenerator('_c_ast.yaml')
ast_gen.generate(open('c_ast.py', 'w'))

View File

@ -0,0 +1,31 @@
#-----------------------------------------------------------------
# pycparser: _build_tables.py
#
# A dummy for generating the lexing/parsing tables and and
# compiling them into .pyc for faster execution in optimized mode.
# Also generates AST code from the _c_ast.yaml configuration file.
#
# Copyright (C) 2008, Eli Bendersky
# License: LGPL
#-----------------------------------------------------------------
# Generate c_ast.py
#
from _ast_gen import ASTCodeGenerator
ast_gen = ASTCodeGenerator('_c_ast.yaml')
ast_gen.generate(open('c_ast.py', 'w'))
import c_parser
# Generates the tables
#
c_parser.CParser(
lex_optimize=True,
yacc_debug=False,
yacc_optimize=True)
# Load to compile into .pyc
#
import lextab
import yacctab
import c_ast

View File

@ -0,0 +1,164 @@
#-----------------------------------------------------------------
# pycparser: _c_ast_gen.yaml
#
# Defines the AST Node classes used in pycparser.
#
# Each entry is a Node sub-class name, listing the attributes
# and child nodes of the class:
# <name>* - a child node
# <name>** - a sequence of child nodes
# <name> - an attribute
#
# Copyright (C) 2008-2009, Eli Bendersky
# License: LGPL
#-----------------------------------------------------------------
ArrayDecl: [type*, dim*]
ArrayRef: [name*, subscript*]
# op: =, +=, /= etc.
#
Assignment: [op, lvalue*, rvalue*]
BinaryOp: [op, left*, right*]
Break: []
Case: [expr*, stmt*]
Cast: [to_type*, expr*]
# Compound statement: { declarations... statements...}
#
Compound: [decls**, stmts**]
# type: int, char, float, etc. see CLexer for constant token types
#
Constant: [type, value]
Continue: []
# name: the variable being declared
# quals: list of qualifiers (const, volatile)
# storage: list of storage specifiers (extern, register, etc.)
# type: declaration type (probably nested with all the modifiers)
# init: initialization value, or None
# bitsize: bit field size, or None
#
Decl: [name, quals, storage, type*, init*, bitsize*]
Default: [stmt*]
DoWhile: [cond*, stmt*]
# Represents the ellipsis (...) parameter in a function
# declaration
#
EllipsisParam: []
# Enumeration type specifier
# name: an optional ID
# values: an EnumeratorList
#
Enum: [name, values*]
# A name/value pair for enumeration values
#
Enumerator: [name, value*]
# A list of enumerators
#
EnumeratorList: [enumerators**]
# a list of comma separated expressions
#
ExprList: [exprs**]
# This is the top of the AST, representing a single C file (a
# translation unit in K&R jargon). It contains a list of
# "external-declaration"s, which is either declarations (Decl),
# Typedef or function definitions (FuncDef).
#
FileAST: [ext**]
# for (init; cond; next) stmt
#
For: [init*, cond*, next*, stmt*]
# name: Id
# args: ExprList
#
FuncCall: [name*, args*]
# type <decl>(args)
#
FuncDecl: [args*, type*]
# Function definition: a declarator for the function name and
# a body, which is a compound statement.
# There's an optional list of parameter declarations for old
# K&R-style definitions
#
FuncDef: [decl*, param_decls**, body*]
Goto: [name]
ID: [name]
# Holder for types that are a simple identifier (e.g. the built
# ins void, char etc. and typedef-defined types)
#
IdentifierType: [names]
If: [cond*, iftrue*, iffalse*]
Label: [name, stmt*]
# a list of comma separated function parameter declarations
#
ParamList: [params**]
PtrDecl: [quals, type*]
Return: [expr*]
# name: struct tag name
# decls: declaration of members
#
Struct: [name, decls**]
# type: . or ->
# name.field or name->field
#
StructRef: [name*, type, field*]
Switch: [cond*, stmt*]
# cond ? iftrue : iffalse
#
TernaryOp: [cond*, iftrue*, iffalse*]
# A base type declaration
#
TypeDecl: [declname, quals, type*]
# A typedef declaration.
# Very similar to Decl, but without some attributes
#
Typedef: [name, quals, storage, type*]
Typename: [quals, type*]
UnaryOp: [op, expr*]
# name: union tag name
# decls: declaration of members
#
Union: [name, decls**]
While: [cond*, stmt*]

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,443 @@
#-----------------------------------------------------------------
# pycparser: clex.py
#
# CLexer class: lexer for the C language
#
# Copyright (C) 2008, Eli Bendersky
# License: LGPL
#-----------------------------------------------------------------
import re
import sys
import ply.lex
from ply.lex import TOKEN
class CLexer(object):
""" A lexer for the C language. After building it, set the
input text with input(), and call token() to get new
tokens.
The public attribute filename can be set to an initial
filaneme, but the lexer will update it upon #line
directives.
"""
def __init__(self, error_func, type_lookup_func):
""" Create a new Lexer.
error_func:
An error function. Will be called with an error
message, line and column as arguments, in case of
an error during lexing.
type_lookup_func:
A type lookup function. Given a string, it must
return True IFF this string is a name of a type
that was defined with a typedef earlier.
"""
self.error_func = error_func
self.type_lookup_func = type_lookup_func
self.filename = ''
# Allow either "# line" or "# <num>" to support GCC's
# cpp output
#
self.line_pattern = re.compile('([ \t]*line\W)|([ \t]*\d+)')
def build(self, **kwargs):
""" Builds the lexer from the specification. Must be
called after the lexer object is created.
This method exists separately, because the PLY
manual warns against calling lex.lex inside
__init__
"""
self.lexer = ply.lex.lex(object=self, **kwargs)
def reset_lineno(self):
""" Resets the internal line number counter of the lexer.
"""
self.lexer.lineno = 1
def input(self, text):
self.lexer.input(text)
def token(self):
g = self.lexer.token()
return g
######################-- PRIVATE --######################
##
## Internal auxiliary methods
##
def _error(self, msg, token):
location = self._make_tok_location(token)
self.error_func(msg, location[0], location[1])
self.lexer.skip(1)
def _find_tok_column(self, token):
i = token.lexpos
while i > 0:
if self.lexer.lexdata[i] == '\n': break
i -= 1
return (token.lexpos - i) + 1
def _make_tok_location(self, token):
return (token.lineno, self._find_tok_column(token))
##
## Reserved keywords
##
keywords = (
'AUTO', 'BREAK', 'CASE', 'CHAR', 'CONST', 'CONTINUE',
'DEFAULT', 'DO', 'DOUBLE', 'ELSE', 'ENUM', 'EXTERN',
'FLOAT', 'FOR', 'GOTO', 'IF', 'INT', 'LONG', 'REGISTER',
'RETURN', 'SHORT', 'SIGNED', 'SIZEOF', 'STATIC', 'STRUCT',
'SWITCH', 'TYPEDEF', 'UNION', 'UNSIGNED', 'VOID',
'VOLATILE', 'WHILE',
)
keyword_map = {}
for r in keywords:
keyword_map[r.lower()] = r
##
## All the tokens recognized by the lexer
##
tokens = keywords + (
# Identifiers
'ID',
# Type identifiers (identifiers previously defined as
# types with typedef)
'TYPEID',
# constants
'INT_CONST_DEC', 'INT_CONST_OCT', 'INT_CONST_HEX',
'FLOAT_CONST',
'CHAR_CONST',
'WCHAR_CONST',
# String literals
'STRING_LITERAL',
'WSTRING_LITERAL',
# Operators
'PLUS', 'MINUS', 'TIMES', 'DIVIDE', 'MOD',
'OR', 'AND', 'NOT', 'XOR', 'LSHIFT', 'RSHIFT',
'LOR', 'LAND', 'LNOT',
'LT', 'LE', 'GT', 'GE', 'EQ', 'NE',
# Assignment
'EQUALS', 'TIMESEQUAL', 'DIVEQUAL', 'MODEQUAL',
'PLUSEQUAL', 'MINUSEQUAL',
'LSHIFTEQUAL','RSHIFTEQUAL', 'ANDEQUAL', 'XOREQUAL',
'OREQUAL',
# Increment/decrement
'PLUSPLUS', 'MINUSMINUS',
# Structure dereference (->)
'ARROW',
# Conditional operator (?)
'CONDOP',
# Delimeters
'LPAREN', 'RPAREN', # ( )
'LBRACKET', 'RBRACKET', # [ ]
'LBRACE', 'RBRACE', # { }
'COMMA', 'PERIOD', # . ,
'SEMI', 'COLON', # ; :
# Ellipsis (...)
'ELLIPSIS',
# pre-processor
'PPHASH', # '#'
)
##
## Regexes for use in tokens
##
##
# valid C identifiers (K&R2: A.2.3)
identifier = r'[a-zA-Z_][0-9a-zA-Z_]*'
# integer constants (K&R2: A.2.5.1)
integer_suffix_opt = r'(([uU][lL])|([lL][uU])|[uU]|[lL])?'
decimal_constant = '(0'+integer_suffix_opt+')|([1-9][0-9]*'+integer_suffix_opt+')'
octal_constant = '0[0-7]*'+integer_suffix_opt
hex_constant = '0[xX][0-9a-fA-F]+'+integer_suffix_opt
bad_octal_constant = '0[0-7]*[89]'
# character constants (K&R2: A.2.5.2)
# Note: a-zA-Z are allowed as escape chars to support #line
# directives with Windows paths as filenames (\dir\file...)
#
simple_escape = r"""([a-zA-Z\\?'"])"""
octal_escape = r"""([0-7]{1,3})"""
hex_escape = r"""(x[0-9a-fA-F]+)"""
bad_escape = r"""([\\][^a-zA-Z\\?'"x0-7])"""
escape_sequence = r"""(\\("""+simple_escape+'|'+octal_escape+'|'+hex_escape+'))'
cconst_char = r"""([^'\\\n]|"""+escape_sequence+')'
char_const = "'"+cconst_char+"'"
wchar_const = 'L'+char_const
unmatched_quote = "('"+cconst_char+"*\\n)|('"+cconst_char+"*$)"
bad_char_const = r"""('"""+cconst_char+"""[^'\n]+')|('')|('"""+bad_escape+r"""[^'\n]*')"""
# string literals (K&R2: A.2.6)
string_char = r"""([^"\\\n]|"""+escape_sequence+')'
string_literal = '"'+string_char+'*"'
wstring_literal = 'L'+string_literal
bad_string_literal = '"'+string_char+'*'+bad_escape+string_char+'*"'
# floating constants (K&R2: A.2.5.3)
exponent_part = r"""([eE][-+]?[0-9]+)"""
fractional_constant = r"""([0-9]*\.[0-9]+)|([0-9]+\.)"""
floating_constant = '(((('+fractional_constant+')'+exponent_part+'?)|([0-9]+'+exponent_part+'))[FfLl]?)'
##
## Lexer states
##
states = (
# ppline: preprocessor line directives
#
('ppline', 'exclusive'),
)
def t_PPHASH(self, t):
r'[ \t]*\#'
m = self.line_pattern.match(
t.lexer.lexdata, pos=t.lexer.lexpos)
if m:
t.lexer.begin('ppline')
self.pp_line = self.pp_filename = None
#~ print "ppline starts on line %s" % t.lexer.lineno
else:
t.type = 'PPHASH'
return t
##
## Rules for the ppline state
##
@TOKEN(string_literal)
def t_ppline_FILENAME(self, t):
if self.pp_line is None:
self._error('filename before line number in #line', t)
else:
self.pp_filename = t.value.lstrip('"').rstrip('"')
#~ print "PP got filename: ", self.pp_filename
@TOKEN(decimal_constant)
def t_ppline_LINE_NUMBER(self, t):
if self.pp_line is None:
self.pp_line = t.value
else:
# Ignore: GCC's cpp sometimes inserts a numeric flag
# after the file name
pass
def t_ppline_NEWLINE(self, t):
r'\n'
if self.pp_line is None:
self._error('line number missing in #line', t)
else:
self.lexer.lineno = int(self.pp_line)
if self.pp_filename is not None:
self.filename = self.pp_filename
t.lexer.begin('INITIAL')
def t_ppline_PPLINE(self, t):
r'line'
pass
t_ppline_ignore = ' \t'
def t_ppline_error(self, t):
msg = 'invalid #line directive'
self._error(msg, t)
##
## Rules for the normal state
##
t_ignore = ' \t'
# Newlines
def t_NEWLINE(self, t):
r'\n+'
t.lexer.lineno += t.value.count("\n")
# Operators
t_PLUS = r'\+'
t_MINUS = r'-'
t_TIMES = r'\*'
t_DIVIDE = r'/'
t_MOD = r'%'
t_OR = r'\|'
t_AND = r'&'
t_NOT = r'~'
t_XOR = r'\^'
t_LSHIFT = r'<<'
t_RSHIFT = r'>>'
t_LOR = r'\|\|'
t_LAND = r'&&'
t_LNOT = r'!'
t_LT = r'<'
t_GT = r'>'
t_LE = r'<='
t_GE = r'>='
t_EQ = r'=='
t_NE = r'!='
# Assignment operators
t_EQUALS = r'='
t_TIMESEQUAL = r'\*='
t_DIVEQUAL = r'/='
t_MODEQUAL = r'%='
t_PLUSEQUAL = r'\+='
t_MINUSEQUAL = r'-='
t_LSHIFTEQUAL = r'<<='
t_RSHIFTEQUAL = r'>>='
t_ANDEQUAL = r'&='
t_OREQUAL = r'\|='
t_XOREQUAL = r'^='
# Increment/decrement
t_PLUSPLUS = r'\+\+'
t_MINUSMINUS = r'--'
# ->
t_ARROW = r'->'
# ?
t_CONDOP = r'\?'
# Delimeters
t_LPAREN = r'\('
t_RPAREN = r'\)'
t_LBRACKET = r'\['
t_RBRACKET = r'\]'
t_LBRACE = r'\{'
t_RBRACE = r'\}'
t_COMMA = r','
t_PERIOD = r'\.'
t_SEMI = r';'
t_COLON = r':'
t_ELLIPSIS = r'\.\.\.'
t_STRING_LITERAL = string_literal
# The following floating and integer constants are defined as
# functions to impose a strict order (otherwise, decimal
# is placed before the others because its regex is longer,
# and this is bad)
#
@TOKEN(floating_constant)
def t_FLOAT_CONST(self, t):
return t
@TOKEN(hex_constant)
def t_INT_CONST_HEX(self, t):
return t
@TOKEN(bad_octal_constant)
def t_BAD_CONST_OCT(self, t):
msg = "Invalid octal constant"
self._error(msg, t)
@TOKEN(octal_constant)
def t_INT_CONST_OCT(self, t):
return t
@TOKEN(decimal_constant)
def t_INT_CONST_DEC(self, t):
return t
# Must come before bad_char_const, to prevent it from
# catching valid char constants as invalid
#
@TOKEN(char_const)
def t_CHAR_CONST(self, t):
return t
@TOKEN(wchar_const)
def t_WCHAR_CONST(self, t):
return t
@TOKEN(unmatched_quote)
def t_UNMATCHED_QUOTE(self, t):
msg = "Unmatched '"
self._error(msg, t)
@TOKEN(bad_char_const)
def t_BAD_CHAR_CONST(self, t):
msg = "Invalid char constant %s" % t.value
self._error(msg, t)
@TOKEN(wstring_literal)
def t_WSTRING_LITERAL(self, t):
return t
# unmatched string literals are caught by the preprocessor
@TOKEN(bad_string_literal)
def t_BAD_STRING_LITERAL(self, t):
msg = "String contains invalid escape code"
self._error(msg, t)
@TOKEN(identifier)
def t_ID(self, t):
t.type = self.keyword_map.get(t.value, "ID")
if t.type == 'ID' and self.type_lookup_func(t.value):
t.type = "TYPEID"
return t
def t_error(self, t):
msg = 'Illegal character %s' % repr(t.value[0])
self._error(msg, t)
if __name__ == "__main__":
filename = '../zp.c'
text = open(filename).read()
#~ text = '"'+r"""ka \p ka"""+'"'
text = r"""
546
#line 66 "kwas\df.h"
id 4
# 5
dsf
"""
def errfoo(msg, a, b):
print msg
sys.exit()
def typelookup(namd):
return False
clex = CLexer(errfoo, typelookup)
clex.build()
clex.input(text)
while 1:
tok = clex.token()
if not tok: break
#~ print type(tok)
print "-", tok.value, tok.type, tok.lineno, clex.filename, tok.lexpos

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,67 @@
#-----------------------------------------------------------------
# plyparser.py
#
# PLYParser class and other utilites for simplifying programming
# parsers with PLY
#
# Copyright (C) 2008-2009, Eli Bendersky
# License: LGPL
#-----------------------------------------------------------------
class Coord(object):
""" Coordinates of a syntactic element. Consists of:
- File name
- Line number
- (optional) column number, for the Lexer
"""
def __init__(self, file, line, column=None):
self.file = file
self.line = line
self.column = column
def __str__(self):
str = "%s:%s" % (self.file, self.line)
if self.column: str += ":%s" % self.column
return str
class ParseError(Exception): pass
class PLYParser(object):
def _create_opt_rule(self, rulename):
""" Given a rule name, creates an optional ply.yacc rule
for it. The name of the optional rule is
<rulename>_opt
"""
optname = rulename + '_opt'
def optrule(self, p):
p[0] = p[1]
optrule.__doc__ = '%s : empty\n| %s' % (optname, rulename)
optrule.__name__ = 'p_%s' % optname
setattr(self.__class__, optrule.__name__, optrule)
def _coord(self, lineno, column=None):
return Coord(
file=self.clex.filename,
line=lineno,
column=column)
def _parse_error(self, msg, coord):
raise ParseError("%s: %s" % (coord, msg))
if __name__ == '__main__':
pp = PLYParser()
pp._create_opt_rule('java')
ar = [4, 6]
pp.p_java_opt(ar)
print ar
print pp.p_java_opt.__doc__
print dir(pp)

File diff suppressed because one or more lines are too long