470 lines
13 KiB
Python
470 lines
13 KiB
Python
#!/usr/bin/python
|
|
import os
|
|
import sys
|
|
import re
|
|
|
|
import ply.lex as lex
|
|
|
|
import inspect
|
|
import lutinDebug as debug
|
|
import lutinTools
|
|
import Class
|
|
|
|
# Token type names declared for the ply lexer that is built further down.
# The order of the entries is kept exactly as it was.
tokens = [
    # literals and identifiers
    'NUMBER', 'NAME',
    # bracketing
    'OPEN_PAREN', 'CLOSE_PAREN',
    'OPEN_BRACE', 'CLOSE_BRACE',
    'OPEN_SQUARE_BRACKET', 'CLOSE_SQUARE_BRACKET',
    # punctuation
    'COLON', 'SEMI_COLON', 'COMMA', 'TAB', 'BACKSLASH',
    'PIPE', 'PERCENT', 'EXCLAMATION', 'CARET',
    # comments and preprocessor lines
    'COMMENT_SINGLELINE', 'COMMENT_MULTILINE',
    'PRECOMP_MACRO', 'PRECOMP_MACRO_CONT',
    # operators
    'ASTERISK', 'AMPERSTAND', 'EQUALS', 'MINUS', 'PLUS', 'DIVIDE',
    # quoted literals and line handling
    'CHAR_LITERAL', 'STRING_LITERAL', 'NEW_LINE', 'SQUOTE',
]
|
|
|
|
# Characters the lexer drops without producing a token.
t_ignore = " \r.?@\f"

# --- Simple (string) token rules -------------------------------------------
# Decimal or hexadecimal-looking literal: a digit followed by hex digits / x.
t_NUMBER = r'[0-9][0-9XxA-Fa-f]*'
# Identifier; '<', '>' and '~' are accepted as a first character.
t_NAME = r'[<>A-Za-z_~][A-Za-z0-9_]*'

# Bracketing characters.
t_OPEN_PAREN = r'\('
t_CLOSE_PAREN = r'\)'
t_OPEN_BRACE = r'{'
t_CLOSE_BRACE = r'}'
t_OPEN_SQUARE_BRACKET = r'\['
t_CLOSE_SQUARE_BRACKET = r'\]'

# Single-character punctuation.
t_SEMI_COLON = r';'
t_COLON = r':'
t_COMMA = r','
t_TAB = r'\t'
t_BACKSLASH = r'\\'
t_PIPE = r'\|'
t_PERCENT = r'%'
t_CARET = r'\^'
t_EXCLAMATION = r'!'

# Preprocessor directive: '#' up to the end of the line.
t_PRECOMP_MACRO = r'\#.*'
# Any line that ends with a backslash continuation.
t_PRECOMP_MACRO_CONT = r'.*\\\n'
|
|
def t_COMMENT_SINGLELINE(t):
    r'\/\/.*\n'
    # NOTE: the raw string above is the token regex that ply reads from the
    # function docstring, so it must stay the first statement and must not be
    # turned into prose documentation.
    #
    # Handles a `// ...` comment up to and including its newline.
    # Comments starting with `///` or `//!` are appended to the module-level
    # doxygenCommentCache (separated by "\n" when the cache is not empty);
    # the trailing newline of the comment itself is not stored.
    # Nothing is returned, so no token is produced for the comment.
    global doxygenCommentCache
    if t.value.startswith(("///", "//!")):
        if doxygenCommentCache:
            doxygenCommentCache += "\n"
        if t.value.endswith("\n"):
            doxygenCommentCache += t.value[:-1]
        else:
            doxygenCommentCache += t.value
    # str.count works on both Python 2 and 3; len(filter(...)) raises
    # TypeError on Python 3 because filter() returns an iterator there.
    t.lexer.lineno += t.value.count("\n")
|
|
# --- Operator rules ---------------------------------------------------------
t_ASTERISK = r'\*'
t_MINUS = r'\-'
t_PLUS = r'\+'
# A single slash that is not the start of a '//' comment.
t_DIVIDE = r'/(?!/)'
t_AMPERSTAND = r'&'
t_EQUALS = r'='

# --- Quoted literals --------------------------------------------------------
# One arbitrary character between single quotes.
t_CHAR_LITERAL = "'.'"
# A lone single quote.
t_SQUOTE = "'"
# String literal pattern, found at
# http://wordaligned.org/articles/string-literals-and-regular-expressions
# TODO: This does not work with the string "bla \" bla"
# NOTE(review): the `\\.` alternative looks like it accepts escaped quotes;
# confirm whether the TODO above is still accurate.
t_STRING_LITERAL = r'"([^"\\]|\\.)*"'
|
|
#Found at http://ostermiller.org/findcomment.html
|
|
def t_COMMENT_MULTILINE(t):
    r'/\*([^*]|\n|(\*+([^*/]|\n)))*\*+/'
    # NOTE: the raw string above is the token regex that ply reads from the
    # function docstring, so it must stay the first statement and unchanged.
    #
    # Handles a `/* ... */` comment. Comments opening with `/**` or `/*!`
    # are appended to the module-level doxygenCommentCache after two small
    # clean-ups. Nothing is returned, so no token is produced.
    global doxygenCommentCache
    if t.value.startswith(("/**", "/*!")):
        #not sure why, but get double new lines
        v = t.value.replace("\n\n", "\n")
        #strip prefixing whitespace
        # (raw string: same pattern as before, without invalid-escape warnings)
        v = re.sub(r"\n[\s]+\*", "\n*", v)
        doxygenCommentCache += v
    # str.count works on both Python 2 and 3; len(filter(...)) raises
    # TypeError on Python 3 because filter() returns an iterator there.
    t.lexer.lineno += t.value.count("\n")
|
|
def t_NEWLINE(t):
    r'\n+'
    # The raw string above is the ply token regex (a run of newlines); keep it
    # as the first statement. The matched run only advances the line counter
    # and yields no token.
    newline_run = t.value
    t.lexer.lineno = t.lexer.lineno + len(newline_run)
|
|
|
|
def t_error(v):
    # ply error hook: called with a token describing input that no rule
    # matches. The problem is reported on stdout, then the offending
    # character is skipped. Without the skip the lexer position never moves
    # and ply aborts the whole scan with a LexError on the first illegal
    # character.
    print( "Lex error: ", v )
    v.lexer.skip(1)
|
|
|
|
# Build the ply lexer at import time (parse_file.__init__ calls lex.lex() again before feeding it input).
lex.lex()
|
|
|
|
|
|
class TagStr(str):
    """Wrapper for a string that allows us to store the line number associated with it"""

    # Maps id(instance) -> line number for every live instance that was
    # created with a ``lineno`` keyword.
    lineno_reg = {}

    def __new__(cls, *args, **kw):
        """Create the string from *args*; remember ``kw["lineno"]`` if given."""
        new_obj = str.__new__(cls, *args)
        if "lineno" in kw:
            TagStr.lineno_reg[id(new_obj)] = kw["lineno"]
        return new_obj

    def __del__(self):
        """Drop this instance's registry entry so a reused id() cannot inherit it."""
        try:
            # pop() with a default never raises for instances that were
            # created without a line number.
            TagStr.lineno_reg.pop(id(self), None)
        except (AttributeError, TypeError, NameError):
            # During interpreter shutdown the module globals (including
            # TagStr itself) may already have been cleared.
            pass

    def lineno(self):
        """Return the recorded line number, or -1 when none was recorded."""
        return TagStr.lineno_reg.get(id(self), -1)
|
|
|
|
|
|
# Text of the doxygen-style comments collected by the comment token rules.
doxygenCommentCache = ""

# Track what was added in what order and at what depth
parseHistory = []
|
|
|
|
def is_namespace(nameStack):
    """Tell whether the name stack starts with the ``namespace`` keyword.

    An empty stack is never a namespace.
    """
    return len(nameStack) != 0 and nameStack[0] == "namespace"
|
|
|
|
def is_enum_namestack(nameStack):
    """Tell whether the name stack describes an enum.

    That is the case when it starts with ``enum`` or with ``typedef enum``.
    """
    size = len(nameStack)
    if size == 0:
        return False
    head = nameStack[0]
    if head == "enum":
        return True
    return size > 1 and head == "typedef" and nameStack[1] == "enum"
|
|
|
|
def is_fundamental(s):
    """Tell whether every whitespace-separated word of *s* is a built-in type word.

    A string without any word (for example the empty string) yields True.
    """
    fundamental_words = (
        "size_t", "struct", "union",
        "unsigned", "signed",
        "bool", "char", "short", "int",
        "float", "double", "long",
        "void", "*",
    )
    return all(word in fundamental_words for word in s.split())
|
|
|
|
def is_function_pointer_stack(stack):
    """Tell whether the token stack looks like a function pointer declaration.

    The stack qualifies when it holds exactly two top-level parenthesised
    groups and a ``*`` directly follows the ``(`` that opens the first one.
    """
    depth = 0                 # current parenthesis nesting
    closed_groups = 0         # top-level groups closed so far
    pointer_in_first_group = False
    previous = None
    for token in stack:
        if token == "(":
            depth += 1
        elif token == ")":
            # An unmatched ")" is ignored.
            if depth > 0:
                depth -= 1
                if depth == 0:
                    closed_groups += 1
        elif token == "*":
            if previous == "(" and closed_groups == 0 and depth == 1:
                pointer_in_first_group = True
        previous = token
    return pointer_in_first_group and closed_groups == 2
|
|
|
|
def is_method_namestack(stack):
    """Tell whether the token stack looks like a method/function.

    Decision order (first match wins):
      * no "(" at all, or a leading ``typedef``  -> not a method
        (TODO deal with typedef function prototypes)
      * contains ``operator``                    -> method (allow all operators)
      * "{" appears before "("                   -> not a method
        (struct that looks like a method/class)
      * needs a ")" as well; then a body ("{" and "}"), a trailing ";" that is
        not a function pointer, or a lone "{" make it a method
        (ideally we catch both braces... TODO)
    Finally a candidate is rejected when "=" comes before "(", which is the
    case of a property set to something with parens such as
    "static const int CONST_A = (1 << 7) - 1;".
    """
    if '(' not in stack or stack[0] == 'typedef':
        return False
    if 'operator' in stack:
        return True
    has_open_brace = '{' in stack
    if has_open_brace and stack.index('{') < stack.index('('):
        return False
    if ')' not in stack:
        return False

    if has_open_brace and '}' in stack:
        candidate = True
    elif stack[-1] == ';':
        candidate = False if is_function_pointer_stack(stack) else True
    else:
        candidate = has_open_brace

    if candidate and "=" in stack and stack.index("=") < stack.index("("):
        return False
    return candidate
|
|
|
|
def is_property_namestack(nameStack):
    """Tell whether the name stack looks like a property (member variable).

    A stack is a property when it has no parenthesis at all, when an "="
    comes before the first "(", or when it is a function pointer.
    """
    has_open_paren = "(" in nameStack
    if not has_open_paren and ")" not in nameStack:
        return True
    if has_open_paren and "=" in nameStack:
        if nameStack.index("=") < nameStack.index("("):
            return True
    #See if we are a function pointer
    if is_function_pointer_stack(nameStack):
        return True
    return False
|
|
|
|
def detect_lineno(s):
    """Detect the line number for a given token string

    The value of ``s.lineno()`` is used when *s* offers such a method and it
    returns something other than -1. Otherwise the module-level ``curLine``
    is returned, or -1 when ``curLine`` has not been set yet.
    """
    try:
        rtn = s.lineno()
    except (AttributeError, TypeError):
        # Plain strings have no lineno() method; an object whose ``lineno``
        # is not callable is treated the same way.
        rtn = -1
    if rtn != -1:
        return rtn
    # curLine only exists once a parse has started; fall back to the
    # "unknown" marker instead of raising NameError before that.
    return globals().get("curLine", -1)
|
|
|
|
def filter_out_attribute_keyword(stack):
    """Strips __attribute__ and its parenthetical expression from the stack

    Only the first ``__attribute__`` entry is handled. When it is followed by
    "(", everything up to the matching ")" is removed with it; if the
    parentheses never balance, only the keyword itself is removed. A stack
    without ``__attribute__`` is returned unchanged (same object).
    """
    if "__attribute__" not in stack:
        return stack
    debug.debug("Stripping __attribute__ from %s" % (stack,))
    attr_index = stack.index("__attribute__")
    # Default: drop just the keyword (also covers a keyword at the very end
    # of the stack, which used to raise IndexError and leave it in place).
    attr_end = attr_index + 1
    #Find final paren
    if attr_end < len(stack) and stack[attr_end] == '(':
        paren_count = 1
        # range() instead of xrange(): xrange is a NameError on Python 3,
        # which the former bare except silently turned into "strip nothing".
        for i in range(attr_end + 1, len(stack)):
            elm = stack[i]
            if elm == '(':
                paren_count += 1
            elif elm == ')':
                paren_count -= 1
                if paren_count == 0:
                    attr_end = i + 1
                    break
    new_stack = stack[:attr_index] + stack[attr_end:]
    debug.debug("stripped stack is %s" % (new_stack,))
    return new_stack
|
|
|
|
|
|
|
|
# Access specifier keywords; parse_file.__init__ normalizes the entries in place.
supportedAccessSpecifier = ['public', 'protected', 'private']
|
|
|
|
|
|
##
## @brief Join the scoped name elements : ['class', 'Bar', ':', ':', 'Foo'] -> ['class', 'Bar::Foo']
##        Two consecutive ':' entries are merged into '::', which is then glued
##        to the entry before it and to the entry after it. No other entry is
##        removed or changed. A leading '::' (nothing before it) is glued to
##        the entry after it only: [':', ':', 'Foo'] -> ['::Foo']
## @param table Input table to convert. ex: ['class', 'Bar', ':', ':', 'Foo']
## @return The new table. ex: ['class', 'Bar::Foo']. The input table itself is
##         returned when it contains no '::' sequence.
##
def create_compleate_class_name(table):
    if "::" not in "".join(table):
        return table
    # we need to convert it :
    out = []
    for name in table:
        if not out:
            out.append(name)
        elif name == ":" and out[-1].endswith(":"):
            # second ':' of a scope operator: ':' -> '::'
            out[-1] += name
        elif out[-1].endswith("::"):
            if len(out) > 1:
                # glue "<previous>" + "::" + "<name>" into one entry
                out[-2] += out[-1] + name
                del out[-1]
            else:
                # leading '::' : there is no previous entry to glue to
                # (this used to raise IndexError on out[-2])
                out[-1] += name
        else:
            out.append(name)
    return out
|
|
|
|
class parse_file():
    """Tokenize a C/C++ header file and follow its brace / name structure.

    Creating an instance reads *fileName* through lutinTools.FileReadData,
    cleans the text up, runs the ply lexer over it and reports what is found
    (namespaces, classes, visibility changes, statements, ...) through the
    lutinDebug helpers.

    Attributes set by the constructor:
      m_classes            -- list, created empty
      m_elementParseStack  -- list of dicts with a 'level' key; the last entry
                              is popped when a closing brace returns to its
                              level (nothing in this class pushes to it)
      headerFileName       -- the file name given to the constructor
      anon_union_counter   -- [-1, 0]
      stack                -- raw token values of the current statement
      nameStack            -- filtered token values of the current statement
      braceDepth           -- current '{' nesting depth

    The module globals curLine / curChar are updated with the line number and
    position of the token being handled.
    """

    # Token types whose text is pushed on nameStack without further handling.
    # TAB, SQUOTE, DIVIDE and the brace tokens are deliberately absent: they
    # never reach the name stack.
    _NAME_STACK_TOKENS = frozenset([
        'OPEN_PAREN', 'CLOSE_PAREN',
        'OPEN_SQUARE_BRACKET', 'CLOSE_SQUARE_BRACKET',
        'EQUALS', 'COMMA', 'BACKSLASH', 'PIPE', 'PERCENT', 'CARET',
        'EXCLAMATION', 'NUMBER', 'MINUS', 'PLUS', 'STRING_LITERAL',
        'NAME', 'AMPERSTAND', 'ASTERISK', 'CHAR_LITERAL',
    ])

    # Keywords looked for (in this order) in the name stack when a '{' opens.
    _BRACE_OWNER_KEYWORDS = ('namespace', 'class', 'enum', 'struct', 'typedef', 'union')

    def __init__(self, fileName):
        """Read, clean and tokenize the header file *fileName*."""
        global curLine
        global curChar

        self.m_classes = []
        self.m_elementParseStack = []
        debug.info("Parse File tod document : '" + fileName + "'")

        self.headerFileName = fileName

        self.anon_union_counter = [-1, 0]
        # load all the file data :
        headerFileStr = lutinTools.FileReadData(fileName)

        # Make sure supportedAccessSpecifier are sane
        # (collapse runs of spaces; the module-level list is updated in place)
        for index, specifier in enumerate(supportedAccessSpecifier):
            if " " in specifier:
                supportedAccessSpecifier[index] = re.sub("[ ]+", " ", specifier).strip()

        headerFileStr = self._clean_header_text(headerFileStr)

        ###### debug.info(headerFileStr)
        self.stack = []      # token stack to find the namespace and the element name ...
        self.nameStack = []
        self.braceDepth = 0
        lex.lex()
        lex.input(headerFileStr)
        curLine = 0
        curChar = 0
        while True:
            tok = lex.token()
            if not tok:
                break
            tok.value = TagStr(tok.value, lineno=tok.lineno)
            debug.debug("TOK: " + str(tok))
            self.stack.append(tok.value)
            curLine = tok.lineno
            curChar = tok.lexpos
            if tok.type in ('PRECOMP_MACRO', 'PRECOMP_MACRO_CONT'):
                debug.debug("PRECOMP: " + str(tok))
                self.stack = []
                self.nameStack = []
                # Do nothing for macro ==> many time not needed ...
                continue

            if tok.type == 'OPEN_BRACE':
                self._on_open_brace()
            elif tok.type == 'CLOSE_BRACE':
                self._on_close_brace()
            elif tok.type in self._NAME_STACK_TOKENS:
                self.nameStack.append(tok.value)
            elif tok.type == 'COLON':
                # The emptiness check avoids an IndexError when a ':' arrives
                # while the name stack is empty (e.g. a leading '::').
                if self.nameStack \
                   and self.nameStack[0] in ('private', 'protected', 'public'):
                    debug.info("[" + str(self.braceDepth) + "] change visibility : " + self.nameStack[0])
                    self.nameStack = []
                    self.stack = []
                else:
                    self.nameStack.append(tok.value)
            elif tok.type == 'SEMI_COLON':
                if len(self.nameStack) != 0:
                    debug.info("[" + str(self.braceDepth) + "] semicolumn : " + str(self.nameStack))
                self.stack = []
                self.nameStack = []

    def _clean_header_text(self, headerFileStr):
        """Return the header text prepared for the lexer."""
        # Strip out template declarations
        # TODO : What is the real need ???
        headerFileStr = re.sub("template[\t ]*<[^>]*>", "", headerFileStr)
        # Normalize every "\r\n" / lone "\r" line ending to "\n"
        # ==> this simplifies the next regular expressions ...
        headerFileStr = re.sub("\r", "\r\n", headerFileStr)
        headerFileStr = re.sub("\r\n\n", "\r\n", headerFileStr)
        headerFileStr = re.sub("\r", "", headerFileStr)
        # Drop "#if 0 ... #endif/#else" sections
        # TODO : Can generate some error ...
        headerFileStr = re.sub(r"\#if 0(.*?)(\#endif|\#else)", "", headerFileStr, flags=re.DOTALL)

        debug.debug(headerFileStr)

        # Change multi line #defines and expressions to single lines maintaining line numbers
        matches = re.findall(r'(?m)^(?:.*\\\n)+.*$', headerFileStr)
        is_define = re.compile(r'[ \t\v]*#[Dd][Ee][Ff][Ii][Nn][Ee]')
        for m in matches:
            #Keep the newlines so that linecount doesnt break
            # (str.count: len(filter(...)) raises TypeError on Python 3)
            num_newlines = m.count("\n")
            if is_define.match(m):
                new_m = m.replace("\n", "<**multiLine**>\\n")
            else:
                # Just expression taking up multiple lines, make it take 1 line for easier parsing
                new_m = m.replace("\\\n", " ")
            if num_newlines > 0:
                new_m += "\n" * num_newlines
            headerFileStr = headerFileStr.replace(m, new_m)

        #Filter out Extern "C" statements. These are order dependent
        return re.sub(r'extern( |\t)+"[Cc]"( |\t)*{', "{", headerFileStr)

    def _on_open_brace(self):
        """Report what the pending name stack opens, then enter the new scope."""
        # When we open a brace, this is the time to parse the stack ...
        # Concatenate the 'xx', ':', ':', 'yy' entries in 'xx::yy'
        self.nameStack = create_compleate_class_name(self.nameStack)
        prefix = "[" + str(self.braceDepth) + "] "
        if len(self.nameStack) <= 0:
            #open brace with no name ...
            debug.warning(prefix + "find an empty stack ...")
        else:
            for keyword in self._BRACE_OWNER_KEYWORDS:
                if keyword in self.nameStack:
                    debug.info(prefix + "find a " + keyword + " : " + str(self.nameStack))
                    break
            else:
                debug.warning(prefix + "find an unknow stack : " + str(self.nameStack))
        self.stack = []
        self.nameStack = []
        self.braceDepth += 1

    def _on_close_brace(self):
        """Leave the current scope and drop the parse element that ends with it."""
        self.braceDepth -= 1
        debug.info("[" + str(self.braceDepth) + "] close brace")
        if len(self.m_elementParseStack) != 0 \
           and self.m_elementParseStack[-1]['level'] == self.braceDepth:
            self.m_elementParseStack.pop()
|