[DEV] create the monk basic library

This commit is contained in:
Edouard DUPIN 2013-12-22 18:55:48 +01:00
commit b6c956edef
172 changed files with 23716 additions and 0 deletions

37
codeBB/BB_Code.py Normal file
View File

@ -0,0 +1,37 @@
#!/usr/bin/python
import monkDebug as debug
import sys
import monkTools
import codeHL
import re
##
## @brief Transcode the code tag:
## [code style=c++]
## int main(void) {
## return 0;
## }
## [/code]
## @param[in] value String to transform.
## @return Transformed string.
##
def transcode(value):
    """Convert every ``[code ...]...[/code]`` section of the text to HTML.

    The tag may carry an optional ``style=<name>`` attribute; each match is
    handed to replace_code(), which builds the replacement markup.

    @param[in] value String to transform.
    @return Transformed string.
    """
    code_section = re.compile(r'\[code(( |\t|\n|\r)+style=(.*?))?\](.*?)\[/code\]',
                              flags=re.DOTALL)
    # TODO : remove the basic indentation of the element (to have a better display in the text tutorial ...
    return code_section.sub(replace_code, value)
def replace_code(match):
    """Build the HTML replacement for one ``[code]`` match.

    Group 3 of the match is the optional style name and group 4 the code
    text; both are passed to codeHL.transcode() and the result is wrapped
    in a ``<pre>`` element. An empty match yields an empty string.
    """
    if not match.group():
        return ""
    style_name = match.group(3)
    code_text = match.group(4)
    return '<pre>' + codeHL.transcode(style_name, code_text) + '</pre>'

19
codeBB/BB_Image.py Normal file
View File

@ -0,0 +1,19 @@
#!/usr/bin/python
import monkDebug as debug
import sys
import monkTools
import re
##
## @brief Transcode balise:
## [img w=125 h=45]dossier/image.jpg[/img]
## [img w=125 h=45]http://plop.com/dossier/image.png[/img]
## @param[in] value String to transform.
## @return Transformed string.
##
def transcode(value):
    """Placeholder for ``[img]`` handling: the text is returned unchanged."""
    return value

72
codeBB/BB_IndentAndDot.py Normal file
View File

@ -0,0 +1,72 @@
#!/usr/bin/python
import monkDebug as debug
import sys
import monkTools
import re
##
## @brief Transcode
## start the lines with ":" like:
## : 1
## : 2
## ::2.1
## ::2.2
## :::2.2.1
## ::::2.2.1.1
## :::::2.2.1.1.1
## ::2.3
## :3
## result:
##
## 1
## 2
## 2.1
## 2.2
## 2.2.1
## 2.2.1.1
## 2.3
## 3
##
## note: when you skip a line, the list stops and a new one starts...
##
## It is possible to put ":" with nothing after it, which generates an empty line..
##
## AND DOT
## **My line: 2 consecutive stars generate a bullet point whatever their position in the line...
##
## Resultat:
##
## * first line
## * second line
## @param[in] value String to transform.
## @return Transformed string.
##
def transcode(value):
    """Convert ':'-indented lines to nested ``<ul>`` and ``**`` items to ``<li>``.

    Every line starting with ':' is first tagged with an ':INDENT:' marker so
    that runs of consecutive tagged lines can be captured as one group and
    wrapped by replace_wiki_identation(). Deeper levels ('::', ':::') are
    resolved by the recursion between the two functions.

    @param[in] value String to transform.
    @return Transformed string.
    """
    value = re.sub(r'\n:', r'\n:INDENT:', value)
    # Raw string: the former non-raw literal relied on the invalid escape '\:'.
    indented_run = re.compile(r'((:INDENT:(.*?)\n)*)', flags=re.DOTALL)
    value = indented_run.sub(replace_wiki_identation, value)
    value = re.sub(r'\*\*(.*?)\n',
                   r'<li>\1</li>',
                   value,
                   flags=re.DOTALL)
    return value


def replace_wiki_identation(match):
    """Wrap one run of ':INDENT:' tagged lines in ``<ul>...</ul>``.

    The markers of the current level are removed and the result is passed
    through transcode() again so that the remaining leading ':' of nested
    levels are processed. The pattern can match the empty string; such
    matches are replaced by an empty string.
    """
    if match.group() == "":
        return ""
    #debug.verbose("plop: " + str(match.group()))
    inner = match.group().replace(':INDENT:', '')
    return transcode("<ul>" + inner + "</ul>")

77
codeBB/BB_Link.py Normal file
View File

@ -0,0 +1,77 @@
#!/usr/bin/python
import monkDebug as debug
import sys
import monkTools
import re
##
## @brief Transcode:
## [http://votre_site.con] => http://votre_site.con
## [http://votre_site.con | text displayed] => text displayed
## [http://votre_site.con text displayed] => text displayed.
##
## @param[in] value String to transform.
## @return Transformed string.
##
def transcode(value):
    """Replace the double-bracket link markup with HTML anchors.

    Handled forms (in this order):
      [[http://url | name]], [[http://url]], [lib[name | text]],
      [doc[name | text]], [tutorial[name | text]] and [class[name]]
    The class form is resolved by replace_link_class().

    @param[in] value String to transform.
    @return Transformed string.
    """
    fixed_rules = (
        # named link : [[http://plop.html | link name]]
        (r'\[\[http://(.*?) \| (.*?)\]\]', r'<a href="http://\1">\2</a>'),
        # direct link : [[http://plop.html]]
        (r'\[\[http://(.*?)\]\]', r'<a href="http://\1">http://\1</a>'),
        # direct lib link : [lib[libname]]
        (r'\[lib\[(.*?) \| (.*?)\]\]', r'<a href="../\1">\2</a>'),
        (r'\[doc\[(.*?) \| (.*?)\]\]', r'<a href="\1.html">\2</a>'),
        (r'\[tutorial\[(.*?) \| (.*?)\]\]', r'<a href="tutorial_\1.html">\2</a>'),
    )
    for pattern, anchor in fixed_rules:
        value = re.sub(pattern, anchor, value)
    return re.sub(r'\[class\[(.*?)\]\]', replace_link_class, value)
"""
def replace_link(match):
if match.group() == "":
return ""
#debug.verbose("plop: " + str(match.group()))
value = "<ul>"
value += re.sub(r':INDENT:',
r'',
match.group())
value += "</ul>"
return transcode(value)
"""
def replace_link_class(match):
    """Build the anchor for one ``[class[name]]`` match.

    The link target is the class name with every ':' replaced by '_' and
    '.html' appended; the visible text is the unmodified class name.
    """
    if not match.group():
        return ""
    #debug.info("plop: " + str(match.group()))
    class_name = match.group(1)
    page_name = class_name.replace(':', '_')
    return '<a href="' + page_name + '.html">' + class_name + '</a>'

View File

@ -0,0 +1,17 @@
#!/usr/bin/python
import monkDebug as debug
import sys
import monkTools
import re
##
## @brief Transcode thales specification ...
## @param[in] value String to transform.
## @return Transformed string.
##
def transcode(value):
    """Placeholder for the specification markup: the text is returned unchanged."""
    return value

42
codeBB/BB_Table.py Normal file
View File

@ -0,0 +1,42 @@
#!/usr/bin/python
import monkDebug as debug
import sys
import monkTools
import re
##
## @brief Transcode table:
## { | tableau_type_1
## | [b]colone 1[/b]
## ligne 1
## | colone 2 ligne 1
## |---
## | colone 1 ligne 1
## | colone 2 ligne 2
## |}
## With as many rows and columns as you want..
## Line breaks are allowed inside a table cell...
## In short, it tends to work like on a Wiki...
##
## result:
## +-------------------------------------+
## | colone 1 |
## +------------------+------------------+
## | ligne 1 | colone 2 ligne 1 |
## +------------------+------------------+
## | colone 1 ligne 1 | colone 2 ligne 2 |
## +------------------+------------------+
##
## TODO : Create simple table like :
## | colone 1 ||
## | ligne 1 | colone 2 ligne 1 |
## | colone 1 ligne 1 | colone 2 ligne 2|
## @param[in] value String to transform.
## @return Transformed string.
##
def transcode(value):
    """Placeholder for the table markup: the text is returned unchanged."""
    return value

91
codeBB/BB_Text.py Normal file
View File

@ -0,0 +1,91 @@
#!/usr/bin/python
import monkDebug as debug
import sys
import monkTools
import re
##
## @brief Transcode .
## [b]texte ici[/b]
## [i]texte ici[/i]
## [u]texte ici[/u]
## [strike]texte ici[/strike]
## [color=olive]texte ici[/color]
## [color=#456FF33F]texte ici[/color]
## Left : [left]texte ici[/left]
## Center : [center]texte ici[/center]
## Right : [right]texte ici[/right]
## [size=22]sdfgsdfgsdgsfd[/size]
## [cadre]mettre les code ici[/cadre]
## @param[in] value String to transform.
## @return Transformed string.
##
def transcode(value):
    """Convert the inline text-formatting tags to HTML.

    Each rule is applied in order over the whole text with re.DOTALL so a
    tag may span several lines. Every tag body is matched lazily, so two
    tags of the same kind in one text are converted separately (the
    ``[center]`` rule used to be greedy and merged them).

    @param[in] value String to transform.
    @return Transformed string.
    """
    rules = (
        (r'\[b\](.*?)\[/b\]',
         r'<span style="font-weight: bold;">\1</span>'),
        (r'\[i\](.*?)\[/i\]',
         r'<span style="font-style: italic;">\1</span>'),
        (r'\[u\](.*?)\[/u\]',
         r'<span style="text-decoration: underline;">\1</span>'),
        (r'\[sup\](.*?)\[/sup\]',
         r'<sup>\1</sup>'),
        (r'\[sub\](.*?)\[/sub\]',
         r'<sub>\1</sub>'),
        (r'\[color=(\#[0-9A-F]{6}|[a-z\-]+)\](.*?)\[/color\]',
         r'<span style="color: \1;">\2</span>'),
        (r'\[center\](.*?)\[/center\]',
         r'<div align="center">\1</div>'),
        (r'\[right\](.*?)\[/right\]',
         r'<div align="right">\1</div>'),
        (r'\[left\](.*?)\[/left\]',
         r'<div align="left">\1</div>'),
        (r'\[strike\](.*?)\[/strike\]',
         r'<span><strike>\1</strike></span>'),
        (r'\[size=(.*?)\](.*?)\[/size\]',
         r'<span style="font-size: \1px; line-height: normal;">\2</span>'),
        (r'\[cadre\](.*?)\[/cadre\]',
         r'<table align="center" border="0" cellpadding="3" cellspacing="1" width="90%"><tbody><tr><td class="quote">\1</td></tr></tbody></table>'),
        # four underscores up to the end of the line become a horizontal rule
        (r'____(.*?)\n',
         r'<hr>'),
    )
    for pattern, html in rules:
        value = re.sub(pattern, html, value, flags=re.DOTALL)
    return value

49
codeBB/BB_Title.py Normal file
View File

@ -0,0 +1,49 @@
#!/usr/bin/python
import monkDebug as debug
import sys
import monkTools
import re
##
## @brief Transcode .
## =?=Page Title=?=
## ==Title 1==
## ===Title 2===
## ====Title 3====
## =====Title 4=====
## ======Title 5======
## @param[in] value String to transform.
## @return Transformed string.
##
def transcode(value):
    """Convert the wiki-style title markup to HTML headings.

    ``=?=text=?=`` becomes a centered ``<h1>``; ``==text==`` up to
    ``======text======`` at the start of a line become ``<h1>`` to ``<h5>``.
    A newline is temporarily prepended before ALL heading rules so that a
    title on the very first line is matched by the rule of its own level
    (previously only the ``==`` rule saw it, which mis-rendered deeper
    titles placed on the first line).

    @param[in] value String to transform.
    @return Transformed string.
    """
    value = re.sub(r'=\?=(.*?)=\?=',
                   r'<h1><center>\1</center></h1>',
                   value)
    value = '\n' + value
    # Longest marker first, otherwise '==' would also consume deeper titles.
    levels = (('======', 'h5'),
              ('=====', 'h4'),
              ('====', 'h3'),
              ('===', 'h2'),
              ('==', 'h1'))
    for marker, tag in levels:
        value = re.sub(r'\n' + marker + r'(.*?)' + marker,
                       r'\n<' + tag + r'>\1</' + tag + '>',
                       value)
    # drop the newline added above
    return value[1:]

27
codeBB/BB_comment.py Normal file
View File

@ -0,0 +1,27 @@
#!/usr/bin/python
import monkDebug as debug
import sys
import monkTools
import re
##
## @brief Transcode balise:
## /* ... */
## @param[in] value String to transform.
## @return Transformed string.
##
def transcode(value):
    """Strip every ``/* ... */`` comment (possibly multi-line) from the text.

    @param[in] value String to transform.
    @return Transformed string.
    """
    block_comment = re.compile(r'\/\*(.*?)\*\/', flags=re.DOTALL)
    # NOTE: '//' line comments are intentionally left in place.
    return block_comment.sub(r'', value)

30
codeBB/BB_lineReturn.py Normal file
View File

@ -0,0 +1,30 @@
#!/usr/bin/python
import monkDebug as debug
import sys
import monkTools
import re
##
## @brief Transcode balise:
## \n\n ==> <br/>
## @param[in] value String to transform.
## @return Transformed string.
##
def transcode(value):
    """Turn blank lines into ``<br/>`` line breaks.

    Windows line endings are normalised to '\\n' first; every pair of
    consecutive newlines is then replaced by ``<br/>`` followed by a newline.

    @param[in] value String to transform.
    @return Transformed string.
    """
    unix_text = value.replace('\r\n', '\n')
    with_breaks = unix_text.replace('\n\n', '<br/>')
    return with_breaks.replace('<br/>', '<br/>\n')

52
codeBB/codeBB.py Normal file
View File

@ -0,0 +1,52 @@
#!/usr/bin/python
import monkDebug as debug
import sys
import monkTools
import re
import BB_Title
import BB_Text
import BB_IndentAndDot
import BB_Link
import BB_Image
import BB_Table
import BB_comment
import BB_lineReturn
import BB_Code
import BB_Specification
##
## @brief Transcode input data in the correct format.
## @param[in] value String to transform.
## @return Transformed string.
##
def transcode(value):
    """Run the complete BBcode-to-HTML pipeline on a text.

    '<' and '>' are escaped first, then every BB_* module is applied in a
    fixed order (comments are removed before anything else, line returns
    are handled last).

    @param[in] value String to transform.
    @return Transformed string.
    """
    # remove html property
    value = value.replace('<', '&lt;').replace('>', '&gt;')
    pipeline = (BB_comment,
                BB_Title,
                BB_Text,
                BB_IndentAndDot,
                BB_Link,
                BB_Image,
                BB_Table,
                BB_Code,
                BB_Specification,
                BB_lineReturn)
    for stage in pipeline:
        value = stage.transcode(value)
    return value
##
## @brief transcode a BBcode file in a html file
## @return True if the file is transformed
##
def transcode_file(inputFileName, outputFileName):
    """Transcode a BBcode file into an HTML file.

    The input is read with monkTools.file_read_data(); when that yields an
    empty string nothing is written.

    @return True if the file is transformed, False when the input is empty.
    """
    bbcode_text = monkTools.file_read_data(inputFileName)
    if bbcode_text == "":
        return False
    html_text = transcode(bbcode_text)
    debug.warning(" out: " + outputFileName)
    monkTools.file_write_data(outputFileName, html_text)
    return True

31
codeHL/codeHL.py Normal file
View File

@ -0,0 +1,31 @@
#!/usr/bin/python
import monkDebug as debug
import sys
import monkTools
import codeHLcpp
import codeHLBBcode
import codeHLJava
import codeHLjson
import codeHLPython
import codeHLXML
import codeHLshell
def transcode(type, value):
    """Highlight a code snippet according to its language name.

    Recognised names are "c++", "java", "bbcode", "python", "json", "xml"
    and "shell"; any other name (including None) returns the text unchanged.
    """
    # The lambdas keep the module lookups lazy: only the selected
    # highlighter module is touched.
    highlighters = {
        "c++": lambda text: codeHLcpp.transcode(text),
        "java": lambda text: codeHLJava.transcode(text),
        "bbcode": lambda text: codeHLBBcode.transcode(text),
        "python": lambda text: codeHLPython.transcode(text),
        "json": lambda text: codeHLjson.transcode(text),
        "xml": lambda text: codeHLXML.transcode(text),
        "shell": lambda text: codeHLshell.transcode(text),
    }
    highlight = highlighters.get(type)
    if highlight is None:
        return value
    return highlight(value)

9
codeHL/codeHLBBcode.py Normal file
View File

@ -0,0 +1,9 @@
#!/usr/bin/python
import monkDebug as debug
import sys
import monkTools
import re
def transcode(value):
    """BBcode highlighting is not implemented: the text is returned unchanged."""
    return value

9
codeHL/codeHLJava.py Normal file
View File

@ -0,0 +1,9 @@
#!/usr/bin/python
import monkDebug as debug
import sys
import monkTools
import re
def transcode(value):
    """Java highlighting is not implemented: the text is returned unchanged."""
    return value

9
codeHL/codeHLPython.py Normal file
View File

@ -0,0 +1,9 @@
#!/usr/bin/python
import monkDebug as debug
import sys
import monkTools
import re
def transcode(value):
    """Python highlighting is not implemented: the text is returned unchanged."""
    return value

9
codeHL/codeHLXML.py Normal file
View File

@ -0,0 +1,9 @@
#!/usr/bin/python
import monkDebug as debug
import sys
import monkTools
import re
def transcode(value):
    """XML highlighting is not implemented: the text is returned unchanged."""
    return value

63
codeHL/codeHLcpp.py Normal file
View File

@ -0,0 +1,63 @@
#!/usr/bin/python
import monkDebug as debug
import sys
import monkTools
import re
# Ordered C/C++ highlighting rules: [regular expression, CSS class].
# transcode() applies them in this order, so earlier rules (comments,
# preprocessor lines, string literals) take precedence over later ones.
listRegExp = [
    [ r'/\*\*(.*?)\*/', 'code-doxygen'],
    [ r'/\*(.*?)\*/', 'code-comment'],
    [ r'//!(.*?)\n', 'code-doxygen'],
    [ r'//(.*?)\n', 'code-comment'],
    [ r'#(.*?)\n', 'code-preproc'],
    [ r'"((\\"|.)*?)"', 'code-text-quote'],
    [ r"'(('|.)*?)'", 'code-text-quote'],
    [ r'(inline|const|class|virtual|private|public|protected|friend|extern|auto|register|static|volatile|typedef|struct|union|enum)',
      'code-storage-keyword'],
    [ r'(bool|BOOL|char(16_t|32_t)?|double|float|u?int(8|16|32|64|128)?(_t)?|long|short|signed|size_t|unsigned|void|(I|U)(8|16|32|64|128))',
      'code-type'],
    [ r'(((0(x|X)[0-9a-fA-F]*)|(\d+\.?\d*|\.\d+)((e|E)(\+|\-)?\d+)?)(L|l|UL|ul|u|U|F|f)?)',
      'code-number'],
    # '+' so that the whole member name is highlighted, not only its first character
    [ r'(m_[A-Za-z_0-9]+)',
      'code-member'],
    [ r'(( |\t)_[A-Za-z_0-9]*)',
      'code-input-function'],
    [ r'(return|goto|if|else|case|default|switch|break|continue|while|do|for|sizeof)( |\t|\(|\{)',
      'code-keyword'],
    [ r'((new|delete|try|catch|memset|fopen|fread|fwrite|fgets|fclose|printf|(f|s|diag_)printf|calloc|malloc|realloc|(cyg|sup)_([a-z]|[A-Z]|[0-9]|_)+)( |\t|\())',
      'code-function-system'],
    [ r'((\w|_)+[ \t]*\()',
      'code-function-name'],
    # __DATE__ added next to the historical (misspelled) __DATA__ entry
    [ r'(NULL|MAX|MIN|__LINE__|__DATA__|__DATE__|__FILE__|__func__|__TIME__|__STDC__)',
      'code-generic-define'],
    # class names must be bare CSS identifiers: they are emitted inside class="..."
    [ r'([A-Z_][A-Z_0-9]{3,500})',
      'code-macro'],
    [ r'(==|>=|<=|!=|>{1,2}|<{1,2}|&&|\{|\})',
      'code-operator'],
    [ r'(true|TRUE|false|FALSE)',
      'code-operator'],
    [ r'((\w+::)+\w+)',
      'code-class']
]
def transcode(value):
    """Wrap the recognised C/C++ tokens of a snippet in ``<span>`` elements.

    The rules of listRegExp are tried in order. For each match, the text
    located before it is highlighted recursively, the match itself is
    wrapped in a span carrying the rule's CSS class, and the search for the
    same rule continues on the text located after the match. What is left
    once all rules have been tried is appended unchanged.
    """
    remaining = value
    pieces = []
    for pattern, css_class in listRegExp:
        found = re.search(pattern, remaining, re.DOTALL)
        while found is not None:
            # sub parse the start :
            pieces.append(transcode(remaining[:found.start()]))
            # transform local
            pieces.append('<span class="' + css_class + '">' + found.group() + '</span>')
            # change the input value and search again ...
            remaining = remaining[found.end():]
            found = re.search(pattern, remaining, re.DOTALL)
    pieces.append(remaining)
    return "".join(pieces)

10
codeHL/codeHLjson.py Normal file
View File

@ -0,0 +1,10 @@
#!/usr/bin/python
import monkDebug as debug
import sys
import monkTools
import re
def transcode(value):
    """JSON highlighting is not implemented: the text is returned unchanged."""
    return value

14
codeHL/codeHLshell.py Normal file
View File

@ -0,0 +1,14 @@
#!/usr/bin/python
import monkDebug as debug
import sys
import monkTools
import re
# Shell highlighting rules: [regular expression, replacement template].
listRegExp = [
    [r'#(.*?)\n', r'<span class="code-preproc">#\1</span>\n'],
]


def transcode(value):
    """Highlight a shell snippet: every '#' comment up to the end of the line
    is wrapped in a ``code-preproc`` span."""
    for pattern, template in listRegExp:
        value = re.compile(pattern, flags=re.DOTALL).sub(template, value)
    return value

108
monk.py Executable file
View File

@ -0,0 +1,108 @@
#!/usr/bin/python
# for path inspection:
import sys
import os
import inspect
import fnmatch
import monkDebug as debug
import monkModule
import monkArg
import monkTools
# Command line description: generic options first, then the targets.
myArg = monkArg.MonkArg()
myArg.add(monkArg.ArgDefine("h", "help", desc="display this help"))
myArg.add_section("option", "Can be set one time in all case")
myArg.add(monkArg.ArgDefine(
    "v",
    "verbose",
    list=[["0", "None"],
          ["1", "error"],
          ["2", "warning"],
          ["3", "info"],
          ["4", "debug"],
          ["5", "verbose"]],
    desc="Display makefile debug level (verbose) default =2"))
myArg.add(monkArg.ArgDefine("C", "color", desc="Display makefile output in color"))
myArg.add_section("cible", "generate in order set")
# Parsed once at import time; every later pass iterates over this list.
localArgument = myArg.parse()
##
## @brief Display the help of this makefile
##
def usage():
    """Display the help of this tool and terminate the process (status 0).

    The generic options come from myArg.display(); the list of targets is
    completed with every module returned by
    monkModule.list_all_module_with_desc().
    """
    # generic argument displayed :
    myArg.display()
    # print() with a single argument behaves the same on Python 2 and 3
    print(" all")
    print(" Build all (only for the current selected board) (bynary and packages)")
    print(" clean")
    print(" Clean all (same as previous)")
    listOfAllModule = monkModule.list_all_module_with_desc()
    for mod in listOfAllModule:
        print(" " + mod[0] + " / " + mod[0] + "-clean")
        print(" " + mod[1])
    print(" ex: " + sys.argv[0] + " all")
    sys.exit(0)
##
## @brief Preparse the argument to get the verbose element for debug mode
##
def parse_generic_arg(argument, active):
    """Handle one of the generic options ("help", "verbose", "color").

    "verbose" and "color" are applied only when active is True; "help" is
    displayed only when active is False.

    @param[in] argument Parsed argument element.
    @param[in] active Selects which of the options are acted upon (see above).
    @return True when the argument is a generic option, False otherwise.
    """
    option = argument.get_option_name()
    if option not in ("help", "verbose", "color"):
        return False
    if option == "help":
        #display help
        if not active:
            usage()
    elif option == "verbose":
        if active:
            debug.set_level(int(argument.get_arg()))
    elif active:
        debug.enable_color()
    return True
##
## @brief Parse default unique argument:
##
if __name__ == "__main__":
    # First pass over the command line: only "verbose" and "color" take
    # effect here ("help" is handled by the second pass in start()).
    for argument in localArgument:
        parse_generic_arg(argument, True)
##
## @brief Run everything that is needed in the system
##
def start():
    """Run everything that is requested on the command line.

    Generic options are skipped (or, for "help", displayed). Any other
    named option is reported and followed by the usage text. Each free
    argument is taken as a module name: the module is parsed and its
    documentation generated. When no module was requested, all known
    modules are parsed first and then all generated.
    """
    actionDone = False
    # parse all argument
    for argument in localArgument:
        if parse_generic_arg(argument, False):
            continue
        if argument.get_option_name() != "":
            debug.warning("Can not understand argument : '" + argument.get_option_name() + "'")
            usage()
        else:
            # ArgElement exposes get_arg(); the former GetArg() did not exist
            module = monkModule.get_module(argument.get_arg())
            module.parse_code()
            module.generate()
            actionDone = True
    # if no action done : we do "all" ...
    if not actionDone:
        #Must generate all docs :
        moduleElements = monkModule.get_all_module()
        for module in moduleElements:
            module.parse_code()
        for module in moduleElements:
            module.generate()
##
## @brief When the user use with make.py we initialise ourself
##
if __name__ == '__main__':
    sys.path.append(monkTools.get_run_folder())
    # Import all sub path without out and archive
    for folder in os.listdir("."):
        if not os.path.isdir(folder):
            continue
        if folder.lower() in ("android", "archive", "out"):
            continue
        debug.debug("Automatic load path: '" + folder + "'")
        monkModule.import_path(folder)
    start()

250
monkArg.py Normal file
View File

@ -0,0 +1,250 @@
#!/usr/bin/python
import sys
import monkDebug as debug
class ArgElement:
    """One parsed command line element: an option name and/or a value."""

    def __init__(self, option, value=""):
        self.m_option = option
        self.m_arg = value

    def get_option_name(self):
        """Return the option name ("" for a free argument)."""
        return self.m_option

    def get_arg(self):
        """Return the value attached to the element ("" when there is none)."""
        return self.m_arg

    def display(self):
        """Log the element through debug.info()."""
        if not self.m_arg:
            text = "option : " + self.m_option
        elif not self.m_option:
            text = "element : " + self.m_arg
        else:
            text = "option : " + self.m_option + ":" + self.m_arg
        debug.info(text)
class ArgDefine:
    """Description of one command line option.

    @param[in] smallOption Short name, like v for -v.
    @param[in] bigOption Long name, like verbose for --verbose.
    @param[in] list Accepted values as ["val", "description"] pairs; a
                    non-empty list implies that the option takes a parameter.
    @param[in] desc Help text of the option.
    @param[in] haveParam True when the option takes a free parameter.
    """

    def __init__(self,
                 smallOption="",
                 bigOption="",
                 list=None,
                 desc="",
                 haveParam=False):
        self.m_optionSmall = smallOption
        self.m_optionBig = bigOption
        # None instead of a shared mutable default list
        self.m_list = [] if list is None else list
        self.m_haveParam = len(self.m_list) != 0 or haveParam == True
        self.m_description = desc

    def get_option_small(self):
        return self.m_optionSmall

    def get_option_big(self):
        return self.m_optionBig

    def need_parameters(self):
        return self.m_haveParam

    def get_porperties(self):
        return ""

    def check_availlable(self, argument):
        """Return True when argument is accepted (always when no list is set)."""
        if len(self.m_list) == 0:
            return True
        for element, desc in self.m_list:
            if element == argument:
                return True
        return False

    def display(self):
        """Print the help of this option on stdout."""
        if self.m_optionSmall != "" and self.m_optionBig != "":
            print(" -" + self.m_optionSmall + " / --" + self.m_optionBig)
        elif self.m_optionSmall != "":
            print(" -" + self.m_optionSmall)
        elif self.m_optionBig != "":
            # this branch used to re-test m_optionSmall and was unreachable
            print(" --" + self.m_optionBig)
        else:
            print(" ???? ==> internal error ...")
        if self.m_description != "":
            print(" " + self.m_description)
        if len(self.m_list) != 0:
            hasDescriptiveElement = False
            for val, desc in self.m_list:
                if desc != "":
                    hasDescriptiveElement = True
                    break
            if hasDescriptiveElement:
                for val, desc in self.m_list:
                    print(" " + val + " : " + desc)
            else:
                tmpElementPrint = ""
                for val, desc in self.m_list:
                    if len(tmpElementPrint) != 0:
                        tmpElementPrint += " / "
                    tmpElementPrint += val
                print(" { " + tmpElementPrint + " }")

    def parse(self, argList, currentID):
        return currentID
class ArgSection:
    """Named section of the help text; it matches no option by itself."""

    def __init__(self, sectionName="", desc=""):
        self.m_section = sectionName
        self.m_description = desc

    def get_option_small(self):
        return ""

    def get_option_big(self):
        return ""

    def get_porperties(self):
        """Return the section tag shown on the usage line."""
        return " [" + self.m_section + "]"

    def display(self):
        """Print the section name and its description on stdout."""
        header = " [" + self.m_section + "] : "
        print(header + self.m_description)

    def parse(self, argList, currentID):
        return currentID
class MonkArg:
    """Command line description and parser.

    Options (ArgDefine) and sections (ArgSection) are registered in order;
    parse() then converts sys.argv into a list of ArgElement.
    """

    def __init__(self):
        self.m_listProperties = []

    def add(self, argument):
        """Register an option description."""
        self.m_listProperties.append(argument)

    def add_section(self, sectionName, sectionDesc):
        """Register a help section."""
        self.m_listProperties.append(ArgSection(sectionName, sectionDesc))

    def parse(self):
        """Parse sys.argv[1:] against the registered options.

        '--name', '--name=value', '--name value', '-n', '-nvalue',
        '-n=value' and '-n value' are recognised. An unknown '--' option is
        reported through debug.error(); a missing or refused parameter
        terminates the process with status -1. Everything else (free
        arguments and unknown '-' options) is returned as an element with
        an empty option name.

        @return list of ArgElement, in command line order.
        """
        listArgument = []  # composed of list element
        NotParseNextElement = False
        for iii in range(1, len(sys.argv)):
            # special case of parameter in some elements
            if NotParseNextElement:
                NotParseNextElement = False
                continue
            debug.verbose("parse [" + str(iii) + "]=" + sys.argv[iii])
            argument = sys.argv[iii]
            optionList = argument.split("=")
            debug.verbose(str(optionList))
            # str.split() always returns a list: the option is its first item
            option = optionList[0]
            optionParam = argument[len(option)+1:]
            debug.verbose(option)
            argumentFound = False
            if option[:2] == "--":
                # big argument
                for prop in self.m_listProperties:
                    if prop.get_option_big() == "":
                        continue
                    if prop.get_option_big() == option[2:]:
                        # find it
                        debug.verbose("find argument 2 : " + option[2:])
                        if prop.need_parameters():
                            internalSub = option[2+len(prop.get_option_big()):]
                            if len(internalSub) != 0:
                                if len(optionParam) != 0:
                                    # wrong argument ...
                                    debug.warning("maybe wrong argument for : '" + prop.get_option_big() + "' cmdLine='" + argument + "'")
                                    prop.display()
                                    continue
                                optionParam = internalSub
                            if len(optionParam) == 0:
                                #Get the next parameters
                                if len(sys.argv) > iii+1:
                                    optionParam = sys.argv[iii+1]
                                    NotParseNextElement = True
                                else:
                                    # missing arguments
                                    debug.warning("parsing argument error : '" + prop.get_option_big() + "' Missing : subParameters ... cmdLine='" + argument + "'")
                                    prop.display()
                                    sys.exit(-1)
                            if not prop.check_availlable(optionParam):
                                debug.warning("argument error : '" + prop.get_option_big() + "' SubParameters not availlable ... cmdLine='" + argument + "' option='" + optionParam + "'")
                                prop.display()
                                sys.exit(-1)
                            listArgument.append(ArgElement(prop.get_option_big(), optionParam))
                            argumentFound = True
                        else:
                            if len(optionParam) != 0:
                                debug.warning("parsing argument error : '" + prop.get_option_big() + "' need no subParameters : '" + optionParam + "' cmdLine='" + argument + "'")
                                # was prop.Display(), a method that does not exist
                                prop.display()
                            listArgument.append(ArgElement(prop.get_option_big()))
                            argumentFound = True
                        break
                if not argumentFound:
                    debug.error("UNKNOW argument : '" + argument + "'")
            elif option[:1] == "-":
                # small argument
                for prop in self.m_listProperties:
                    if prop.get_option_small() == "":
                        continue
                    if prop.get_option_small() == option[1:1+len(prop.get_option_small())]:
                        # find it
                        debug.verbose("find argument 1 : " + option[1:1+len(prop.get_option_small())])
                        if prop.need_parameters():
                            # text glued to the short option, like the 3 of -v3
                            internalSub = option[1+len(prop.get_option_small()):]
                            if len(internalSub) != 0:
                                if len(optionParam) != 0:
                                    # wrong argument ...
                                    debug.warning("maybe wrong argument for : '" + prop.get_option_big() + "' cmdLine='" + argument + "'")
                                    prop.display()
                                    continue
                                optionParam = internalSub
                            if len(optionParam) == 0:
                                #Get the next parameters
                                if len(sys.argv) > iii+1:
                                    optionParam = sys.argv[iii+1]
                                    NotParseNextElement = True
                                else:
                                    # missing arguments
                                    debug.warning("parsing argument error : '" + prop.get_option_big() + "' Missing : subParameters cmdLine='" + argument + "'")
                                    prop.display()
                                    sys.exit(-1)
                            if not prop.check_availlable(optionParam):
                                debug.warning("argument error : '" + prop.get_option_big() + "' SubParameters not availlable ... cmdLine='" + argument + "' option='" + optionParam + "'")
                                prop.display()
                                sys.exit(-1)
                            listArgument.append(ArgElement(prop.get_option_big(), optionParam))
                            argumentFound = True
                        else:
                            if len(optionParam) != 0:
                                debug.warning("parsing argument error : '" + prop.get_option_big() + "' need no subParameters : '" + optionParam + "' cmdLine='" + argument + "'")
                                prop.display()
                            listArgument.append(ArgElement(prop.get_option_big()))
                            argumentFound = True
                        break
            if not argumentFound:
                #unknow element ... ==> just add in the list ...
                debug.verbose("unknow argument : " + argument)
                listArgument.append(ArgElement("", argument))
        return listArgument

    def display(self):
        """Print the usage line followed by the help of every registered entry."""
        # print() with a single argument behaves the same on Python 2 and 3
        print("usage:")
        listOfPropertiesArg = ""
        for element in self.m_listProperties:
            listOfPropertiesArg += element.get_porperties()
        print(" " + sys.argv[0] + listOfPropertiesArg + " ...")
        for element in self.m_listProperties:
            element.display()

62
monkClass.py Normal file
View File

@ -0,0 +1,62 @@
#!/usr/bin/python
import monkDebug as debug
import monkNode as Node
##
## @brief transform template descrption in one element.
## @param[in] list of elements. ex : 'public', 'ewol::classee', '<', 'plop', '<', 'uint8_t', ',', 'int32_t', '>', '>'
## @return a simplify list. ex : 'public', 'ewol::classee<plop<uint8_t,int32_t>>'
##
def concatenate_template(list):
    """Return the token list unchanged (template merging is not implemented).

    @param[in] list Tokens of the inheritance declaration.
    @return The same list object.
    """
    # TODO ...
    return list
class Class(Node.Node):
    """Node describing a class declaration and its inheritance list.

    stack holds the declaration tokens: ['class', name] optionally followed
    by ':' and the inherited classes with their access keywords.
    """

    def __init__(self, stack=[], file="", lineNumber=0, documentation=[]):
        # check input :
        if len(stack) < 2:
            debug.error("Can not parse class : " + str(stack))
            return
        Node.Node.__init__(self, 'class', stack[1], file, lineNumber, documentation)
        self.subList = []
        self.access = "private"
        # heritage list :
        self.inherit = []
        token_count = len(stack)
        if token_count == 2:
            # just a simple class...
            return
        if token_count == 3:
            debug.error("error in parsing class : " + str(stack))
            return
        if stack[2] != ':':
            debug.error("error in parsing class : " + str(stack) + " missing ':' at the 3rd position ...")
        parents = concatenate_template(stack[3:])
        debug.verbose("inherit : " + str(parents))
        # access applied to the following class names until another keyword is met
        current_access = "private"
        for token in parents:
            if token in ['private', 'protected', 'public']:
                current_access = token
            elif token != ',':
                self.inherit.append({'access' : current_access, 'class' : token})
        debug.verbose("class : " + self.to_str())

    def to_str(self) :
        """Return a one-line C++-like summary of the class declaration."""
        text = "class " + self.name
        if self.inherit:
            text += " : "
            text += ", ".join(parent['access'] + " " + parent['class']
                              for parent in self.inherit)
        return text + " { ... };"

75
monkDebug.py Normal file
View File

@ -0,0 +1,75 @@
#!/usr/bin/python
import os
# Current verbosity: a message is printed when debugLevel is greater than or
# equal to the threshold of its function (error 1, warning 2, info 3,
# debug 4, verbose 5). Changed with set_level().
debugLevel=3
# Set to True by enable_color().
debugColor=False
# Escape sequences placed around the messages; they stay empty until
# enable_color() is called.
color_default= ""
color_red = ""
color_green = ""
color_yellow = ""
color_blue = ""
color_purple = ""
color_cyan = ""
def set_level(id):
    """Set the module-wide verbosity level (stored in debugLevel)."""
    global debugLevel
    debugLevel = id
def enable_color():
    """Switch the colored output on by filling the color_* globals with
    ANSI escape sequences."""
    global debugColor
    global color_default, color_red, color_green, color_yellow
    global color_blue, color_purple, color_cyan
    debugColor = True
    color_default = "\033[00m"
    color_red = "\033[31m"
    color_green = "\033[32m"
    color_yellow = "\033[33m"
    color_blue = "\033[34m"
    color_purple = "\033[35m"
    color_cyan = "\033[36m"
def verbose(input):
    """Print a message in blue when the verbosity level is at least 5."""
    if debugLevel < 5:
        return
    print(color_blue + input + color_default)
def debug(input):
    """Print a message in green when the verbosity level is at least 4."""
    if debugLevel < 4:
        return
    print(color_green + input + color_default)
def info(input):
    """Print a message (no leading color) when the verbosity level is at least 3."""
    if debugLevel < 3:
        return
    print(input + color_default)
def warning(input):
    """Print a "[WARNING] " message in purple when the verbosity level is at least 2."""
    if debugLevel < 2:
        return
    print(color_purple + "[WARNING] " + input + color_default)
def error(input, threadID=-1):
    """Print an "[ERROR] " message in red (verbosity level at least 1) and
    terminate the process with status -1.

    threadID is accepted but not used.
    """
    # NOTE(review): the exit is taken to be unconditional (outside the level
    # test) -- confirm against the original indentation.
    show_message = debugLevel >= 1
    if show_message:
        print(color_red + "[ERROR] " + input + color_default)
    exit(-1)
def print_element(type, lib, dir, name):
    """Print one "type : lib dir name" line (verbosity level at least 3),
    each field in its own color."""
    if debugLevel < 3:
        return
    colored_type = color_cyan + type + color_default
    colored_lib = color_yellow + lib + color_default
    colored_name = color_blue + name + color_default
    print(colored_type + " : " + colored_lib + " " + dir + " " + colored_name)

45
monkEnum.py Normal file
View File

@ -0,0 +1,45 @@
#!/usr/bin/python
import monkDebug as debug
import monkNode as Node
class Enum(Node.Node):
    """Node describing an enum declaration and its values.

    stack holds the declaration tokens, optionally starting with 'typedef'.
    """

    def __init__(self, stack=[], file="", lineNumber=0, documentation=[]):
        self.baseValue = 0
        # check input :
        if len(stack) < 2:
            debug.error("Can not parse class : " + str(stack))
            return
        self.typedef = False
        if stack[0] == 'typedef':
            self.typedef = True
            # Drop the 'typedef' token so that stack[1] is the enum name (the
            # former bare "stack[1:]" expression had no effect). The stack is
            # kept as-is when nothing would be left to name the enum.
            if len(stack) > 2:
                stack = stack[1:]
        Node.Node.__init__(self, 'enum', stack[1], file, lineNumber, documentation)
        self.listElement = []

    def to_str(self) :
        """Return a one-line C++-like summary of the enum declaration."""
        return "enum " + self.name + " { ... };"

    def enum_append(self, stack):
        """Append the values described by a comma separated token list.

        Every group 'NAME' or 'NAME = tokens...' adds a
        {'name': ..., 'value': ...} entry to listElement; the value is the
        concatenation of the tokens following '=' ("" when there is none).
        """
        subList = []
        tmp = []
        for element in stack:
            if element == ',':
                subList.append(tmp)
                tmp = []
            else:
                tmp.append(element)
        if len(tmp) != 0:
            subList.append(tmp)
        #debug.verbose(" TODO : Need to append enum : " + str(subList))
        for element in subList:
            if len(element) == 0:
                # nothing between two commas: no value to register
                continue
            value = ""
            if len(element) > 2 and element[1] == '=':
                value = "".join(element[2:])
            self.listElement.append({'name' : element[0], 'value' : value})
        debug.verbose("enum list : " + str(self.listElement))

418
monkHtml.py Normal file
View File

@ -0,0 +1,418 @@
#!/usr/bin/python
import monkDebug as debug
import sys
import monkTools
#import CppHeaderParser
import re
import codeBB
import collections
# External documentation pages of the standard C++ classes; display_type()
# links a type name to its page when the name is one of these keys.
global_class_link = {
"std::string" : "http://www.cplusplus.com/reference/string/string/",
"std::u16string" : "http://www.cplusplus.com/reference/string/u16string/",
"std::u32string" : "http://www.cplusplus.com/reference/string/u32string/",
"std::wstring" : "http://www.cplusplus.com/reference/string/wstring/",
"std::vector" : "http://www.cplusplus.com/reference/vector/vector/"
}
def replace_type(match):
    """Wrap the matched text in a ``code-type`` span."""
    return "<span class=\"code-type\">" + match.group() + "</span>"
def replace_storage_keyword(match):
    """Wrap the matched text in a ``code-storage-keyword`` span."""
    return "<span class=\"code-storage-keyword\">" + match.group() + "</span>"
def display_color(valBase):
    """Highlight the storage keywords and the basic types of a text.

    @return (highlighted text, length of the ORIGINAL text) -- the length
            ignores the added markup.
    """
    # storage keyword :
    colored = re.sub("(inline|const|class|virtual|private|public|protected|friend|const|extern|auto|register|static|volatile|typedef|struct|union|enum)",
                     replace_storage_keyword,
                     valBase)
    # type :
    colored = re.sub("(bool|BOOL|char(16_t|32_t)?|double|float|u?int(8|16|32|64|128)?(_t)?|long|short|signed|size_t|unsigned|void|(I|U)(8|16|32|64|128))",
                     replace_type,
                     colored)
    return colored, len(valBase)
def display_type(type, myDoc):
    """Render a C++ type as HTML, linking the classes that are known.

    The "inline " keyword is removed, then every space separated word is
    either linked to its page in the current documentation
    (myDoc.get_class_link), linked to the external page of a standard
    class (global_class_link), or simply colored (display_color).

    @return [html, visible length of the type without the markup]
    """
    rendered = []
    visible_length = 0
    # we split all the element in list separated with space to keep class... and standard c+ class
    for index, word in enumerate(type.replace("inline ", "").split(' ')):
        if index > 0:
            # the space put back between two words
            visible_length += 1
        # check if the element in internal at the current lib
        name, link = myDoc.get_class_link(word)
        if len(link) != 0:
            rendered.append("<a href=\"" + link + "\" class=\"code-type\">" + name + "</a>")
            visible_length += len(word)
        # Check if the variable in a standard class:
        elif word in global_class_link.keys():
            rendered.append("<a href=\"" + global_class_link[word] + "\" class=\"code-type\">" + word + "</a>")
            visible_length += len(word)
        else:
            colored, colored_length = display_color(word)
            rendered.append(colored)
            visible_length += colored_length
    return [" ".join(rendered), visible_length]
def display_doxygen_param(comment, input, output):
    """Render one doxygen parameter description as HTML.

    The text before the first space of comment is the parameter name, the
    rest (starting with that space) is its description.

    @param[in] comment Text following the '@param...' tag.
    @param[in] input True to add the " [input]" marker.
    @param[in] output True to add the " [output]" marker.
    @return HTML line ending with "<br/>".
    """
    direction = ""
    if input == True:
        direction += " [input]"
    if output == True:
        direction += " [output]"
    #extract first element:
    cut = comment.find(" ")
    # TODO : Check if it exist in the parameter list ...
    return ("<b>Parameter" + direction + ":</b> "
            + "<span class=\"code-argument\">" + comment[:cut] + "</span> "
            + comment[cut:]
            + "<br/>")
def parse_doxygen(data) :
    """Convert a doxygen comment to HTML.

    The comment is split on '@'. The 'brief' sections come first, then the
    'note' sections, then a ``<ul>`` block holding the 'param' and 'return'
    sections (only when there is at least one). Sections starting with a
    newline and unknown tags are ignored.
    """
    sections = data.split("@")
    # Nominal case of the first \n: such sections carry nothing to display.
    useful = [section for section in sections if not section.startswith("\n")]
    summary = []
    for section in useful:
        if section.startswith("brief "):
            summary.append(section[6:] + "<br/>")
    for section in useful:
        if section.startswith("note "):
            summary.append("<b>Notes:</b> " + section[5:] + "<br/> ")
    details = []
    for section in useful:
        if section.startswith(("param[in,out] ", "param[out,in] ")):
            details.append(display_doxygen_param(section[14:], True, True))
        elif section.startswith("param[in] "):
            details.append(display_doxygen_param(section[10:], True, False))
        elif section.startswith("param[out] "):
            details.append(display_doxygen_param(section[11:], False, True))
        elif section.startswith("param "):
            details.append(display_doxygen_param(section[6:], False, False))
        elif section.startswith("return "):
            details.append("<b>Return:</b> " + section[7:] + "<br/>")
    html = "".join(summary)
    detail_html = "".join(details)
    if detail_html != '':
        html += "<ul>\n" + detail_html + "</ul>\n"
    return html
##
## @brief Build a padding string made of spaces.
## @param[in] size Number of spaces requested (zero or a negative value gives "").
## @return The padding string.
##
def white_space(size) :
    return " " * size
##
## @brief Update a column width so that the name of a function fits in it.
## @param[in] function Dictionary describing the function (reads the "name" entry).
## @param[in] size Current column width.
## @return The length of the name + 1 when the name is longer than size, size otherwise.
##
def calsulateSizeFunction(function, size) :
    nameLen = len(function["name"])
    if nameLen <= size:
        return size
    return nameLen + 1
##
## @brief Update a column width so that the return type of a function fits in it.
## @param[in] function Dictionary describing the function (reads the "rtnType" entry).
## @param[in] size Current column width.
## @return The length of the return type + 1 when it is longer than size, size otherwise.
##
def calsulateSizeReturn(function, size) :
    typeLen = len(function["rtnType"])
    if typeLen <= size:
        return size
    return typeLen + 1
##
## @brief Placeholder: generates nothing, both arguments are ignored.
## @param[in] tree Not used.
## @param[in] filterSubNamespace Not used.
## @return Always the empty string.
##
def addSub(tree, filterSubNamespace=False):
    return ""
# ##############################################################
# NEW function ...
# ##############################################################
##
## @brief Generate the nested HTML list (<ul>/<li>) of all the namespaces
##        found under a node.
## @param[in] element Node to explore (must provide get_all_sub_type()).
## @param[in] namespaceStack Names of the namespaces enclosing element
##            (None ==> top level). The list given by the caller is not modified.
## @param[in] level Depth of the menu, used in the css class name "niveau<level>".
## @return The HTML of the menu, "" when the node contains no namespace.
##
def generate_menu(element, namespaceStack=None, level=1):
    # A None default replaces the shared mutable "[]" default.
    if namespaceStack is None:
        namespaceStack = []
    listBase = element.get_all_sub_type(['namespace'])
    if len(listBase) == 0:
        return ""
    ret = '<ul class="niveau' + str(level) + '">\n'
    for child in listBase:
        node = child['node']
        # The sub-menu receives its own stack (parents + this namespace) so the
        # list of the caller never needs to be pushed/popped.
        retTmp = generate_menu(node, namespaceStack + [node.get_name()], level+1)
        if retTmp != "":
            subMenu = ' class="sousmenu"'
        else:
            subMenu = ''
        # The link of the namespace itself is built with the stack of its parents.
        ret += ' <li' + subMenu + '>' + generate_link(node, namespaceStack) + '\n'
        ret += retTmp
        ret += ' </li>\n'
    ret += '</ul>\n'
    return ret
##
## @brief Compute the file name of the HTML page of a node:
##        "<node type>_<namespace>__<namespace>__<name>.html"
## @param[in] element Node of the page (uses get_node_type() and get_name()).
## @param[in] namespaceStack Names of the enclosing namespaces, outermost first.
## @return The file name (without folder).
##
def generate_html_page_name(element, namespaceStack):
    prefix = "".join([name + "__" for name in namespaceStack])
    return element.get_node_type() + "_" + prefix + element.get_name() + '.html'
##
## @brief Compute the displayed title of a node:
##        "<node type>: <namespace>::<namespace>::<name>"
## @param[in] element Node to name (uses get_node_type() and get_name()).
## @param[in] namespaceStack Names of the enclosing namespaces, outermost first.
## @return The title string.
##
def generate_name(element, namespaceStack):
    scope = "".join([name + "::" for name in namespaceStack])
    return element.get_node_type() + ": " + scope + element.get_name()
##
## @brief Generate the HTML link (<a>) pointing on the page of a node.
## @param[in] element Node to link (its name is the text of the link).
## @param[in] namespaceStack Names of the enclosing namespaces, outermost first.
## @return The HTML link.
##
def generate_link(element, namespaceStack):
    target = generate_html_page_name(element, namespaceStack)
    label = element.get_name()
    return '<a href="' + target + '">' + label + '</a>'
##
## @brief Compute the width of the two columns used to align a list of methodes.
## @param[in] list List of elements ({'node': methode node, ...}).
## @return [size of the longest return type ("virtual " included), size of the longest name]
##
def calculate_methode_size(list):
    widestReturn = 0
    widestName = 0
    for entry in list:
        prefix = ""
        if entry['node'].get_virtual() == True:
            prefix = 'virtual '
        returnLen = len(prefix + entry['node'].get_return_type().to_str())
        if returnLen > widestReturn:
            widestReturn = returnLen
        nameLen = len(entry['node'].get_name())
        if nameLen > widestName:
            widestName = nameLen
    return [widestReturn, widestName]
##
## @brief Replace the angle brackets of a C++ text (templates, operators) by
##        their HTML entities.
## @param[in] text Text to protect.
## @return The text, displayable inside an HTML page.
##
def _escape_angle_brackets(text):
    return text.replace("<", "&lt;").replace(">", "&gt;")

##
## @brief Generate the HTML line (to set inside a <pre>) declaring one methode:
##        access marker, return type, name, parameters, "const" and "= 0".
## @param[in] element Element to display: {'node': methode node} with an
##            optional 'access' entry ('private', 'protected' or 'public').
## @param[in] namespaceStack Names of the enclosing namespaces (not used here).
## @param[in] displaySize [return type column size, name column size];
##            None ==> computed for this single methode.
## @param[in] link True to write the name as a link on the "#<uid>" anchor,
##            False to write it as a simple <span>.
## @return The HTML string, terminated by ";<br/>".
##
def write_methode(element, namespaceStack, displaySize = None, link = True):
    if displaySize is None:
        displaySize = calculate_methode_size([element])
    node = element['node']
    ret = ""
    if 'access' in element:
        if element['access'] == 'private':
            ret += '- '
        elif element['access'] == 'protected':
            ret += '# '
        elif element['access'] == 'public':
            ret += '+ '
        else:
            ret += ' '
    retType = ""
    if node.get_virtual() == True:
        retType += 'virtual '
    retType += node.get_return_type().to_str()
    if retType != "":
        ret += _escape_angle_brackets(retType) + " "
    # The padding is computed on the raw text: entities are one displayed char.
    ret += " " * (displaySize[0] - len(retType))
    name = node.get_name()
    # The name is escaped too: "operator<" would open an HTML tag otherwise.
    displayedName = _escape_angle_brackets(name)
    if link == True:
        ret += '<a class="code-function" href="#' + str(node.get_uid()) + '">' + displayedName + '</a>'
    else:
        ret += '<span class="code-function">' + displayedName + '</span>'
    ret += " " * (displaySize[1] - len(name)) + ' ('
    # Every parameter after the first one starts a new line, aligned under the first.
    paramIndent = " " * (displaySize[0] + displaySize[1] + 5)
    first = True
    for param in node.get_param():
        if first == False:
            ret += ',<br/>' + paramIndent
        first = False
        retParam = param.get_type().to_str()
        if retParam != "":
            # Same protection as the return type (ex: std::vector<int>).
            ret += _escape_angle_brackets(retParam) + " "
        ret += "<span class=\"code-argument\">" + param.get_name() + "</span>"
    ret += ')'
    # C++ order: the cv-qualifier comes before the pure specifier
    # ("void f() const = 0;").
    if node.get_constant() == True:
        ret += ' const'
    if node.get_virtual_pure() == True:
        ret += ' = 0'
    ret += ';'
    ret += '<br/>'
    return ret
##
## @brief Write the (placeholder) main page of the documentation: "index.html".
## @param[in] outFolder Folder where the page is created.
## @param[in] header Common HTML header written at the beginning of the page.
## @param[in] footer Common HTML footer written at the end of the page.
## @param[in] myLutinDoc Module documented (its base node gives the title).
##
def generate_stupid_index_page(outFolder, header, footer, myLutinDoc):
    # create index.hml :
    filename = outFolder + "/index.html"
    monkTools.create_directory_of_file(filename)
    # "with" closes the file even when one of the writes raises.
    with open(filename, "w") as page:
        page.write(header)
        page.write("<h1>" + myLutinDoc.get_base_doc_node().get_name() + "</h1>")
        page.write("<br/>")
        page.write("TODO : Main page ...")
        page.write("<br/>")
        page.write("<br/>")
        page.write(footer)
##
## @brief Write the body of the page of a class: constructor/destructor list,
##        synopsis of the methodes, then the detail (doxygen) of every element.
## @param[in] page Opened output file.
## @param[in] element Class node.
## @param[in] namespaceStack Names of the enclosing namespaces.
##
def _write_class_page(page, element, namespaceStack):
    # calculate element size :
    listBase = element.get_all_sub_type(['methode', 'constructor', 'destructor'])
    displayLen = calculate_methode_size(listBase)
    page.write("<h2>Constructor and Destructor:</h2>\n")
    page.write("<pre>\n")
    for elem in element.get_all_sub_type(['constructor']):
        page.write(write_methode(elem, namespaceStack, displayLen))
    for elem in element.get_all_sub_type(['destructor']):
        page.write(write_methode(elem, namespaceStack, displayLen))
    page.write("</pre>\n")
    page.write("<br/>\n")
    page.write("<h2>Synopsis:</h2>\n")
    page.write("<pre>\n")
    listBaseMethode = element.get_all_sub_type(['methode'])
    # the synopsis is aligned on the methodes only
    displayLen = calculate_methode_size(listBaseMethode)
    for elem in listBaseMethode:
        page.write(write_methode(elem, namespaceStack, displayLen))
    page.write("</pre>\n")
    page.write("<br/>\n")
    page.write("<h2>Description:</h2>\n")
    # display all functions :
    page.write("<h2>Detail:</h2>\n")
    for member in listBase:
        # the id is the target of the links generated by write_methode()
        page.write('<h3><a id="' + str(member['node'].get_uid()) + '">' + member['node'].get_name() + '</a></h3>')
        page.write("<pre>\n")
        page.write(write_methode(member, namespaceStack, link = False))
        page.write("</pre>\n")
        page.write(parse_doxygen(member['node'].get_doc()))
        page.write("<br/>\n")
        page.write("<hr/>\n")

##
## @brief Generate the HTML page of a node, and (before) the pages of all its
##        sub-nodes that own a page.
## @param[in] outFolder Folder where the pages are created.
## @param[in] header Common HTML header of every page.
## @param[in] footer Common HTML footer of every page.
## @param[in] element Node to generate.
## @param[in] namespaceStack Names of the namespaces enclosing element
##            (None ==> top level). The list given by the caller is not modified.
##
def generate_page(outFolder, header, footer, element, namespaceStack=None):
    # A None default replaces the shared mutable "[]" default.
    if namespaceStack is None:
        namespaceStack = []
    nodeType = element.get_node_type()
    pageTypes = ['library', 'application', 'namespace', 'class', 'struct', 'enum', 'union']
    if nodeType in pageTypes:
        # only these node types add their name in the scope of their sub-nodes
        if nodeType in ['namespace', 'class', 'struct']:
            subStack = namespaceStack + [element.get_name()]
        else:
            subStack = namespaceStack
        for elem in element.get_all_sub_type(pageTypes):
            generate_page(outFolder, header, footer, elem['node'], subStack)
    filename = outFolder + '/' + generate_html_page_name(element, namespaceStack)
    monkTools.create_directory_of_file(filename)
    # "with" closes the file even when the generation of the page raises.
    with open(filename, "w") as page:
        page.write(header)
        page.write("<h1>" + generate_name(element, namespaceStack) + "</h1>")
        page.write("<hr/>")
        if nodeType == 'class':
            _write_class_page(page, element, namespaceStack)
        elif nodeType in ['library', 'application', 'namespace', 'struct', 'enum', 'union']:
            page.write("TODO : the page ...")
        else:
            # not in a specific file ...
            debug.warning("might not appear here :'" + nodeType + "' = '" + element.get_name() + "'")
        page.write(footer)
##
## @brief Generate the complete HTML documentation of a module: theme files,
##        index page, one page per node, then one page per ".bb" documentation
##        and tutorial file.
## @param[in] myLutinDoc Module to document (gives the base node, the name and
##            the lists listDocFile / listTutorialFile).
## @param[in] outFolder Output folder.
##
def generate(myLutinDoc, outFolder) :
    myDoc = myLutinDoc.get_base_doc_node()
    for cssName in ["base.css", "menu.css"]:
        monkTools.copy_file(monkTools.get_current_path(__file__)+"/theme/" + cssName, outFolder+"/" + cssName)
    # create common header
    genericHeader = "".join([
        '<!DOCTYPE html>\n',
        '<html>\n',
        '<head>\n',
        ' <meta name="viewport" content="width=device-width, initial-scale=1.0, maximum-scale=1.0, minimum-scale=1.0">\n',
        ' <title>' + myDoc.get_name() + ' Library</title>\n',
        ' <link rel="stylesheet" href="base.css">\n',
        ' <link rel="stylesheet" href="menu.css">\n',
        '</head>\n',
        '<body>\n',
        ' <div class="navbar navbar-fixed-top">\n',
        ' <div class="container">\n',
        ' <h1>' + myDoc.get_name() + ' Library</h1>\n',
        ' <div id="menu">\n',
        generate_menu(myDoc),
        ' </div>\n',
        " </div>\n",
        " </div>\n",
        " <div class=\"container\" id=\"content\">\n",
    ])
    genericFooter = " </div>\n" + "</body>\n" + "</html>\n"
    # create index.hml :
    generate_stupid_index_page(outFolder, genericHeader, genericFooter, myLutinDoc)
    # create the namespace index properties :
    generate_page(outFolder, genericHeader, genericFooter, myDoc)

    # Transcode a list of [input file, output path] ".bb" files in HTML pages.
    # flatten=True replaces the '/' of the output path by '_' (single folder).
    def transcode_all(label, entries, flatten):
        for docInputName, outpath in entries:
            debug.print_element(label, myLutinDoc.name, "<==", docInputName)
            if flatten:
                outpath = outpath.replace('/','_')
            outputFileName = outFolder + "/" + outpath + ".html"
            debug.debug("output file : " + outputFileName)
            monkTools.create_directory_of_file(outputFileName)
            inData = monkTools.file_read_data(docInputName)
            # an empty input generates no page
            if inData == "":
                continue
            outData = genericHeader + codeBB.transcode(inData) + genericFooter
            monkTools.file_write_data(outputFileName, outData)

    transcode_all("doc", myLutinDoc.listDocFile, True)
    transcode_all("tutorial", myLutinDoc.listTutorialFile, False)

21
monkLibrary.py Normal file
View File

@ -0,0 +1,21 @@
#!/usr/bin/python
import monkDebug as debug
import os
import sys
import re
##
## @brief Container of all the elements declared by one library.
##        Every list is created empty.
##
class Libray():
    ##
    ## @brief Constructor.
    ## @param[in] libName Name of the library.
    ##
    def __init__(self, libName):
        self.name = libName
        # CPP section:
        self.namespaces, self.classes = [], []
        # C section:
        self.structs, self.variables = [], []
        self.methodes, self.unions = [], []
        self.types = []

168
monkMethode.py Normal file
View File

@ -0,0 +1,168 @@
##!/usr/bin/python
import monkDebug as debug
import monkNode as Node
import monkType as Type
import monkVariable as Variable
##
## @brief Node describing a function / methode declaration (or a constructor /
##        destructor), built from the token list of the declaration.
##
class Methode(Node.Node):
    ##
    ## @brief Parse the token stack of one declaration.
    ## @param[in] stack List of tokens of the declaration,
    ##            ex: ['virtual', 'void', 'draw', '(', 'int', 'a', ')', 'const', '=', '0']
    ## @param[in] file Name of the file where the declaration was found.
    ## @param[in] lineNumber Line of the declaration.
    ## @param[in] documentation Documentation lines attached to the declaration.
    ## @note The list defaults are never modified by this constructor (stack is
    ##       only re-sliced, documentation is only forwarded to Node.Node).
    ##
    def __init__(self, stack=[], file="", lineNumber=0, documentation=[]):
        name = ""
        type = 'methode'
        self.virtual = False
        self.virtualPure = False
        self.static = False
        self.inline = False
        self.const = False # the trailing "const" of a class methode
        # remove the constructor initialisation list: all after the first ':'
        if ':' in stack:
            stack = stack[:stack.index(':')]
        # The sizes are checked before every access: an empty or very short
        # stack must not raise an IndexError.
        if len(stack) >= 2 \
           and stack[-2] == '=' \
           and stack[-1] == '0':
            stack = stack[:-2]
            self.virtualPure = True
        # Leading specifiers are accepted in any order ("inline static" is as
        # valid as "static inline"); the attribute has the name of the keyword.
        while len(stack) != 0 \
              and stack[0] in ('virtual', 'static', 'inline'):
            setattr(self, stack[0], True)
            stack = stack[1:]
        if len(stack) != 0 \
           and stack[-1] == 'const':
            self.const = True
            stack = stack[:-1]
        namePos = -1
        debug.verbose("methode parse : " + str(stack))
        # the name is the token just before the first '('
        for iii in range(0, len(stack)-2):
            if stack[iii+1] == '(':
                name = stack[iii]
                namePos = iii
                break
        # no return type before the name ==> constructor or destructor
        if namePos == 0:
            debug.verbose("start with '" + str(name[0]) + "'")
            if name[0] == '~':
                type = 'destructor'
            else:
                type = 'constructor'
        debug.verbose("methode name : " + name)
        Node.Node.__init__(self, type, name, file, lineNumber, documentation)
        self.returnType = Type.TypeNone()
        self.variable = []
        # create the return Type (Can be Empty)
        # NOTE(review): when no '(' was found namePos is still -1, so the two
        # slices below are computed from the end of the stack -- confirm that
        # the parser never sends such a stack.
        retTypeStack = stack[:namePos]
        debug.verbose("return : " + str(retTypeStack))
        self.returnType = Type.Type(retTypeStack)
        # tokens between the first '(' and the last token (the closing ')')
        parameterStack = stack[namePos+2:len(stack)-1]
        debug.verbose("parameter : " + str(parameterStack))
        paramTmp = []
        braceOpen = 0
        for element in parameterStack:
            if braceOpen == 0:
                if element == ',':
                    # a ',' outside of any parenthesis ends the parameter
                    self.variable.append(Variable.Variable(paramTmp))
                    paramTmp = []
                elif element == '(':
                    paramTmp.append(element)
                    braceOpen += 1
                else:
                    paramTmp.append(element)
            else:
                # inside parenthesis (ex: function pointer): ',' is kept
                paramTmp.append(element)
                if element == '(':
                    braceOpen += 1
                elif element == ')':
                    braceOpen -= 1
        if len(paramTmp) != 0:
            self.variable.append(Variable.Variable(paramTmp))

    ##
    ## @brief Get a C++ like declaration of the methode (the parameters are not written).
    ## @return The declaration string.
    ##
    def to_str(self):
        ret = ""
        if self.virtual == True:
            ret += "virtual "
        if self.static == True:
            ret += "static "
        if self.inline == True:
            ret += "inline "
        ret += self.returnType.to_str()
        ret += " "
        ret += self.name
        ret += "("
        # ...
        ret += ")"
        # C++ order: "const" comes before the pure specifier "= 0"
        if self.const == True:
            ret += " const"
        if self.virtualPure == True:
            ret += " = 0"
        return ret

    ##
    ## @brief Get the status of the virtual function ( virtual XXX(...);)
    ## @return True if virtual is present, False otherwise
    ## @note This is only available for class methode
    ##
    def get_virtual(self):
        return self.virtual

    ##
    ## @brief Get the status of the virtual 'pure' function ( virtual XXX(...) = 0;)
    ## @return True if =0 is present, False otherwise
    ## @note This is only available for class methode
    ## @note Available only if the virtual is active
    ##
    def get_virtual_pure(self):
        return self.virtualPure

    ##
    ## @brief Get the status of the inline function ( inline XXX(...);)
    ## @return True if inline is present, False otherwise
    ##
    def get_inline(self):
        return self.inline

    ##
    ## @brief Get the status of the static function ( static XXX(...);)
    ## @return True if static is present, False otherwise
    ## @note This is only available for class methode
    ##
    def get_static(self):
        return self.static

    ##
    ## @brief Get the status of the constant function ( XXX(...) const;)
    ## @return True if const is present, False otherwise
    ## @note This is only available for class methode
    ##
    def get_constant(self):
        return self.const

    ##
    ## @brief Get the return type of the methode
    ## @return Return methode type (type: Type.Type)
    ##
    def get_return_type(self):
        return self.returnType

    ##
    ## @brief Get the list of parameter of the methode
    ## @return The requested list of parameter
    ##
    def get_param(self):
        return self.variable

353
monkModule.py Normal file
View File

@ -0,0 +1,353 @@
#!/usr/bin/python
import sys
import os
import inspect
import fnmatch
import monkDebug as debug
import monkTools as tools
import monkNode as Node
import monkParse as Parse
import monkHtml
class Module:
    # Names that never get a documentation link (basic types, qualifiers,
    # template placeholders, reference/pointer markers).
    _UNLINKED_NAMES = ("const", "enum", "void", "char", "char32_t", "float",
                       "double", "bool", "int8_t", "uint8_t", "int16_t",
                       "uint16_t", "int32_t", "uint32_t", "int64_t",
                       "uint64_t", "int", "T", "CLASS_TYPE", "&", "*", "**")
    # Scopes that never get a documentation link.
    _UNLINKED_PREFIXES = ("std::", "appl::")

    ##
    ## @brief Module class represent all system needed for a specific
    ##        module like
    ##          - type (bin/lib ...)
    ##          - dependency
    ##          - flags
    ##          - files
    ##          - ...
    ## @param[in] file File declaring the module (its folder is the origin folder).
    ## @param[in] moduleName Name of the module.
    ## @param[in] moduleType 'APPLICATION' or 'LIBRARY'.
    ## @note Raise a ValueError when moduleType has an other value.
    ##
    def __init__(self, file, moduleName, moduleType):
        ## Remove all variable to prevent error of multiple definition of the module ...
        self.originFile=''
        self.originFolder=''
        # type of the module:
        self.type='LIBRARY'
        # Name of the module
        self.name=moduleName
        self.listDocFile = []
        self.structureLib = Node.MainNode("library", moduleName)
        self.listTutorialFile = []
        self.webSite = ""
        self.pathParsing = ""
        self.pathGlobalDoc = ""
        self.externalLink = []
        self.title = moduleName + " Library"
        self.styleHtml = ""
        ## end of basic INIT ...
        if moduleType == 'APPLICATION' \
           or moduleType == 'LIBRARY':
            self.type=moduleType
        else :
            debug.error('for module "%s"' %moduleName)
            debug.error(' ==> error : "%s" ' %moduleType)
            # a string is not a valid exception object: raise a real one
            raise ValueError('Input value error')
        self.originFile = file
        self.originFolder = tools.get_current_path(self.originFile)

    ##
    ## @brief Set the module website (activate only when compile in release mode, else "../moduleName/)
    ## @param[in] url New Website url
    ##
    def set_website(self, url):
        self.webSite = url

    ##
    ## @brief set the parsing folder
    ## @param[in] path New path to parse
    ##
    def set_path(self, path):
        self.pathParsing = path

    ##
    ## @brief set the global documentation parsing folder
    ## @param[in] path New path to parse
    ##
    def set_path_general_doc(self, path):
        self.pathGlobalDoc = path

    ##
    ## @brief List of validate external library link (disable otherwise)
    ## @param[in] availlable List of all module link available
    ##
    def set_external_link(self, availlable):
        self.externalLink = availlable

    ##
    ## @brief Set the library title
    ## @param[in] title New title to set.
    ##
    def set_title(self, title):
        self.title = title

    ##
    ## @brief new html basic css file
    ## @param[in] cssFile File of the css style sheet
    ##
    def set_html_css(self, cssFile):
        self.styleHtml = cssFile

    ##
    ## @brief Create the module documentation: parse every "*.h" file of the
    ##        parsing folder, then register every "*.bb" file of the global
    ##        documentation folder.
    ##
    def parse_code(self):
        debug.info('Parse documantation code : ' + self.name)
        if self.pathParsing != "":
            for root, dirnames, filenames in os.walk(self.pathParsing):
                tmpList = fnmatch.filter(filenames, "*.h")
                # Import the module :
                for filename in tmpList:
                    fileCompleteName = os.path.join(root, filename)
                    debug.debug(" Find a file : '" + fileCompleteName + "'")
                    self.add_file(fileCompleteName)
        # display the hierarchie of all the class and namespace ...
        #self.structureLib.debug_display()
        if self.pathGlobalDoc != "":
            tutorialPath = os.path.join(self.pathGlobalDoc, "tutorial/")
            for root, dirnames, filenames in os.walk(self.pathGlobalDoc):
                tmpList = fnmatch.filter(filenames, "*.bb")
                # Import the module :
                for filename in tmpList:
                    fileCompleteName = os.path.join(root, filename)
                    debug.verbose(" Find a doc file : '" + fileCompleteName + "'")
                    # path relative to the doc folder, without the ".bb" extension
                    pathBase = fileCompleteName[len(self.pathGlobalDoc):len(fileCompleteName)-3]
                    # NOTE(review): the files found under "tutorial/" are
                    # registered as doc files and all the others as tutorials;
                    # this looks inverted -- confirm before changing it.
                    if fileCompleteName[:len(tutorialPath)] == tutorialPath:
                        self.add_file_doc(fileCompleteName, pathBase)
                    else:
                        self.add_tutorial_doc(fileCompleteName, pathBase)

    ##
    ## @brief Add a documentation file at the parsing system
    ## @param[in] filename File To add at the parsing element system.
    ## @param[in] outPath output system file.
    ##
    def add_file_doc(self, filename, outPath):
        debug.debug("adding file in documantation : '" + filename + "'")
        self.listDocFile.append([filename, outPath])

    ##
    ## @brief Add a tutorial file at the parsing system
    ## @param[in] filename File To add at the parsing element system.
    ## @param[in] outPath output system file.
    ##
    def add_tutorial_doc(self, filename, outPath):
        debug.debug("adding file in documantation : '" + filename + "'")
        self.listTutorialFile.append([filename, outPath])

    ##
    ## @brief Add a file at the parsing system: the file is parsed and its
    ##        content is merged in the structure of the library.
    ## @param[in] filename File To add at the parsing element system.
    ## @return True
    ##
    def add_file(self, filename):
        debug.debug("adding file in documantation : '" + filename + "'")
        parsedFile = Parse.parse_file(filename)
        self.structureLib = parsedFile.fusion(self.structureLib)
        return True

    ##
    ## @brief Generate the HTML documentation in the folder "out/doc/<module name>/"
    ##
    def generate(self):
        debug.info('Generate documantation code : ' + self.name)
        destFolder = "out/doc/" + self.name + '/'
        #tools.remove_folder_and_sub_folder(target.get_doc_folder(self.name));
        if monkHtml.generate(self, destFolder) == False:
            debug.warning("Generation Documentation ==> return an error for " + self.name)

    ##
    ## @brief Get the root node of the parsed library.
    ## @return The base node.
    ##
    def get_base_doc_node(self):
        return self.structureLib

    # NOTE(review): the methodes below read self.listClass, self.listEnum,
    # self.target, self.moduleName, self.documentation, self.local_path and
    # self.print_list, that are never created in this class: they can only
    # work when these attributes are set from outside -- to confirm.

    ##
    ## @brief Get the heritage list (parent) of one element.
    ## @param[in] element Element name.
    ## @return List of all element herited (oldest parent first, element last)
    ##
    def get_heritage_list(self, element):
        parents = []
        # get element class :
        if element in self.listClass.keys():
            localClass = self.listClass[element]
            # TODO : Support multiple heritage ...
            # only the first parent is followed
            for heritedClass in localClass['inherits']:
                parents = self.get_heritage_list(heritedClass['class'])
                break
        debug.verbose("find parent : " + element)
        parents.append(element)
        return parents

    ##
    ## @brief Get the heritage list (child) of this element.
    ## @param[in] curentClassName Element name.
    ## @return List of all childs
    ##
    def get_down_heritage_list(self, curentClassName):
        childs = []
        # get element class :
        for element in self.listClass:
            for heritedClass in self.listClass[element]['inherits']:
                if curentClassName == heritedClass['class']:
                    childs.append(element)
                    break
        debug.verbose("find childs : " + str(childs))
        return childs

    ##
    ## @brief transform the classname in a generic link (HTML)
    ## @param[in] elementName Name of the class requested
    ## @return [className, link] (link == "" when the element has no page)
    ##
    def get_class_link(self, elementName):
        if elementName in self._UNLINKED_NAMES \
           or elementName.startswith(self._UNLINKED_PREFIXES):
            return [elementName, ""]
        if elementName in self.listClass.keys() \
           or elementName in self.listEnum.keys():
            link = elementName.replace(":","_") + ".html"
            return [elementName, link]
        #else:
        #    return self.target.doc_get_link(elementName)
        return [elementName, ""]

    ##
    ## @brief transform the classname in a generic link (HTML) (external access ==> from target)
    ## @param[in] elementName Name of the class requested
    ## @param[in] target Build target (gives the build mode).
    ## @return [className, link]
    ##
    def get_class_link_from_target(self, elementName, target):
        # reject when auto call :
        if self.target != None:
            return [elementName, ""]
        # search in local list :
        if elementName in self.listClass.keys() \
           or elementName in self.listEnum.keys():
            link = elementName.replace(":","_") + ".html"
            if target.get_build_mode() == "debug":
                return [elementName, "../" + self.moduleName + "/" + link]
            elif self.webSite != "":
                return [elementName, self.webSite + "/" + link]
        # did not find :
        return [elementName, ""]

    ##
    ## @brief Get link on a class or an enum in all the subclasses
    ## @param[in] target Build target.
    ## @param[in] elementName name of the class
    ## @return [real element name, link on it]
    ##
    def doc_get_link(self, target, elementName):
        if self.documentation == None:
            return [elementName, ""]
        return self.documentation.get_class_link_from_target(elementName, target)

    ##
    ## @brief Display the description of the module on the standard output.
    ## @param[in] target Build target (not used).
    ##
    def display(self, target):
        # single argument print(...) works with python 2 and python 3
        print('-----------------------------------------------')
        print(' package : "' + self.name + '"')
        print('-----------------------------------------------')
        print(' type:"%s"' %self.type)
        print(' file:"%s"' %self.originFile)
        print(' folder:"%s"' %self.originFolder)
        self.print_list('local_path',self.local_path)
# Registry of the loaded modules: a list of dictionaries
# {"name", "path", "node", "desc"} filled by import_path().
moduleList=[]
# Prefix of the file name of every importable module: "monk_<module name>.py".
__startModuleName="monk_"
##
## @brief Search (recursively) all the "monk_*.py" files of a folder, import
##        them and register the module they create in moduleList.
## @param[in] path Base folder of the search.
## @note Every imported file must provide the functions create() and get_desc().
##
def import_path(path):
    global moduleList
    debug.debug('Start find sub File : "%s"' %path)
    for root, dirnames, filenames in os.walk(path):
        tmpList = fnmatch.filter(filenames, __startModuleName + "*.py")
        # Import the module :
        for filename in tmpList:
            fullPath = os.path.join(root, filename)
            debug.debug(' Find a file : "%s"' %fullPath)
            folder = os.path.dirname(fullPath)
            # the folder must be in the python path to import the file
            if folder not in sys.path:
                sys.path.append(folder)
            # Remove only the ".py" extension and the leading prefix (a chained
            # replace() also removed them inside the name).
            importName = filename[:-len('.py')]
            moduleName = importName[len(__startModuleName):]
            debug.debug("integrate module: '" + moduleName + "' from '" + fullPath + "'")
            theModule = __import__(importName)
            tmpElement = theModule.create()
            tmpdesc = theModule.get_desc()
            if (tmpElement == None) :
                debug.warning("Request load module '" + moduleName + "' not define for this platform")
            moduleList.append({"name":moduleName, "path":fullPath, "node":tmpElement, "desc":tmpdesc})
##
## @brief Get the node of a registered module.
## @param[in] name Name of the module (without the "monk_" prefix).
## @return The "node" of the first module with this name, None when not found.
##
def get_module(name):
    # the generator stops on the first match, like an explicit loop
    return next((mod["node"] for mod in moduleList if mod["name"] == name), None)
##
## @brief Get the node of every registered module.
## @return A new list with the "node" of all the modules, in registration order.
##
def get_all_module():
    return [mod["node"] for mod in moduleList]
##
## @brief Get the name and the description of every registered module.
## @return A new list of [name, description], in registration order.
##
def list_all_module_with_desc():
    return [[mod["name"], mod["desc"]] for mod in moduleList]

18
monkNamespace.py Normal file
View File

@ -0,0 +1,18 @@
#!/usr/bin/python
import monkDebug as debug
import monkNode as Node
##
## @brief Node describing a namespace; it can own sub-nodes.
##
class Namespace(Node.Node):
    ##
    ## @brief Constructor.
    ## @param[in] stack Tokens of the declaration: ['namespace', '<name>'].
    ## @param[in] file Name of the file where the namespace was found.
    ## @param[in] lineNumber Line of the declaration.
    ## @param[in] documentation Documentation lines attached to the namespace.
    ##
    def __init__(self, stack=[], file="", lineNumber=0, documentation=[]):
        if len(stack) != 2:
            debug.error("Can not parse namespace : " + str(stack))
        namespaceName = stack[1]
        Node.Node.__init__(self, 'namespace', namespaceName, file, lineNumber, documentation)
        # enable sub list
        self.subList = []
        debug.verbose("find namespace : " + self.to_str())

    ##
    ## @brief Get a C++ like declaration of the namespace.
    ## @return The declaration string.
    ##
    def to_str(self) :
        return "".join(["namespace ", self.name, " { ... };"])

144
monkNode.py Normal file
View File

@ -0,0 +1,144 @@
#!/usr/bin/python
import monkDebug as debug
# Access levels accepted by Node.set_access().
accessList = ['private', 'protected', 'public']
##
## @brief Build the indentation used to display the tree of nodes.
## @param[in] level Depth in the tree (0 or less ==> no indentation).
## @return One space per level.
##
def debug_space(level):
    return " " * level
# Last unique identifier given to a node (incremented by every Node creation).
genericUID = 0
##
## @brief Base element of the documentation tree (namespace, class, methode ...).
##
class Node():
    ##
    ## @brief Constructor: a new unique id is given to every node.
    ## @param[in] type Type of the node ('namespace', 'class', 'methode' ...).
    ## @param[in] name Name of the node.
    ## @param[in] file Name of the file where the node was found.
    ## @param[in] lineNumber Line of the declaration.
    ## @param[in] documentation Documentation lines attached to the node
    ##            (the list is stored, never modified by this class).
    ##
    def __init__(self, type, name="", file="", lineNumber=0, documentation=[]):
        global genericUID
        genericUID+=1
        self.uid = genericUID
        self.documenatationCode = documentation
        self.nodeType = type
        self.name = name
        self.doc = None
        self.fileName = file
        self.lineNumber = lineNumber
        # None ==> this node can not own sub-nodes
        self.subList = None
        # None ==> this node does not manage the access of its sub-nodes
        self.access = None

    def to_str(self):
        return ""

    def str(self):
        return self.to_str()

    def get_node_type(self):
        return self.nodeType

    def get_name(self):
        return self.name

    def get_uid(self):
        return self.uid

    ##
    ## @brief Get the documentation of the node.
    ## @return All the documentation lines joined with a line feed ("" when none).
    ##
    def get_doc(self):
        if self.documenatationCode == None:
            return ""
        return '\n'.join(self.documenatationCode)

    ##
    ## @brief Display (debug.info) the node and all its sub-nodes.
    ## @param[in] level Depth of the node (indentation).
    ## @param[in] access Access of the node in its parent (None ==> no marker).
    ##
    def debug_display(self, level=0, access = None):
        markers = {'private': "- ", 'protected': "# ", 'public': "+ "}
        debug.info(debug_space(level) + markers.get(access, "") + self.nodeType + " => " + self.name)
        if self.subList != None:
            for element in self.subList:
                if 'access' in element.keys():
                    element['node'].debug_display(level+1, element['access'])
                else:
                    element['node'].debug_display(level+1)

    ##
    ## @brief Set the access given to the next added sub-nodes.
    ## @param[in] access New access: one of accessList.
    ## @note Refused (error log) when the node does not manage the access.
    ##
    def set_access(self, access):
        if access not in accessList:
            debug.warning("This is not a valid access : '" + access + "' : availlable : " + str(accessList))
            return
        if self.access == None:
            debug.error("This Node does not support acces configuration...")
            return
        self.access = access

    def get_access(self):
        return self.access

    ##
    ## @brief Store a sub-node with the current access (when it is managed).
    ## @param[in] newSubElement Node to store.
    ##
    def _store_sub_element(self, newSubElement):
        if self.access == None:
            self.subList.append({'node' : newSubElement})
        else:
            self.subList.append({'access' : self.access, 'node' : newSubElement})

    ##
    ## @brief Add a sub-node. A namespace already declared with the same name
    ##        is not added twice: the new content is merged in the old one.
    ## @param[in] newSubElement Node to add.
    ##
    def append(self, newSubElement):
        if self.subList == None:
            debug.error("can not add a '" + newSubElement.nodeType + "' at this '" + self.nodeType + "'")
            return
        if newSubElement.get_node_type() == 'namespace':
            # check if the element already exist
            for element in self.subList:
                if element['node'].get_node_type() == 'namespace' \
                   and element['node'].get_name() == newSubElement.get_name():
                    debug.verbose("fusionate with previous declaration")
                    element['node'].fusion(newSubElement)
                    return
        # normal case adding :
        self._store_sub_element(newSubElement)

    ##
    ## @brief only for namespace : add all the sub-nodes of an other node.
    ## @param[in] addedElement Node that contains the elements to add.
    ##
    def fusion(self, addedElement):
        # a node without sub-list has nothing to merge
        if addedElement.subList == None:
            return
        for element in addedElement.subList:
            self.append(element['node'])

    ##
    ## @brief Get the list of all specify type
    ## @param[in] type Type requested ['namespace', 'class', 'struct', 'methode', 'enum', 'define', 'union', 'variable', 'constructor', 'destructor'] (can be a list)
    ## @param[in] sorted Request to sort the return list (not implemented).
    ## @return The requested list or []
    ##
    def get_all_sub_type(self, type='all', sorted = False):
        # checked first: the 'all' request also returns [] (not None) when the
        # node has no sub-list
        if self.subList == None:
            return []
        if type == 'all':
            return self.subList
        if isinstance(type, list) == False:
            type = [type]
        ret = [element for element in self.subList if element['node'].get_node_type() in type]
        if sorted == True:
            # TODO : Sorted the list ...
            pass
        return ret
##
## @brief Root node of a tree: a simple node that accepts sub-nodes.
##
class MainNode(Node):
    ##
    ## @brief Constructor.
    ## @param[in] type Type of the node.
    ## @param[in] name Name of the node.
    ##
    def __init__(self, type="library", name=""):
        Node.__init__(self, type, name)
        # enable the sub list
        self.subList = []

546
monkParse.py Normal file
View File

@ -0,0 +1,546 @@
#!/usr/bin/python
import os
import sys
import re
import monkTools as tools
sys.path.append(tools.get_current_path(__file__) + "/ply/ply/")
sys.path.append(tools.get_current_path(__file__) + "/codeBB/")
sys.path.append(tools.get_current_path(__file__) + "/codeHL/")
import lex
import inspect
import monkDebug as debug
import monkClass as Class
import monkNamespace as Namespace
import monkStruct as Struct
import monkUnion as Union
import monkMethode as Methode
import monkEnum as Enum
import monkVariable as Variable
import monkNode as Node
# List of the token types declared to the lexer; it is read by lex.lex()
# together with the t_* rules of this file.
tokens = [
'NUMBER',
'NAME',
'OPEN_PAREN',
'CLOSE_PAREN',
'OPEN_BRACE',
'CLOSE_BRACE',
'OPEN_SQUARE_BRACKET',
'CLOSE_SQUARE_BRACKET',
'COLON',
'SEMI_COLON',
'COMMA',
'TAB',
'BACKSLASH',
'PIPE',
'PERCENT',
'EXCLAMATION',
'CARET',
'COMMENT_SINGLELINE_DOC_PREVIOUS',
'COMMENT_SINGLELINE_DOC',
'COMMENT_SINGLELINE',
'COMMENT_MULTILINE_DOC',
'COMMENT_MULTILINE',
'PRECOMP',
'ASTERISK',
'AMPERSTAND',
'EQUALS',
'MINUS',
'PLUS',
'DIVIDE',
'CHAR_LITERAL',
'STRING_LITERAL',
'NEW_LINE',
'SQUOTE',
]
# Characters skipped by the lexer: space, carriage return, '.', '?', '@' and form feed.
t_ignore = " \r.?@\f"
# Simple rules: one regular expression per token type.
# A number starts with a digit; the hexadecimal chars and 'x'/'X' are accepted after it.
t_NUMBER = r'[0-9][0-9XxA-Fa-f]*'
# A name can also start with '<', '>' or '~' (a single '<' or '>' is a NAME too).
t_NAME = r'[<>A-Za-z_~][A-Za-z0-9_]*'
t_OPEN_PAREN = r'\('
t_CLOSE_PAREN = r'\)'
t_OPEN_BRACE = r'{'
t_CLOSE_BRACE = r'}'
t_OPEN_SQUARE_BRACKET = r'\['
t_CLOSE_SQUARE_BRACKET = r'\]'
t_SEMI_COLON = r';'
t_COLON = r':'
t_COMMA = r','
##
## @brief Lexer rule for a tabulation. Nothing is returned: no token is
##        generated for it.
## @note The first string of the function is the regular expression of the rule.
## @param[in] t Token found.
##
def t_TAB(t):
    r'\t'
    return None
# Simple rules for single punctuation characters.
t_BACKSLASH = r'\\'
t_PIPE = r'\|'
t_PERCENT = r'%'
t_CARET = r'\^'
t_EXCLAMATION = r'!'
##
## @brief Lexer rule for a pre-compilation line ("#..." until the end of line).
##        Every "##multiline##" marker is replaced by a backslash followed by
##        a line feed, then the line counter is updated.
## @note The first string of the function is the regular expression of the rule.
## @param[in,out] t Token found (value and lexer.lineno are updated).
## @return The token.
##
def t_PRECOMP(t):
    r'\#.*?\n'
    t.value = t.value.replace('##multiline##', "\\\n")
    # count() works with every python version (len(filter(...)) fails when
    # filter() returns an iterator)
    t.lexer.lineno += t.value.count("\n")
    return t
##
## @brief Lexer rule for a single line documentation of the previous element:
##        "///< ..." or "//!< ...". The 4 chars of the marker are removed,
##        then the blank chars at the start and at the end of the text
##        (the text is never reduced under 2 chars).
## @note The first string of the function is the regular expression of the rule.
## @param[in,out] t Token found (value and lexer.lineno are updated).
## @return The token.
##
def t_COMMENT_SINGLELINE_DOC_PREVIOUS(t):
    r'//(/|!)<.*?\n'
    t.lexer.lineno += 1
    blanks = ('\n', '\t', ' ')
    text = t.value[4:]
    while text[0] in blanks and len(text) > 2:
        text = text[1:]
    while text[-1] in blanks and len(text) > 2:
        text = text[:-1]
    t.value = text
    return t
##
## @brief Lexer rule for a single line documentation: "/// ..." or "//! ...".
##        The 3 chars of the marker are removed, then the blank chars at the
##        start and at the end of the text (the text is never reduced under
##        2 chars).
## @note The first string of the function is the regular expression of the rule.
## @param[in,out] t Token found (value and lexer.lineno are updated).
## @return The token.
##
def t_COMMENT_SINGLELINE_DOC(t):
    r'//(/|!).*?\n'
    t.lexer.lineno += 1
    blanks = ('\n', '\t', ' ')
    text = t.value[3:]
    while text[0] in blanks and len(text) > 2:
        text = text[1:]
    while text[-1] in blanks and len(text) > 2:
        text = text[:-1]
    t.value = text
    return t
##
## @brief Lexer rule for a normal single line comment ("// ..."): only the
##        line counter is updated, nothing is returned so no token is generated.
## @note The first string of the function is the regular expression of the rule.
## @param[in,out] t Token found (lexer.lineno is updated).
##
def t_COMMENT_SINGLELINE(t):
    r'\/\/.*\n'
    t.lexer.lineno = t.lexer.lineno + 1
    return None
# Simple rules for the operators.
t_ASTERISK = r'\*'
t_MINUS = r'\-'
t_PLUS = r'\+'
# A '/' not followed by an other '/' (that would start a comment).
t_DIVIDE = r'/(?!/)'
t_AMPERSTAND = r'&'
t_EQUALS = r'='
# One char between two single quotes.
t_CHAR_LITERAL = "'.'"
t_SQUOTE = "'"
# Found at http://wordaligned.org/articles/string-literals-and-regular-expressions
#TODO: This does not work with the string "bla \" bla"
t_STRING_LITERAL = r'"([^"\\]|\\.)*"'
#Found at http://ostermiller.org/findcomment.html
##
## @brief Lexer rule for a multi-line documentation: "/** ... */" or "/*! ... */".
##        The value of the token becomes the text of the comment without the
##        markers, without the '*' (and the blanks before them) and without
##        the indentation common to all the lines.
## @note The first string of the function is the regular expression of the rule.
## @param[in,out] t Token found (value and lexer.lineno are updated).
## @return The token.
##
def t_COMMENT_MULTILINE_DOC(t):
    r'/\*(\*|!)(\n|.)*?\*/'
    # count() works with every python version (len(filter(...)) fails when
    # filter() returns an iterator)
    t.lexer.lineno += t.value.count("\n")
    # remove "/**" and "*/", then every '*' with the blanks found before it
    text = re.sub(r"( |\t)*\*", "", t.value[3:-2])
    # Remove the first line feeds and the last blank chars. The size is
    # checked first: the text is never reduced under 2 chars, and an empty
    # text (ex: "/***/") does not raise an IndexError.
    while len(text) > 2 and text[0] == '\n':
        text = text[1:]
    while len(text) > 2 and text[-1] in ('\n', '\t', ' '):
        text = text[:-1]
    # Remove the indentation (spaces only) common to all the lines. An empty
    # line counts for 0: it disables the removal.
    lines = text.split('\n')
    removeLen = min([len(line) - len(line.lstrip(' ')) for line in lines])
    t.value = '\n'.join([line[removeLen:] for line in lines])
    return t
##
## @brief Lexer rule for a normal multi-line comment ("/* ... */"): only the
##        line counter is updated, nothing is returned so no token is generated.
## @note The first string of the function is the regular expression of the rule.
## @param[in,out] t Token found (lexer.lineno is updated).
##
def t_COMMENT_MULTILINE(t):
    r'/\*(\n|.)*?\*/'
    # count() works with every python version (len(filter(...)) fails when
    # filter() returns an iterator)
    t.lexer.lineno += t.value.count("\n")
##
## @brief Lexer rule for one or more line feeds: only the line counter is
##        updated, nothing is returned so no token is generated.
## @note The first string of the function is the regular expression of the rule.
## @param[in,out] t Token found (lexer.lineno is updated).
##
def t_NEWLINE(t):
    r'\n+'
    lineCount = len(t.value)
    t.lexer.lineno = t.lexer.lineno + lineCount
    return None
##
## @brief Lexer error callback: the faulty element is only printed.
## @param[in] v Element given by the lexer when no rule matches.
##
def t_error(v):
    print( "Lex error: ", v )
    return None
# Build the lexer from the "tokens" list and the t_* rules of this module
# (done one time, when the module is imported).
lex.lex()
##
## @brief Join the name elements that the lexer has split:
##          - scope    : ['class', 'Bar', ':', ':', 'Foo'] -> ['class', 'Bar::Foo']
##          - operator : ['operator', '=', '='] -> ['operator==']
##        The other elements are kept as they are (a single ':' is not removed).
## @param[in] table Input table to convert. ex: ['class', 'Bar', ':', ':', 'Foo']
## @return The new table (the input table itself when nothing has to be joined). ex: ['class', 'Bar::Foo']
##
def create_compleate_class_name(table):
    if "::" in "".join(table):
        # we need to convert it :
        merged = []
        for name in table:
            if len(merged) == 0:
                merged.append(name)
            elif name == ":" \
               and merged[-1].endswith(":"):
                # second ':' of a scope operator
                merged[-1] += name
            elif merged[-1].endswith("::"):
                if len(merged) >= 2:
                    # 'xx', '::', 'yy' ==> 'xx::yy'
                    merged[-2] += merged[-1] + name
                    del merged[-1]
                else:
                    # leading global scope ('::yy'): nothing before to join with
                    merged[-1] += name
            else:
                merged.append(name)
        table = merged
    if 'operator' not in "".join(table):
        return table
    out = []
    for name in table:
        if len(out) == 0:
            out.append(name)
        elif name in ['<','>','='] \
           and out[-1][:8] == 'operator' \
           and len(out[-1])-8 < 2:
            # at most 2 symbols are glued after 'operator' (==, <=, >=, <<, >>)
            out[-1] += name
        else:
            out.append(name)
    return out
##
## @brief Parser of one header file: the constructor reads the file, tokenizes it
##        and stores the elements found in self.mainNode.
##
class parse_file():
##
## @brief Generate the prefix of the debug lines: "[<depth+1>]" followed by one
##        space for each brace currently open.
## @return The prefix string.
##
def gen_debug_space(self):
    depth = len(self.braceDepthType)
    return "[" + str(depth + 1) + "]" + " " * depth
##
## @brief Merge the node tree parsed from this file in an other node.
## @param[in,out] baseNode Node that receives the parsed tree (its fusion() method is called).
## @return The baseNode given in parameter.
##
def fusion(self, baseNode):
    parsed = self.mainNode
    baseNode.fusion(parsed)
    return baseNode
##
## @brief Read the file, clean its content, tokenize it with the module lexer and
##        fill self.mainNode with the elements found.
## @param[in] fileName Name of the header file to parse.
##
def __init__(self, fileName):
# root node that receives everything found in the file
self.mainNode = Node.MainNode("main-node", "tmp")
self.m_elementParseStack = []
debug.debug("Parse file : '" + fileName + "'")
self.headerFileName = fileName
self.anon_union_counter = [-1, 0]
# load all the file data :
headerFileStr = tools.file_read_data(fileName)
# Strip out template declarations
# TODO : What is the real need ???
headerFileStr = re.sub("template[\t ]*<[^>]*>", "", headerFileStr)
# normalize the end of lines: "\r\n" and a lone "\r" both become "\n" ==> this simplifies the next regular expressions ...
headerFileStr = re.sub("\r", "\r\n", headerFileStr)
headerFileStr = re.sub("\r\n\n", "\r\n", headerFileStr)
headerFileStr = re.sub("\r", "", headerFileStr)
# remove the "#if 0" sections (up to the first "#endif" or "#else")
# TODO : Can generate some error ...
headerFileStr = re.sub("\#if 0(.*?)(\#endif|\#else)", "", headerFileStr, flags=re.DOTALL)
# the "@interface ... @end" removal is only used to detect Objective-C content
headerFileafter = re.sub("\@interface(.*?)\@end", "", headerFileStr, flags=re.DOTALL)
if headerFileStr != headerFileafter :
debug.debug(" Objective C interface ... ==> not supported")
# NOTE: the parsing state created below (self.stack, self.braceDepthType, ...) does not exist after this return
return
#debug.verbose(headerFileStr)
#Filter out Extern "C" statements. These are order dependent
headerFileStr = re.sub(r'extern( |\t)+"[Cc]"( |\t)*{', "{", headerFileStr)
# a backslash at the end of a line is replaced by the "##multiline##" marker
headerFileStr = re.sub(r'\\\n', "##multiline##", headerFileStr)
headerFileStr += '\n'
debug.debug(headerFileStr)
###### debug.info(headerFileStr)
self.stack = [] # token stack to find the namespace and the element name ...
self.nameStack = [] #
self.braceDepth = 0
# one entry { 'type', 'stack', 'node' } for each brace currently open (see brace_type_push)
self.braceDepthType = []
# documentation comments collected since the last element
self.lastComment = []
# brace counter used while the body of a function is skipped
self.subModuleCountBrace = 0;
lex.lex()
lex.input(headerFileStr)
self.curLine = 0
self.curChar = 0
# main loop: consume the tokens one by one until the end of the input
while True:
tok = lex.token()
if not tok:
break
debug.debug("TOK: " + str(tok))
self.stack.append( tok.value )
self.curLine = tok.lineno
self.curChar = tok.lexpos
# special case to remove internal function define in header:
# (the braces are counted to find the end of the function body)
if self.previous_is('function') == True:
if tok.type == 'OPEN_BRACE':
self.subModuleCountBrace += 1
elif tok.type == 'CLOSE_BRACE':
self.subModuleCountBrace -= 1
if self.subModuleCountBrace <= 0:
self.brace_type_pop()
continue
# normal case:
if tok.type == 'PRECOMP':
debug.debug("PRECOMP: " + str(tok))
self.stack = []
self.nameStack = []
self.lastComment = []
# Do nothing for macro ==> many time not needed ...
continue
if tok.type == 'COMMENT_SINGLELINE_DOC_PREVIOUS':
#self.lastComment.append(tok.value)
pass
# the documentation comments are kept for the next element created
if tok.type == 'COMMENT_MULTILINE_DOC':
self.lastComment.append(tok.value)
if tok.type == 'COMMENT_SINGLELINE_DOC':
self.lastComment.append(tok.value)
if tok.type == 'OPEN_BRACE':
# When we open a brace, this is the time to parse the stack ...
# Clean the stack : (remove \t\r\n , and concatenate the 'xx', ':', ':', 'yy' in 'xx::yy',
self.nameStack = create_compleate_class_name(self.nameStack)
# the first matching test selects the type of the element opened by this brace
if len(self.nameStack) <= 0:
#open brace with no name ...
self.brace_type_push('empty', [])
elif is_a_function(self.nameStack):
# need to parse sub function internal description...
self.subModuleCountBrace = 1
self.brace_type_push('function', self.nameStack)
elif 'namespace' in self.nameStack:
self.brace_type_push('namespace', self.nameStack)
elif 'class' in self.nameStack:
self.brace_type_push('class', self.nameStack)
elif 'enum' in self.nameStack:
self.brace_type_push('enum', self.nameStack)
elif 'struct' in self.nameStack:
self.brace_type_push('struct', self.nameStack)
elif 'typedef' in self.nameStack:
self.brace_type_push('typedef', self.nameStack)
elif 'union' in self.nameStack:
self.brace_type_push('union', self.nameStack)
else:
self.brace_type_push('unknow', self.nameStack)
self.stack = []
self.nameStack = []
elif tok.type == 'CLOSE_BRACE':
# the names still pending are the last values of an enum, or are dropped
if len(self.nameStack) != 0:
if self.previous_is('enum') == True:
self.brace_type_append('enum list', self.nameStack);
else:
debug.warning(self.gen_debug_space() + "end brace DROP : " + str(self.nameStack));
self.stack = []
self.nameStack = []
self.lastComment = []
self.brace_type_pop()
self.nameStack = create_compleate_class_name(self.nameStack)
# all the tokens below are simply stored in the name stack
if tok.type == 'OPEN_PAREN':
self.nameStack.append(tok.value)
elif tok.type == 'CLOSE_PAREN':
self.nameStack.append(tok.value)
elif tok.type == 'OPEN_SQUARE_BRACKET':
self.nameStack.append(tok.value)
elif tok.type == 'CLOSE_SQUARE_BRACKET':
self.nameStack.append(tok.value)
elif tok.type == 'EQUALS':
self.nameStack.append(tok.value)
elif tok.type == 'COMMA':
self.nameStack.append(tok.value)
elif tok.type == 'BACKSLASH':
self.nameStack.append(tok.value)
elif tok.type == 'PIPE':
self.nameStack.append(tok.value)
elif tok.type == 'PERCENT':
self.nameStack.append(tok.value)
elif tok.type == 'CARET':
self.nameStack.append(tok.value)
elif tok.type == 'EXCLAMATION':
self.nameStack.append(tok.value)
elif tok.type == 'SQUOTE':
self.nameStack.append(tok.value)
elif tok.type == 'NUMBER':
self.nameStack.append(tok.value)
elif tok.type == 'MINUS':
self.nameStack.append(tok.value)
elif tok.type == 'PLUS':
self.nameStack.append(tok.value)
elif tok.type == 'STRING_LITERAL':
self.nameStack.append(tok.value)
elif tok.type == 'NAME' \
or tok.type == 'AMPERSTAND' \
or tok.type == 'ASTERISK' \
or tok.type == 'CHAR_LITERAL':
self.nameStack.append(tok.value)
elif tok.type == 'COLON':
# an access keyword followed by ':' changes the current visibility, any other ':' is stored
# NOTE(review): self.nameStack[0] raises IndexError when the name stack is empty here (ex: a statement starting with '::') -- confirm
if self.nameStack[0] in Node.accessList:
debug.debug(self.gen_debug_space() + "change visibility : " + self.nameStack[0]);
self.brace_type_change_access(self.nameStack[0])
self.nameStack = []
self.stack = []
else :
self.nameStack.append(tok.value)
elif tok.type == 'SEMI_COLON':
# end of a statement: only the functions, the enum values and the variables are stored, the other declarations are just logged
if len(self.nameStack) != 0:
self.nameStack = create_compleate_class_name(self.nameStack)
if is_a_function(self.nameStack):
self.brace_type_append('function', self.nameStack);
elif 'namespace' in self.nameStack:
debug.debug(self.gen_debug_space() + "find a namespace DECLARATION : " + str(self.nameStack));
elif 'class' in self.nameStack:
debug.debug(self.gen_debug_space() + "find a class DECLARATION : " + str(self.nameStack));
elif 'enum' in self.nameStack:
debug.debug(self.gen_debug_space() + "find a enum DECLARATION : " + str(self.nameStack));
elif 'struct' in self.nameStack:
debug.debug(self.gen_debug_space() + "find a struct DECLARATION : " + str(self.nameStack));
elif 'typedef' in self.nameStack:
debug.info(self.gen_debug_space() + "find a typedef DECLARATION : " + str(self.nameStack));
elif 'union' in self.nameStack:
debug.debug(self.gen_debug_space() + "find a union DECLARATION : " + str(self.nameStack));
else:
if self.previous_is('enum') == True:
self.brace_type_append('enum list', self.nameStack);
else:
# TODO : Check if it is true in all case :
self.brace_type_append('variable', self.nameStack);
#debug.warning(self.gen_debug_space() + "semicolumn : " + str(self.nameStack));
self.stack = []
self.nameStack = []
self.lastComment = []
#self.debug_display();
##
## @brief Display the parsed node tree in the debug log.
##
def debug_display(self):
    debug.info("Debug display :")
    root = self.mainNode
    root.debug_display(1)
##
## @brief Create the node associated with an element type.
## @param[in] type Type of the element ('namespace', 'class', 'struct', 'union', 'function', 'enum', 'variable', ...).
## @param[in] stack Token list describing the element.
## @return The new node, or None for the types without node ('empty', 'enum list', 'typedef') and for the unknown types.
##
def create_element(self, type, stack):
    if type == 'empty' or type == 'enum list':
        return None
    if type == 'typedef':
        # TODO ...
        return None
    if type == 'namespace':
        return Namespace.Namespace(stack, self.headerFileName, self.curLine, self.lastComment)
    if type == 'class':
        return Class.Class(stack, self.headerFileName, self.curLine, self.lastComment)
    if type == 'struct':
        return Struct.Struct(stack, self.headerFileName, self.curLine, self.lastComment)
    if type == 'union':
        return Union.Union(stack, self.headerFileName, self.curLine, self.lastComment)
    if type == 'function':
        return Methode.Methode(stack, self.headerFileName, self.curLine, self.lastComment)
    if type == 'enum':
        return Enum.Enum(stack, self.headerFileName, self.curLine, self.lastComment)
    if type == 'variable':
        return Variable.Variable(stack, self.headerFileName, self.curLine, self.lastComment)
    debug.error("unknow type ...")
    return None
##
## @brief Register the element opened by a brace at the end of the brace stack.
## @param[in] type Type of the element opened.
## @param[in] stack Token list describing the element.
##
def brace_type_push(self, type, stack):
    debug.debug(self.gen_debug_space() + "find a <<" + type + ">> : " + str(stack));
    # the node can be None (element type without node)
    node = self.create_element(type, stack)
    self.braceDepthType.append({
        'type' : type,
        'stack' : stack,
        'node' : node,
    })
##
## @brief Add an element in the closest opened brace that owns a node, or in
##        the main node when there is none.
## @param[in] element Element to add.
## @param[in] id Index in the brace stack where the search starts (-50: start from the last opened brace).
##
def brace_type_append_current(self, element, id = -50):
    depth = id
    if depth == -50:
        depth = len(self.braceDepthType) - 1
    # special case for empty brace, just add it to the upper
    while depth >= 0 and self.braceDepthType[depth]['node'] == None:
        depth -= 1
    if depth < 0:
        receiver = self.mainNode
    else:
        receiver = self.braceDepthType[depth]['node']
    receiver.append(element)
##
## @brief Create an element and add it in the current brace; the values of an
##        'enum list' are given to the enum currently opened.
## @param[in] type Type of the element to add.
## @param[in] stack Token list describing the element.
##
def brace_type_append(self, type, stack):
    debug.debug(self.gen_debug_space() + " append a <<" + type + ">> : " + str(stack));
    enclosing = self.get_last_type()
    created = self.create_element(type, stack)
    if created != None:
        self.brace_type_append_current(created)
    elif enclosing == 'enum' and type == 'enum list':
        # enum sub list:
        self.braceDepthType[-1]['node'].enum_append(stack)
    else:
        debug.info("TODO : Parse the special type")
##
## @brief Close the last opened brace: its node (when it has one) is added in
##        the upper level, then the brace is removed from the stack.
##
def brace_type_pop(self):
    if len(self.braceDepthType) == 0:
        debug.warning("Try to pop the stack with No more element ...")
        return
    top = len(self.braceDepthType) - 1
    closed = self.braceDepthType[top]['node']
    if not (closed == None):
        # add it on the previous
        self.brace_type_append_current(closed, top - 1)
    self.braceDepthType.pop()
##
## @brief Change the current access of the closest opened brace that owns a node.
## @param[in] newOne New access (must be an element of Node.accessList).
##
def brace_type_change_access(self, newOne):
    if newOne not in Node.accessList:
        debug.error("unknow access type : " + newOne)
        return
    depth = len(self.braceDepthType) - 1
    # special case for empty brace, just use the upper
    while depth >= 0 and self.braceDepthType[depth]['node'] == None:
        depth -= 1
    if depth < 0:
        debug.warning("can not change the main access on the library")
        return
    owner = self.braceDepthType[depth]
    if owner['node'].get_access() == None:
        debug.error("Can not set access in other as : 'class' or 'struct' :" + str(owner))
        return
    owner['node'].set_access(newOne)
##
## @brief Check the type of the last opened brace.
## @param[in] type Type to compare with.
## @return True if the last opened brace has this type, False otherwise.
##
def previous_is(self, type):
    return bool(self.get_last_type() == type)
##
## @brief Get the type of the last opened brace.
## @return The type string, or None when no brace is opened.
##
def get_last_type(self):
    if len(self.braceDepthType) == 0:
        return None
    return self.braceDepthType[-1]['type']
##
## @brief Check if a token list describes a function: once everything from the
##        first ':' and a final '= 0' are removed, it must end with ')' or
##        with ')', 'const'.
## @param[in] stack Token list to check. ex: ['void', 'foo', '(', ')', 'const']
## @return True if the list looks like a function, False otherwise.
##
def is_a_function(stack) :
    # in a function we need to have functionName + ( + )
    if len(stack) < 3:
        return False
    # only what is before the first ':' is checked
    if ':' in stack:
        stack = stack[:stack.index(':')]
    # remove the final '= 0'
    if len(stack) >= 2 \
       and stack[-2] == '=' \
       and stack[-1] == '0':
        stack = stack[:-2]
    # the removals can leave an empty list (ex: [':', 'a', 'b'])
    if len(stack) == 0:
        return False
    #can end with 2 possibilities : ')', 'const' or ')'
    if stack[-1] == ')':
        return True
    return len(stack) >= 2 \
           and stack[-2] == ')' \
           and stack[-1] == 'const'

16
monkStruct.py Normal file
View File

@ -0,0 +1,16 @@
#!/usr/bin/python
import monkDebug as debug
import monkNode as Node
##
## @brief Node describing a 'struct' element.
##
class Struct(Node.Node):
    ##
    ## @param[in] stack Token list of the declaration (not parsed: the name stays empty).
    ## @param[in] file Name of the file where the element is declared.
    ## @param[in] lineNumber Line of the declaration.
    ## @param[in] documentation List of the documentation comments (None ==> new empty list).
    ##
    def __init__(self, stack=None, file="", lineNumber=0, documentation=None):
        # None instead of [] as default value: a list default is shared by
        # all the calls that do not give the parameter
        if documentation is None:
            documentation = []
        name = ""
        Node.Node.__init__(self, 'struct', name, file, lineNumber, documentation)
        self.access = "public"
        self.subList = []
    
    ##
    ## @brief Get the short display string of the element.
    ##
    def to_str(self) :
        return "struct " + self.name + " { ... };"

112
monkTools.py Normal file
View File

@ -0,0 +1,112 @@
#!/usr/bin/python
import os
import shutil
import errno
import monkDebug as debug
import fnmatch
##
## @brief Get the folder where the program is executed.
## @return The current working directory.
##
def get_run_folder():
    current = os.getcwd()
    return current
##
## @brief Get the folder that contains a file (the symbolic links are resolved).
## @param[in] file Name of the file.
## @return The absolute folder name.
##
def get_current_path(file):
    resolved = os.path.realpath(file)
    return os.path.dirname(resolved)
##
## @brief Create the folder that must contain a file (with all the parents).
##        Nothing is done when the folder exists or when the file name has
##        no folder part.
## @param[in] file Name of the file that will be created.
##
def create_directory_of_file(file):
    folder = os.path.dirname(file)
    if folder == "":
        # file in the current folder: nothing to create
        return
    if os.path.isdir(folder):
        return
    try:
        os.makedirs(folder)
    except OSError as err:
        # the folder can have been created between the check and the creation
        if err.errno != errno.EEXIST:
            raise
##
## @brief Remove a folder and all its content (nothing is done if it is not a folder).
## @param[in] path Name of the folder to remove.
##
def remove_folder_and_sub_folder(path):
    if not os.path.isdir(path):
        return
    debug.verbose("remove folder : '" + path + "'")
    shutil.rmtree(path)
##
## @brief Remove a file (nothing is done if it is not an existing file).
## @param[in] path Name of the file to remove.
##
def remove_file(path):
    if not os.path.isfile(path):
        return
    os.remove(path)
##
## @brief Get the size of a file.
## @param[in] path Name of the file.
## @return The size in bytes, 0 if it is not an existing file.
##
def file_size(path):
    if os.path.isfile(path):
        return os.stat(path).st_size
    return 0
##
## @brief Read all the content of a text file.
## @param[in] path Name of the file to read.
## @return The file content, "" if it is not an existing file.
##
def file_read_data(path):
    if not os.path.isfile(path):
        return ""
    # 'with' closes the file even when the read fails
    with open(path, "r") as file:
        return file.read()
##
## @brief Write a string in a text file (the previous content is replaced).
## @param[in] path Name of the file to write.
## @param[in] data String to store in the file.
##
def file_write_data(path, data):
    # 'with' closes the file even when the write fails
    with open(path, "w") as file:
        file.write(data)
##
## @brief Convert a string or a list (of strings or of sub-lists) in one
##        string where every element is followed by a space.
## @param[in] list String or list to convert.
## @return The generated string. ex: ['a', ['b', 'c']] -> "a b c "
##
def list_to_str(list):
    if isinstance(list, str):
        return list + " "
    # multiple input in the list ... (the recursion manages the sub-lists)
    result = ""
    for elem in list:
        result += list_to_str(elem)
    return result
##
## @brief Add a prefix on a string or on every element of a list.
## @param[in] prefix String to add before the elements.
## @param[in] list None, a string or a list of strings.
## @return "" for None and for an empty list, the prefixed string for a string,
##         the list of the prefixed elements otherwise.
##
def add_prefix(prefix,list):
    if type(list) == type(None):
        return ""
    if type(list) == type(str()):
        return prefix+list
    if len(list)==0:
        return ''
    return [prefix+elem for elem in list]
##
## @brief Copy a file; the copy is skipped when the destination exists and is
##        newer than the source (except in force mode).
## @param[in] src Name of the source file.
## @param[in] dst Name of the destination file (its folder is created if needed).
## @param[in] force True to copy even if the destination is newer.
##
def copy_file(src, dst, force=False):
    if not os.path.exists(src):
        debug.error("Request a copy a file that does not existed : '" + src + "'")
    if os.path.exists(dst) \
       and force==False \
       and os.path.getmtime(dst) > os.path.getmtime(src):
        return
    debug.print_element("copy file", src, "==>", dst)
    create_directory_of_file(dst)
    shutil.copyfile(src, dst)
##
## @brief Copy all the files of a folder that match a pattern.
## @param[in] src Folder of the files followed by the file pattern (ex: "data/*.png"); an empty pattern selects all the files.
## @param[in] dst Destination folder.
##
def copy_anything(src, dst):
    tmpPath = os.path.dirname(os.path.realpath(src))
    tmpRule = os.path.basename(src)
    for root, dirnames, filenames in os.walk(tmpPath):
        tmpList = filenames
        if len(tmpRule)>0:
            tmpList = fnmatch.filter(filenames, tmpRule)
        for cycleFile in tmpList:
            copy_file(tmpPath+"/"+cycleFile,dst+"/"+cycleFile)
        # The source name is built from the top folder, so only the files of
        # the top folder can be copied: do not visit the sub-folders (their
        # files would be requested with a wrong path).
        break
##
## @brief Register in a target the staging copy of all the files of a folder
##        that match a pattern.
## @param[in,out] target Object that receives the requests (its add_file_staging(source, destination) method is called).
## @param[in] src Folder of the files followed by the file pattern (ex: "data/*.png"); an empty pattern selects all the files.
## @param[in] dst Destination folder.
##
def copy_anything_target(target, src, dst):
    tmpPath = os.path.dirname(os.path.realpath(src))
    tmpRule = os.path.basename(src)
    for root, dirnames, filenames in os.walk(tmpPath):
        tmpList = filenames
        if len(tmpRule)>0:
            tmpList = fnmatch.filter(filenames, tmpRule)
        for cycleFile in tmpList:
            target.add_file_staging(tmpPath+"/"+cycleFile,dst+"/"+cycleFile)
        # The source name is built from the top folder, so only the files of
        # the top folder can be registered: do not visit the sub-folders
        # (their files would be registered with a wrong path).
        break

55
monkType.py Normal file
View File

@ -0,0 +1,55 @@
#!/usr/bin/python
import monkDebug as debug
import monkType as Type
import monkNode as Node
import re
##
## @brief Description of a C/C++ type built from a token list.
##        ex: ['const', 'std::string', '&'] -> const=True, name='std::string', reference=True
##
class Type():
    ##
    ## @param[in] stack Token list of the type (None or empty ==> no type).
    ##
    def __init__(self, stack=None):
        self.name = ""
        self.const = False # the const xxxxx
        self.reference = False
        self.constVar = False # the char* const VarName
        if not stack:
            # None type
            return
        if len(stack) == 1:
            self.name = stack[0]
            return
        # check end const
        if stack[-1] == 'const':
            self.constVar = True
            stack = stack[:-1]
        # check if element is a reference ...
        if stack[-1] == '&':
            self.reference = True
            stack = stack[:-1]
        # check if it starts with const ... (the stack can be empty here, ex: ['&', 'const'])
        if len(stack) > 0 \
           and stack[0] == 'const':
            self.const = True
            stack = stack[1:]
        # the remaining elements are concatenated without separator
        self.name = "".join(stack)
    
    ##
    ## @brief Get the display string of the type. ex: "const std::string &"
    ##
    def to_str(self) :
        ret = ""
        if self.const:
            ret += "const "
        ret += self.name
        if self.reference:
            ret += " &"
        if self.constVar:
            ret += " const"
        return ret
##
## @brief Type initialized with the single token 'void'.
##
class TypeVoid(Type):
    def __init__(self):
        voidStack = ['void']
        Type.__init__(self, voidStack)
##
## @brief Type initialized without any token (the default value of Type is used).
##
class TypeNone(Type):
    def __init__(self):
        # no stack given
        Type.__init__(self)

14
monkUnion.py Normal file
View File

@ -0,0 +1,14 @@
#!/usr/bin/python
import monkDebug as debug
import monkNode as Node
##
## @brief Node describing a 'union' element.
##
class Union(Node.Node):
    ##
    ## @param[in] stack Token list of the declaration (not parsed: the name stays empty).
    ## @param[in] file Name of the file where the element is declared.
    ## @param[in] lineNumber Line of the declaration.
    ## @param[in] documentation List of the documentation comments (None ==> new empty list).
    ##
    def __init__(self, stack=None, file="", lineNumber=0, documentation=None):
        # None instead of [] as default value: a list default is shared by
        # all the calls that do not give the parameter
        if documentation is None:
            documentation = []
        name = ""
        Node.Node.__init__(self, 'union', name, file, lineNumber, documentation)
        self.list = []
    
    ##
    ## @brief Get the short display string of the element.
    ##
    def to_str(self) :
        return "union " + self.name + " { ... };"

87
monkVariable.py Normal file
View File

@ -0,0 +1,87 @@
#!/usr/bin/python
import monkDebug as debug
import monkType as Type
import monkNode as Node
##
## @brief Node describing a variable: the last token is the name, the other
##        ones give the type and the 'static', 'volatile', 'extern' properties.
##
class Variable(Node.Node):
    ##
    ## @param[in] stack Token list of the declaration. ex: ['static', 'const', 'int', 'plop', '=', '5']
    ## @param[in] file Name of the file where the element is declared.
    ## @param[in] lineNumber Line of the declaration.
    ## @param[in] documentation List of the documentation comments (None ==> new empty list).
    ##
    def __init__(self, stack=None, file="", lineNumber=0, documentation=None):
        # None instead of [] as default value: a list default is shared by
        # all the calls that do not give the parameter
        if stack is None:
            stack = []
        if documentation is None:
            documentation = []
        debug.debug("Parse variable : " + str(stack))
        name = ""
        # remove the initialisation value
        if '=' in stack:
            stack = stack[:stack.index('=')]
        # TODO : better management for xxx[**][**] element:
        if '[' in stack:
            stack = stack[:stack.index('[')]
        if len(stack) < 2:
            # a single 'void' is accepted (without name), all the rest is an error
            if     len(stack) == 1 \
               and stack[0] == 'void':
                pass
            else:
                debug.error("Can not parse variable : " + str(stack))
        else:
            name = stack[-1]
        # the node name is the last token, "" when nothing is left (ex: stack starting with '=')
        nodeName = ""
        if len(stack) > 0:
            nodeName = stack[-1]
        Node.Node.__init__(self, 'variable', nodeName, file, lineNumber, documentation)
        # force the sublist error generation ...
        self.subList = None
        # default variable :
        self.type = Type.TypeNone()
        self.static = False
        self.external = False
        self.volatile = False
        #empty name ... ==> this is really bad ...
        if name == "":
            return
        if 'static' in stack:
            self.static = True
            stack = [value for value in stack if value != 'static']
        if 'volatile' in stack:
            self.volatile = True
            stack = [value for value in stack if value != 'volatile']
        # the C/C++ keyword is 'extern' ('external' is kept for compatibility)
        if    'extern' in stack \
           or 'external' in stack:
            self.external = True
            stack = [value for value in stack if value not in ('extern', 'external')]
        # all the tokens before the name describe the type
        self.type = Type.Type(stack[:len(stack)-1])
        debug.verbose("find variable : " + self.to_str())
    
    ##
    ## @brief Get the display string of the variable. ex: "static const int plop"
    ##
    def to_str(self) :
        ret = ""
        if self.external == True:
            ret += "extern "
        if self.volatile == True:
            ret += "volatile "
        if self.static == True:
            ret += "static "
        ret += self.type.to_str()
        ret += " "
        ret += self.name
        return ret
    
    ## @return True if the variable is declared 'static'.
    def get_static(self):
        return self.static
    
    ## @return True if the variable is declared 'volatile'.
    def get_volatile(self):
        return self.volatile
    
    ## @return True if the variable is declared 'extern'.
    def get_external(self):
        return self.external
    
    ## @return The type object of the variable.
    def get_type(self):
        return self.type

40
ply/ANNOUNCE Normal file
View File

@ -0,0 +1,40 @@
February 17, 2011
Announcing : PLY-3.4 (Python Lex-Yacc)
http://www.dabeaz.com/ply
I'm pleased to announce PLY-3.4--a pure Python implementation of the
common parsing tools lex and yacc. PLY-3.4 is a minor bug fix
release. It supports both Python 2 and Python 3.
If you are new to PLY, here are a few highlights:
- PLY is closely modeled after traditional lex/yacc. If you know how
to use these or similar tools in other languages, you will find
PLY to be comparable.
- PLY provides very extensive error reporting and diagnostic
information to assist in parser construction. The original
implementation was developed for instructional purposes. As
a result, the system tries to identify the most common types
of errors made by novice users.
- PLY provides full support for empty productions, error recovery,
precedence rules, and ambiguous grammars.
- Parsing is based on LR-parsing which is fast, memory efficient,
better suited to large grammars, and which has a number of nice
properties when dealing with syntax errors and other parsing
problems. Currently, PLY can build its parsing tables using
either SLR or LALR(1) algorithms.
More information about PLY can be obtained on the PLY webpage at:
http://www.dabeaz.com/ply
PLY is freely available.
Cheers,
David Beazley (http://www.dabeaz.com)

1093
ply/CHANGES Normal file

File diff suppressed because it is too large Load Diff

22
ply/PKG-INFO Normal file
View File

@ -0,0 +1,22 @@
Metadata-Version: 1.0
Name: ply
Version: 3.4
Summary: Python Lex & Yacc
Home-page: http://www.dabeaz.com/ply/
Author: David Beazley
Author-email: dave@dabeaz.com
License: BSD
Description:
PLY is yet another implementation of lex and yacc for Python. Some notable
features include the fact that its implemented entirely in Python and it
uses LALR(1) parsing which is efficient and well suited for larger grammars.
PLY provides most of the standard lex/yacc features including support for empty
productions, precedence rules, error recovery, and support for ambiguous grammars.
PLY is extremely easy to use and provides very extensive error checking.
It is compatible with both Python 2 and Python 3.
Platform: UNKNOWN
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 2

271
ply/README Normal file
View File

@ -0,0 +1,271 @@
PLY (Python Lex-Yacc) Version 3.4
Copyright (C) 2001-2011,
David M. Beazley (Dabeaz LLC)
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of the David Beazley or Dabeaz LLC may be used to
endorse or promote products derived from this software without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
Introduction
============
PLY is a 100% Python implementation of the common parsing tools lex
and yacc. Here are a few highlights:
- PLY is very closely modeled after traditional lex/yacc.
If you know how to use these tools in C, you will find PLY
to be similar.
- PLY provides *very* extensive error reporting and diagnostic
information to assist in parser construction. The original
implementation was developed for instructional purposes. As
a result, the system tries to identify the most common types
of errors made by novice users.
- PLY provides full support for empty productions, error recovery,
precedence specifiers, and moderately ambiguous grammars.
- Parsing is based on LR-parsing which is fast, memory efficient,
better suited to large grammars, and which has a number of nice
properties when dealing with syntax errors and other parsing problems.
Currently, PLY builds its parsing tables using the LALR(1)
algorithm used in yacc.
- PLY uses Python introspection features to build lexers and parsers.
This greatly simplifies the task of parser construction since it reduces
the number of files and eliminates the need to run a separate lex/yacc
tool before running your program.
- PLY can be used to build parsers for "real" programming languages.
Although it is not ultra-fast due to its Python implementation,
PLY can be used to parse grammars consisting of several hundred
rules (as might be found for a language like C). The lexer and LR
parser are also reasonably efficient when parsing typically
sized programs. People have used PLY to build parsers for
C, C++, ADA, and other real programming languages.
How to Use
==========
PLY consists of two files : lex.py and yacc.py. These are contained
within the 'ply' directory which may also be used as a Python package.
To use PLY, simply copy the 'ply' directory to your project and import
lex and yacc from the associated 'ply' package. For example:
import ply.lex as lex
import ply.yacc as yacc
Alternatively, you can copy just the files lex.py and yacc.py
individually and use them as modules. For example:
import lex
import yacc
The file setup.py can be used to install ply using distutils.
The file doc/ply.html contains complete documentation on how to use
the system.
The example directory contains several different examples including a
PLY specification for ANSI C as given in K&R 2nd Ed.
A simple example is found at the end of this document.
Requirements
============
PLY requires the use of Python 2.2 or greater. However, you should
use the latest Python release if possible. It should work on just
about any platform. PLY has been tested with both CPython and Jython.
It also seems to work with IronPython.
Resources
=========
More information about PLY can be obtained on the PLY webpage at:
http://www.dabeaz.com/ply
For a detailed overview of parsing theory, consult the excellent
book "Compilers : Principles, Techniques, and Tools" by Aho, Sethi, and
Ullman. The topics found in "Lex & Yacc" by Levine, Mason, and Brown
may also be useful.
A Google group for PLY can be found at
http://groups.google.com/group/ply-hack
Acknowledgments
===============
A special thanks is in order for all of the students in CS326 who
suffered through about 25 different versions of these tools :-).
The CHANGES file acknowledges those who have contributed patches.
Elias Ioup did the first implementation of LALR(1) parsing in PLY-1.x.
Andrew Waters and Markus Schoepflin were instrumental in reporting bugs
and testing a revised LALR(1) implementation for PLY-2.0.
Special Note for PLY-3.0
========================
PLY-3.0 is the first PLY release to support Python 3. However, backwards
compatibility with Python 2.2 is still preserved. PLY provides dual
Python 2/3 compatibility by restricting its implementation to a common
subset of basic language features. You should not convert PLY using
2to3--it is not necessary and may in fact break the implementation.
Example
=======
Here is a simple example showing a PLY implementation of a calculator
with variables.
# -----------------------------------------------------------------------------
# calc.py
#
# A simple calculator with variables.
# -----------------------------------------------------------------------------
tokens = (
'NAME','NUMBER',
'PLUS','MINUS','TIMES','DIVIDE','EQUALS',
'LPAREN','RPAREN',
)
# Tokens
t_PLUS = r'\+'
t_MINUS = r'-'
t_TIMES = r'\*'
t_DIVIDE = r'/'
t_EQUALS = r'='
t_LPAREN = r'\('
t_RPAREN = r'\)'
t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*'
def t_NUMBER(t):
r'\d+'
t.value = int(t.value)
return t
# Ignored characters
t_ignore = " \t"
def t_newline(t):
r'\n+'
t.lexer.lineno += t.value.count("\n")
def t_error(t):
print("Illegal character '%s'" % t.value[0])
t.lexer.skip(1)
# Build the lexer
import ply.lex as lex
lex.lex()
# Precedence rules for the arithmetic operators
precedence = (
('left','PLUS','MINUS'),
('left','TIMES','DIVIDE'),
('right','UMINUS'),
)
# dictionary of names (for storing variables)
names = { }
def p_statement_assign(p):
'statement : NAME EQUALS expression'
names[p[1]] = p[3]
def p_statement_expr(p):
'statement : expression'
print(p[1])
def p_expression_binop(p):
'''expression : expression PLUS expression
| expression MINUS expression
| expression TIMES expression
| expression DIVIDE expression'''
if p[2] == '+' : p[0] = p[1] + p[3]
elif p[2] == '-': p[0] = p[1] - p[3]
elif p[2] == '*': p[0] = p[1] * p[3]
elif p[2] == '/': p[0] = p[1] / p[3]
def p_expression_uminus(p):
'expression : MINUS expression %prec UMINUS'
p[0] = -p[2]
def p_expression_group(p):
'expression : LPAREN expression RPAREN'
p[0] = p[2]
def p_expression_number(p):
'expression : NUMBER'
p[0] = p[1]
def p_expression_name(p):
'expression : NAME'
try:
p[0] = names[p[1]]
except LookupError:
print("Undefined name '%s'" % p[1])
p[0] = 0
def p_error(p):
print("Syntax error at '%s'" % p.value)
import ply.yacc as yacc
yacc.yacc()
while 1:
try:
s = raw_input('calc > ') # use input() on Python 3
except EOFError:
break
yacc.parse(s)
Bug Reports and Patches
=======================
My goal with PLY is to simply have a decent lex/yacc implementation
for Python. As a general rule, I don't spend huge amounts of time
working on it unless I receive very specific bug reports and/or
patches to fix problems. I also try to incorporate submitted feature
requests and enhancements into each new version. To contact me about
bugs and/or new features, please send email to dave@dabeaz.com.
In addition there is a Google group for discussing PLY related issues at
http://groups.google.com/group/ply-hack
-- Dave

16
ply/TODO Normal file
View File

@ -0,0 +1,16 @@
The PLY to-do list:
1. Finish writing the C Preprocessor module. Started in the
file ply/cpp.py
2. Create and document libraries of useful tokens.
3. Expand the examples/yply tool that parses bison/yacc
files.
4. Think of various diabolical things to do with the
new yacc internals. For example, it is now possible
to specify grammars using completely different schemes
than the reflection approach used by PLY.

874
ply/doc/internal.html Normal file
View File

@ -0,0 +1,874 @@
<html>
<head>
<title>PLY Internals</title>
</head>
<body bgcolor="#ffffff">
<h1>PLY Internals</h1>
<b>
David M. Beazley <br>
dave@dabeaz.com<br>
</b>
<p>
<b>PLY Version: 3.0</b>
<p>
<!-- INDEX -->
<div class="sectiontoc">
<ul>
<li><a href="#internal_nn1">Introduction</a>
<li><a href="#internal_nn2">Grammar Class</a>
<li><a href="#internal_nn3">Productions</a>
<li><a href="#internal_nn4">LRItems</a>
<li><a href="#internal_nn5">LRTable</a>
<li><a href="#internal_nn6">LRGeneratedTable</a>
<li><a href="#internal_nn7">LRParser</a>
<li><a href="#internal_nn8">ParserReflect</a>
<li><a href="#internal_nn9">High-level operation</a>
</ul>
</div>
<!-- INDEX -->
<H2><a name="internal_nn1"></a>1. Introduction</H2>
This document describes classes and functions that make up the internal
operation of PLY. Using this programming interface, it is possible to
manually build a parser using a different interface specification
than what PLY normally uses. For example, you could build a grammar
from information parsed in a completely different input format. Some of
these objects may be useful for building more advanced parsing engines
such as GLR.
<p>
It should be stressed that using PLY at this level is not for the
faint of heart. Generally, it's assumed that you know a bit of
the underlying compiler theory and how an LR parser is put together.
<H2><a name="internal_nn2"></a>2. Grammar Class</H2>
The file <tt>ply.yacc</tt> defines a class <tt>Grammar</tt> that
is used to hold and manipulate information about a grammar
specification. It encapsulates the same basic information
about a grammar that is put into a YACC file including
the list of tokens, precedence rules, and grammar rules.
Various operations are provided to perform different validations
on the grammar. In addition, there are operations to compute
the first and follow sets that are needed by the various table
generation algorithms.
<p>
<tt><b>Grammar(terminals)</b></tt>
<blockquote>
Creates a new grammar object. <tt>terminals</tt> is a list of strings
specifying the terminals for the grammar. An instance <tt>g</tt> of
<tt>Grammar</tt> has the following methods:
</blockquote>
<p>
<b><tt>g.set_precedence(term,assoc,level)</tt></b>
<blockquote>
Sets the precedence level and associativity for a given terminal <tt>term</tt>.
<tt>assoc</tt> is one of <tt>'right'</tt>,
<tt>'left'</tt>, or <tt>'nonassoc'</tt> and <tt>level</tt> is a positive integer. The higher
the value of <tt>level</tt>, the higher the precedence. Here is an example of typical
precedence settings:
<pre>
g.set_precedence('PLUS', 'left',1)
g.set_precedence('MINUS', 'left',1)
g.set_precedence('TIMES', 'left',2)
g.set_precedence('DIVIDE','left',2)
g.set_precedence('UMINUS','left',3)
</pre>
This method must be called prior to adding any productions to the
grammar with <tt>g.add_production()</tt>. The precedence of individual grammar
rules is determined by the precedence of the right-most terminal.
</blockquote>
<p>
<b><tt>g.add_production(name,syms,func=None,file='',line=0)</tt></b>
<blockquote>
Adds a new grammar rule. <tt>name</tt> is the name of the rule,
<tt>syms</tt> is a list of symbols making up the right hand
side of the rule, <tt>func</tt> is the function to call when
reducing the rule. <tt>file</tt> and <tt>line</tt> specify
the filename and line number of the rule and are used for
generating error messages.
<p>
The list of symbols in <tt>syms</tt> may include character
literals and <tt>%prec</tt> specifiers. Here are some
examples:
<pre>
g.add_production('expr',['expr','PLUS','term'],func,file,line)
g.add_production('expr',['expr','"+"','term'],func,file,line)
g.add_production('expr',['MINUS','expr','%prec','UMINUS'],func,file,line)
</pre>
<p>
If any kind of error is detected, a <tt>GrammarError</tt> exception
is raised with a message indicating the reason for the failure.
</blockquote>
<p>
<b><tt>g.set_start(start=None)</tt></b>
<blockquote>
Sets the starting rule for the grammar. <tt>start</tt> is a string
specifying the name of the start rule. If <tt>start</tt> is omitted,
the first grammar rule added with <tt>add_production()</tt> is taken to be
the starting rule. This method must always be called after all
productions have been added.
</blockquote>
<p>
<b><tt>g.find_unreachable()</tt></b>
<blockquote>
Diagnostic function. Returns a list of all unreachable non-terminals
defined in the grammar. This is used to identify inactive parts of
the grammar specification.
</blockquote>
<p>
<b><tt>g.infinite_cycle()</tt></b>
<blockquote>
Diagnostic function. Returns a list of all non-terminals in the
grammar that result in an infinite cycle. This condition occurs if
there is no way for a grammar rule to expand to a string containing
only terminal symbols.
</blockquote>
<p>
<b><tt>g.undefined_symbols()</tt></b>
<blockquote>
Diagnostic function. Returns a list of tuples <tt>(name, prod)</tt>
corresponding to undefined symbols in the grammar. <tt>name</tt> is the
name of the undefined symbol and <tt>prod</tt> is an instance of
<tt>Production</tt> which has information about the production rule
where the undefined symbol was used.
</blockquote>
<p>
<b><tt>g.unused_terminals()</tt></b>
<blockquote>
Diagnostic function. Returns a list of terminals that were defined,
but never used in the grammar.
</blockquote>
<p>
<b><tt>g.unused_rules()</tt></b>
<blockquote>
Diagnostic function. Returns a list of <tt>Production</tt> instances
corresponding to production rules that were defined in the grammar,
but never used anywhere. This is slightly different
than <tt>find_unreachable()</tt>.
</blockquote>
<p>
<b><tt>g.unused_precedence()</tt></b>
<blockquote>
Diagnostic function. Returns a list of tuples <tt>(term, assoc)</tt>
corresponding to precedence rules that were set, but never used in the
grammar. <tt>term</tt> is the terminal name and <tt>assoc</tt> is the
precedence associativity (e.g., <tt>'left'</tt>, <tt>'right'</tt>,
or <tt>'nonassoc'</tt>).
</blockquote>
<p>
<b><tt>g.compute_first()</tt></b>
<blockquote>
Compute all of the first sets for all symbols in the grammar. Returns a dictionary
mapping symbol names to a list of all first symbols.
</blockquote>
<p>
<b><tt>g.compute_follow()</tt></b>
<blockquote>
Compute all of the follow sets for all non-terminals in the grammar.
The follow set is the set of all possible symbols that might follow a
given non-terminal. Returns a dictionary mapping non-terminal names
to a list of symbols.
</blockquote>
<p>
<b><tt>g.build_lritems()</tt></b>
<blockquote>
Calculates all of the LR items for all productions in the grammar. This
step is required before using the grammar for any kind of table generation.
See the section on LR items below.
</blockquote>
<p>
The following attributes are set by the above methods and may be useful
in code that works with the grammar. All of these attributes should be
assumed to be read-only. Changing their values directly will likely
break the grammar.
<p>
<b><tt>g.Productions</tt></b>
<blockquote>
A list of all productions added. The first entry is reserved for
a production representing the starting rule. The objects in this list
are instances of the <tt>Production</tt> class, described shortly.
</blockquote>
<p>
<b><tt>g.Prodnames</tt></b>
<blockquote>
A dictionary mapping the names of nonterminals to a list of all
productions of that nonterminal.
</blockquote>
<p>
<b><tt>g.Terminals</tt></b>
<blockquote>
A dictionary mapping the names of terminals to a list of the
production numbers where they are used.
</blockquote>
<p>
<b><tt>g.Nonterminals</tt></b>
<blockquote>
A dictionary mapping the names of nonterminals to a list of the
production numbers where they are used.
</blockquote>
<p>
<b><tt>g.First</tt></b>
<blockquote>
A dictionary representing the first sets for all grammar symbols. This is
computed and returned by the <tt>compute_first()</tt> method.
</blockquote>
<p>
<b><tt>g.Follow</tt></b>
<blockquote>
A dictionary representing the follow sets for all grammar rules. This is
computed and returned by the <tt>compute_follow()</tt> method.
</blockquote>
<p>
<b><tt>g.Start</tt></b>
<blockquote>
Starting symbol for the grammar. Set by the <tt>set_start()</tt> method.
</blockquote>
For the purposes of debugging, a <tt>Grammar</tt> object supports the <tt>__len__()</tt> and
<tt>__getitem__()</tt> special methods. Accessing <tt>g[n]</tt> returns the nth production
from the grammar.
<H2><a name="internal_nn3"></a>3. Productions</H2>
<tt>Grammar</tt> objects store grammar rules as instances of a <tt>Production</tt> class. This
class has no public constructor--you should only create productions by calling <tt>Grammar.add_production()</tt>.
The following attributes are available on a <tt>Production</tt> instance <tt>p</tt>.
<p>
<b><tt>p.name</tt></b>
<blockquote>
The name of the production. For a grammar rule such as <tt>A : B C D</tt>, this is <tt>'A'</tt>.
</blockquote>
<p>
<b><tt>p.prod</tt></b>
<blockquote>
A tuple of symbols making up the right-hand side of the production. For a grammar rule such as <tt>A : B C D</tt>, this is <tt>('B','C','D')</tt>.
</blockquote>
<p>
<b><tt>p.number</tt></b>
<blockquote>
Production number. An integer containing the index of the production in the grammar's <tt>Productions</tt> list.
</blockquote>
<p>
<b><tt>p.func</tt></b>
<blockquote>
The name of the reduction function associated with the production.
This is the function that will execute when reducing the entire
grammar rule during parsing.
</blockquote>
<p>
<b><tt>p.callable</tt></b>
<blockquote>
The callable object associated with the name in <tt>p.func</tt>. This is <tt>None</tt>
unless the production has been bound using <tt>bind()</tt>.
</blockquote>
<p>
<b><tt>p.file</tt></b>
<blockquote>
Filename associated with the production. Typically this is the file where the production was defined. Used for error messages.
</blockquote>
<p>
<b><tt>p.lineno</tt></b>
<blockquote>
Line number associated with the production. Typically this is the line number in <tt>p.file</tt> where the production was defined. Used for error messages.
</blockquote>
<p>
<b><tt>p.prec</tt></b>
<blockquote>
Precedence and associativity associated with the production. This is a tuple <tt>(assoc,level)</tt> where
<tt>assoc</tt> is one of <tt>'left'</tt>,<tt>'right'</tt>, or <tt>'nonassoc'</tt> and <tt>level</tt> is
an integer. This value is determined by the precedence of the right-most terminal symbol in the production
or by use of the <tt>%prec</tt> specifier when adding the production.
</blockquote>
<p>
<b><tt>p.usyms</tt></b>
<blockquote>
A list of all unique symbols found in the production.
</blockquote>
<p>
<b><tt>p.lr_items</tt></b>
<blockquote>
A list of all LR items for this production. This attribute only has a meaningful value if the
<tt>Grammar.build_lritems()</tt> method has been called. The items in this list are
instances of <tt>LRItem</tt> described below.
</blockquote>
<p>
<b><tt>p.lr_next</tt></b>
<blockquote>
The head of a linked-list representation of the LR items in <tt>p.lr_items</tt>.
This attribute only has a meaningful value if the <tt>Grammar.build_lritems()</tt>
method has been called. Each <tt>LRItem</tt> instance has a <tt>lr_next</tt> attribute
to move to the next item. The list is terminated by <tt>None</tt>.
</blockquote>
<p>
<b><tt>p.bind(dict)</tt></b>
<blockquote>
Binds the production function name in <tt>p.func</tt> to a callable object in
<tt>dict</tt>. This operation is typically carried out in the last step
prior to running the parsing engine and is needed since parsing tables are typically
read from files which only include the function names, not the functions themselves.
</blockquote>
<P>
<tt>Production</tt> objects support
the <tt>__len__()</tt>, <tt>__getitem__()</tt>, and <tt>__str__()</tt>
special methods.
<tt>len(p)</tt> returns the number of symbols in <tt>p.prod</tt>
and <tt>p[n]</tt> is the same as <tt>p.prod[n]</tt>.
<H2><a name="internal_nn4"></a>4. LRItems</H2>
The construction of parsing tables in an LR-based parser generator is primarily
done over a set of "LR Items". An LR item represents a stage of parsing one
of the grammar rules. To compute the LR items, it is first necessary to
call <tt>Grammar.build_lritems()</tt>. Once this step is complete, all of the productions
in the grammar will have their LR items attached to them.
<p>
Here is an interactive example that shows what LR items look like if you
interactively experiment. In this example, <tt>g</tt> is a <tt>Grammar</tt>
object.
<blockquote>
<pre>
>>> <b>g.build_lritems()</b>
>>> <b>p = g[1]</b>
>>> <b>p</b>
Production(statement -> ID = expr)
>>>
</pre>
</blockquote>
In the above code, <tt>p</tt> represents the first grammar rule. In
this case, a rule <tt>'statement -> ID = expr'</tt>.
<p>
Now, let's look at the LR items for <tt>p</tt>.
<blockquote>
<pre>
>>> <b>p.lr_items</b>
[LRItem(statement -> . ID = expr),
LRItem(statement -> ID . = expr),
LRItem(statement -> ID = . expr),
LRItem(statement -> ID = expr .)]
>>>
</pre>
</blockquote>
In each LR item, the dot (.) represents a specific stage of parsing. In each successive LR item, the dot
is advanced by one symbol. It is only when the dot reaches the very end that a production
is successfully parsed.
<p>
An instance <tt>lr</tt> of <tt>LRItem</tt> has the following
attributes that hold information related to that specific stage of
parsing.
<p>
<b><tt>lr.name</tt></b>
<blockquote>
The name of the grammar rule. For example, <tt>'statement'</tt> in the above example.
</blockquote>
<p>
<b><tt>lr.prod</tt></b>
<blockquote>
A tuple of symbols representing the right-hand side of the production, including the
special <tt>'.'</tt> character. For example, <tt>('ID','.','=','expr')</tt>.
</blockquote>
<p>
<b><tt>lr.number</tt></b>
<blockquote>
An integer representing the production number in the grammar.
</blockquote>
<p>
<b><tt>lr.usyms</tt></b>
<blockquote>
A set of unique symbols in the production. Inherited from the original <tt>Production</tt> instance.
</blockquote>
<p>
<b><tt>lr.lr_index</tt></b>
<blockquote>
An integer representing the position of the dot (.). You should never use <tt>lr.prod.index()</tt>
to search for it--the result will be wrong if the grammar happens to also use (.) as a character
literal.
</blockquote>
<p>
<b><tt>lr.lr_after</tt></b>
<blockquote>
A list of all productions that can legally appear immediately to the right of the
dot (.). This list contains <tt>Production</tt> instances. This attribute
represents all of the possible branches a parse can take from the current position.
For example, suppose that <tt>lr</tt> represents a stage immediately before
an expression like this:
<pre>
>>> <b>lr</b>
LRItem(statement -> ID = . expr)
>>>
</pre>
Then, the value of <tt>lr.lr_after</tt> might look like this, showing all productions that
can legally appear next:
<pre>
>>> <b>lr.lr_after</b>
[Production(expr -> expr PLUS expr),
Production(expr -> expr MINUS expr),
Production(expr -> expr TIMES expr),
Production(expr -> expr DIVIDE expr),
Production(expr -> MINUS expr),
Production(expr -> LPAREN expr RPAREN),
Production(expr -> NUMBER),
Production(expr -> ID)]
>>>
</pre>
</blockquote>
<p>
<b><tt>lr.lr_before</tt></b>
<blockquote>
The grammar symbol that appears immediately before the dot (.) or <tt>None</tt> if
at the beginning of the parse.
</blockquote>
<p>
<b><tt>lr.lr_next</tt></b>
<blockquote>
A link to the next LR item, representing the next stage of the parse. <tt>None</tt> if <tt>lr</tt>
is the last LR item.
</blockquote>
<tt>LRItem</tt> instances also support the <tt>__len__()</tt> and <tt>__getitem__()</tt> special methods.
<tt>len(lr)</tt> returns the number of items in <tt>lr.prod</tt> including the dot (.). <tt>lr[n]</tt>
returns <tt>lr.prod[n]</tt>.
<p>
It goes without saying that all of the attributes associated with LR
items should be assumed to be read-only. Modifications will very
likely create a small black-hole that will consume you and your code.
<H2><a name="internal_nn5"></a>5. LRTable</H2>
The <tt>LRTable</tt> class is used to represent LR parsing table data. This
minimally includes the production list, action table, and goto table.
<p>
<b><tt>LRTable()</tt></b>
<blockquote>
Create an empty LRTable object. This object contains only the information needed to
run an LR parser.
</blockquote>
An instance <tt>lrtab</tt> of <tt>LRTable</tt> has the following methods:
<p>
<b><tt>lrtab.read_table(module)</tt></b>
<blockquote>
Populates the LR table with information from the module specified in <tt>module</tt>.
<tt>module</tt> is either a module object already loaded with <tt>import</tt> or
the name of a Python module. If it's a string containing a module name, it is
loaded and parsing data is extracted. Returns the signature value that was used
when initially writing the tables. Raises a <tt>VersionError</tt> exception if
the module was created using an incompatible version of PLY.
</blockquote>
<p>
<b><tt>lrtab.bind_callables(dict)</tt></b>
<blockquote>
This binds all of the function names used in productions to callable objects
found in the dictionary <tt>dict</tt>. During table generation and when reading
LR tables from files, PLY only uses the names of action functions such as <tt>'p_expr'</tt>,
<tt>'p_statement'</tt>, etc. In order to actually run the parser, these names
have to be bound to callable objects. This method is always called prior to
running a parser.
</blockquote>
After <tt>lrtab</tt> has been populated, the following attributes are defined.
<p>
<b><tt>lrtab.lr_method</tt></b>
<blockquote>
The LR parsing method used (e.g., <tt>'LALR'</tt>)
</blockquote>
<p>
<b><tt>lrtab.lr_productions</tt></b>
<blockquote>
The production list. If the parsing tables have been newly
constructed, this will be a list of <tt>Production</tt> instances. If
the parsing tables have been read from a file, it's a list
of <tt>MiniProduction</tt> instances. This, together
with <tt>lr_action</tt> and <tt>lr_goto</tt> contain all of the
information needed by the LR parsing engine.
</blockquote>
<p>
<b><tt>lrtab.lr_action</tt></b>
<blockquote>
The LR action dictionary that implements the underlying state machine.
The keys of this dictionary are the LR states.
</blockquote>
<p>
<b><tt>lrtab.lr_goto</tt></b>
<blockquote>
The LR goto table that contains information about grammar rule reductions.
</blockquote>
<H2><a name="internal_nn6"></a>6. LRGeneratedTable</H2>
The <tt>LRGeneratedTable</tt> class represents constructed LR parsing tables on a
grammar. It is a subclass of <tt>LRTable</tt>.
<p>
<b><tt>LRGeneratedTable(grammar, method='LALR',log=None)</tt></b>
<blockquote>
Create the LR parsing tables on a grammar. <tt>grammar</tt> is an instance of <tt>Grammar</tt>,
<tt>method</tt> is a string with the parsing method (<tt>'SLR'</tt> or <tt>'LALR'</tt>), and
<tt>log</tt> is a logger object used to write debugging information. The debugging information
written to <tt>log</tt> is the same as what appears in the <tt>parser.out</tt> file created
by yacc. By supplying a custom logger with a different message format, it is possible to get
more information (e.g., the line number in <tt>yacc.py</tt> used for issuing each line of
output in the log). The result is an instance of <tt>LRGeneratedTable</tt>.
</blockquote>
<p>
An instance <tt>lr</tt> of <tt>LRGeneratedTable</tt> has the following attributes.
<p>
<b><tt>lr.grammar</tt></b>
<blockquote>
A link to the Grammar object used to construct the parsing tables.
</blockquote>
<p>
<b><tt>lr.lr_method</tt></b>
<blockquote>
The LR parsing method used (e.g., <tt>'LALR'</tt>)
</blockquote>
<p>
<b><tt>lr.lr_productions</tt></b>
<blockquote>
A reference to <tt>grammar.Productions</tt>. This, together with <tt>lr_action</tt> and <tt>lr_goto</tt>
contain all of the information needed by the LR parsing engine.
</blockquote>
<p>
<b><tt>lr.lr_action</tt></b>
<blockquote>
The LR action dictionary that implements the underlying state machine. The keys of this dictionary are
the LR states.
</blockquote>
<p>
<b><tt>lr.lr_goto</tt></b>
<blockquote>
The LR goto table that contains information about grammar rule reductions.
</blockquote>
<p>
<b><tt>lr.sr_conflicts</tt></b>
<blockquote>
A list of tuples <tt>(state,token,resolution)</tt> identifying all shift/reduce conflicts. <tt>state</tt> is the LR state
number where the conflict occurred, <tt>token</tt> is the token causing the conflict, and <tt>resolution</tt> is
a string describing the resolution taken. <tt>resolution</tt> is either <tt>'shift'</tt> or <tt>'reduce'</tt>.
</blockquote>
<p>
<b><tt>lr.rr_conflicts</tt></b>
<blockquote>
A list of tuples <tt>(state,rule,rejected)</tt> identifying all reduce/reduce conflicts. <tt>state</tt> is the
LR state number where the conflict occurred, <tt>rule</tt> is the production rule that was selected
and <tt>rejected</tt> is the production rule that was rejected. Both <tt>rule</tt> and <tt>rejected</tt> are
instances of <tt>Production</tt>. They can be inspected to provide the user with more information.
</blockquote>
<p>
There are two public methods of <tt>LRGeneratedTable</tt>.
<p>
<b><tt>lr.write_table(modulename,outputdir="",signature="")</tt></b>
<blockquote>
Writes the LR parsing table information to a Python module. <tt>modulename</tt> is a string
specifying the name of a module such as <tt>"parsetab"</tt>. <tt>outputdir</tt> is the name of a
directory where the module should be created. <tt>signature</tt> is a string representing a
grammar signature that's written into the output file. This can be used to detect when
the data stored in a module file is out-of-sync with the grammar specification (and that
the tables need to be regenerated). If <tt>modulename</tt> is a string <tt>"parsetab"</tt>,
this function creates a file called <tt>parsetab.py</tt>. If the module name represents a
package such as <tt>"foo.bar.parsetab"</tt>, then only the last component, <tt>"parsetab"</tt> is
used.
</blockquote>
<H2><a name="internal_nn7"></a>7. LRParser</H2>
The <tt>LRParser</tt> class implements the low-level LR parsing engine.
<p>
<b><tt>LRParser(lrtab, error_func)</tt></b>
<blockquote>
Create an LRParser. <tt>lrtab</tt> is an instance of <tt>LRTable</tt>
containing the LR production and state tables. <tt>error_func</tt> is the
error function to invoke in the event of a parsing error.
</blockquote>
An instance <tt>p</tt> of <tt>LRParser</tt> has the following methods:
<p>
<b><tt>p.parse(input=None,lexer=None,debug=0,tracking=0,tokenfunc=None)</tt></b>
<blockquote>
Run the parser. <tt>input</tt> is a string, which if supplied is fed into the
lexer using its <tt>input()</tt> method. <tt>lexer</tt> is an instance of the
<tt>Lexer</tt> class to use for tokenizing. If not supplied, the last lexer
created with the <tt>lex</tt> module is used. <tt>debug</tt> is a boolean flag
that enables debugging. <tt>tracking</tt> is a boolean flag that tells the
parser to perform additional line number tracking. <tt>tokenfunc</tt> is a callable
function that returns the next token. If supplied, the parser will use it to get
all tokens.
</blockquote>
<p>
<b><tt>p.restart()</tt></b>
<blockquote>
Resets the parser state for a parse already in progress.
</blockquote>
<H2><a name="internal_nn8"></a>8. ParserReflect</H2>
<p>
The <tt>ParserReflect</tt> class is used to collect parser specification data
from a Python module or object. This class is what collects all of the
<tt>p_rule()</tt> functions in a PLY file, performs basic error checking,
and collects all of the needed information to build a grammar. Most of the
high-level PLY interface as used by the <tt>yacc()</tt> function is actually
implemented by this class.
<p>
<b><tt>ParserReflect(pdict, log=None)</tt></b>
<blockquote>
Creates a <tt>ParserReflect</tt> instance. <tt>pdict</tt> is a dictionary
containing parser specification data. This dictionary typically corresponds
to the module or class dictionary of code that implements a PLY parser.
<tt>log</tt> is a logger instance that will be used to report error
messages.
</blockquote>
An instance <tt>p</tt> of <tt>ParserReflect</tt> has the following methods:
<p>
<b><tt>p.get_all()</tt></b>
<blockquote>
Collect and store all required parsing information.
</blockquote>
<p>
<b><tt>p.validate_all()</tt></b>
<blockquote>
Validate all of the collected parsing information. This is a separate step
from <tt>p.get_all()</tt> as a performance optimization. In order to
reduce parser start-up time, a parser can elect to only validate the
parsing data when regenerating the parsing tables. The validation
step tries to collect as much information as possible rather than
raising an exception at the first sign of trouble. The attribute
<tt>p.error</tt> is set if there are any validation errors. The
value of this attribute is also returned.
</blockquote>
<p>
<b><tt>p.signature()</tt></b>
<blockquote>
Compute a signature representing the contents of the collected parsing
data. The signature value should change if anything in the parser
specification has changed in a way that would justify parser table
regeneration. This method can be called after <tt>p.get_all()</tt>,
but before <tt>p.validate_all()</tt>.
</blockquote>
The following attributes are set in the process of collecting data:
<p>
<b><tt>p.start</tt></b>
<blockquote>
The grammar start symbol, if any. Taken from <tt>pdict['start']</tt>.
</blockquote>
<p>
<b><tt>p.error_func</tt></b>
<blockquote>
The error handling function or <tt>None</tt>. Taken from <tt>pdict['p_error']</tt>.
</blockquote>
<p>
<b><tt>p.tokens</tt></b>
<blockquote>
The token list. Taken from <tt>pdict['tokens']</tt>.
</blockquote>
<p>
<b><tt>p.prec</tt></b>
<blockquote>
The precedence specifier. Taken from <tt>pdict['precedence']</tt>.
</blockquote>
<p>
<b><tt>p.preclist</tt></b>
<blockquote>
A parsed version of the precedence specified. A list of tuples of the form
<tt>(token,assoc,level)</tt> where <tt>token</tt> is the terminal symbol,
<tt>assoc</tt> is the associativity (e.g., <tt>'left'</tt>) and <tt>level</tt>
is a numeric precedence level.
</blockquote>
<p>
<b><tt>p.grammar</tt></b>
<blockquote>
A list of tuples <tt>(name, rules)</tt> representing the grammar rules. <tt>name</tt> is the
name of a Python function or method in <tt>pdict</tt> that starts with <tt>"p_"</tt>.
<tt>rules</tt> is a list of tuples <tt>(filename,line,prodname,syms)</tt> representing
the grammar rules found in the documentation string of that function. <tt>filename</tt> and <tt>line</tt> contain location
information that can be used for debugging. <tt>prodname</tt> is the name of the
production. <tt>syms</tt> is the right-hand side of the production. If you have a
function like this
<pre>
def p_expr(p):
'''expr : expr PLUS expr
| expr MINUS expr
| expr TIMES expr
| expr DIVIDE expr'''
</pre>
then the corresponding entry in <tt>p.grammar</tt> might look like this:
<pre>
('p_expr', [ ('calc.py',10,'expr', ['expr','PLUS','expr']),
('calc.py',11,'expr', ['expr','MINUS','expr']),
('calc.py',12,'expr', ['expr','TIMES','expr']),
('calc.py',13,'expr', ['expr','DIVIDE','expr'])
])
</pre>
</blockquote>
<p>
<b><tt>p.pfuncs</tt></b>
<blockquote>
A sorted list of tuples <tt>(line, file, name, doc)</tt> representing all of
the <tt>p_</tt> functions found. <tt>line</tt> and <tt>file</tt> give location
information. <tt>name</tt> is the name of the function. <tt>doc</tt> is the
documentation string. This list is sorted in ascending order by line number.
</blockquote>
<p>
<b><tt>p.files</tt></b>
<blockquote>
A dictionary holding all of the source filenames that were encountered
while collecting parser information. Only the keys of this dictionary have
any meaning.
</blockquote>
<p>
<b><tt>p.error</tt></b>
<blockquote>
An attribute that indicates whether or not any critical errors
occurred in validation. If this is set, it means that some kind
of problem was detected and that no further processing should be
performed.
</blockquote>
<H2><a name="internal_nn9"></a>9. High-level operation</H2>
Using all of the above classes requires some attention to detail. The <tt>yacc()</tt>
function carries out a very specific sequence of operations to create a grammar.
This same sequence should be emulated if you build an alternative PLY interface.
<ol>
<li>A <tt>ParserReflect</tt> object is created and raw grammar specification data is
collected.
<li>A <tt>Grammar</tt> object is created and populated with information
from the specification data.
<li>A <tt>LRGeneratedTable</tt> object is created to run the LALR algorithm over
the <tt>Grammar</tt> object.
<li>Productions in the <tt>LRGeneratedTable</tt> are bound to callables using the <tt>bind_callables()</tt>
method.
<li>A <tt>LRParser</tt> object is created from the information in the
<tt>LRGeneratedTable</tt> object.
</ol>
</body>
</html>

194
ply/doc/makedoc.py Normal file
View File

@ -0,0 +1,194 @@
#!/usr/local/bin/python
###############################################################################
# Takes a chapter as input and adds internal links and numbering to all
# of the H1, H2, H3, H4 and H5 sections.
#
# Every heading HTML tag (H1, H2 etc) is given an autogenerated name to link
# to. However, if the name is not an autogenerated name from a previous run,
# it will be kept. If it is autogenerated, it might change on subsequent runs
# of this program. Thus if you want to create links to one of the headings,
# then change the heading link name to something that does not look like an
# autogenerated link name.
###############################################################################
import os
import re
import string
import sys
###############################################################################
# Functions
###############################################################################
# Regexes for an existing <a name="..."></a> anchor and for the "_nn<digit>"
# marker that identifies an autogenerated anchor name.
alink = re.compile(r"<a *name *= *\"(.*)\"></a>", re.IGNORECASE)
heading = re.compile(r"(_nn\d)", re.IGNORECASE)


def getheadingname(m):
    """Return the anchor name to use for the heading matched by *m*.

    m is a match object produced by one of the heading regexes (h2..h5);
    its group(1) holds the heading's existing ``<a name="..."></a>`` markup,
    or None when the heading has no anchor yet.

    A hand-written anchor name is kept unchanged.  A missing name, or one
    that looks autogenerated (it contains ``_nn<digit>``), is replaced by
    ``<filenamebase>_nn<nameindex>``, built from the module-level
    ``filenamebase`` and ``nameindex`` values maintained by the main program.
    """
    existing = m.group(1)
    if existing is not None:
        amatch = alink.match(existing)
        if amatch:
            headingname = amatch.group(1)
            # search(), not match(): autogenerated names have the form
            # "<base>_nn<N>", so the marker never sits at position 0.  With
            # match() such names were treated as hand-written and kept, which
            # let a newly inserted heading receive a name that an existing
            # heading already carried (duplicate anchors).
            if not heading.search(headingname):
                # A non-autogenerated heading - keep it
                return headingname
    # The heading name was either non-existent or autogenerated, so a fresh
    # name is generated from the current position in the document.
    return "%s_nn%d" % (filenamebase, nameindex)
###############################################################################
# Main program
###############################################################################
if len(sys.argv) != 2:
    # print() with a single string argument behaves the same on Python 2 and 3.
    print("usage: makedoc.py filename")
    sys.exit(1)

filename = sys.argv[1]
# Prefix for generated anchor names: the file name up to its first dot.
# The directory part is dropped so that "./internal.html" or
# "doc/internal.html" still yields "internal" rather than "" or "doc/internal".
filenamebase = os.path.basename(filename).split(".")[0]

# Running heading counters, one per level (H2..H5).  A non-zero counter also
# means that the index <ul> for that level is currently open.
section = 0
subsection = 0
subsubsection = 0
subsubsubsection = 0
# Number of headings seen so far; read by getheadingname() to build names.
nameindex = 0
name = ""

# Regexs for <h1>,... <h5> sections.  Group 1 is the existing anchor markup
# (if any), group 2 the heading text with any old numbering stripped.
# Note that h1 is compiled but never matched below: H1 lines pass through
# unchanged.
h1 = re.compile(r".*?<H1>(<a.*a>)*[\d\.\s]*(.*?)</H1>", re.IGNORECASE)
h2 = re.compile(r".*?<H2>(<a.*a>)*[\d\.\s]*(.*?)</H2>", re.IGNORECASE)
h3 = re.compile(r".*?<H3>(<a.*a>)*[\d\.\s]*(.*?)</H3>", re.IGNORECASE)
h4 = re.compile(r".*?<H4>(<a.*a>)*[\d\.\s]*(.*?)</H4>", re.IGNORECASE)
h5 = re.compile(r".*?<H5>(<a.*a>)*[\d\.\s]*(.*?)</H5>", re.IGNORECASE)

# Read the document and keep a backup copy before it is overwritten below.
# Context managers make sure both handles are flushed and closed.
with open(filename) as infile:
    data = infile.read()
with open(filename + ".bak", "w") as backup:
    backup.write(data)

lines = data.splitlines()
result = []  # This is the result of postprocessing the file
# index accumulates the table of contents inserted at the @INDEX@ placeholder.
index = "<!-- INDEX -->\n<div class=\"sectiontoc\">\n"

skip = 0       # non-zero while inside the old <!-- INDEX --> ... block
skipspace = 0  # non-zero right after a heading, to normalise blank lines

for s in lines:
    if s == "<!-- INDEX -->":
        # The old index sits between two marker lines.  The first marker is
        # replaced by a placeholder; everything up to and including the
        # second marker is dropped.
        if not skip:
            result.append("@INDEX@")
            skip = 1
        else:
            skip = 0
        continue
    if skip:
        continue

    # After a heading, swallow existing blank lines and emit exactly two.
    if not s and skipspace:
        continue
    if skipspace:
        result.append("")
        result.append("")
        skipspace = 0

    m = h2.match(s)
    if m:
        prevheadingtext = m.group(2)
        nameindex += 1
        section += 1
        headingname = getheadingname(m)
        result.append("""<H2><a name="%s"></a>%d. %s</H2>""" % (headingname, section, prevheadingtext))

        # Close every deeper list that is still open.
        if subsubsubsection:
            index += "</ul>\n"
        if subsubsection:
            index += "</ul>\n"
        if subsection:
            index += "</ul>\n"
        if section == 1:
            index += "<ul>\n"

        index += """<li><a href="#%s">%s</a>\n""" % (headingname, prevheadingtext)
        subsection = 0
        subsubsection = 0
        subsubsubsection = 0
        skipspace = 1
        continue

    m = h3.match(s)
    if m:
        prevheadingtext = m.group(2)
        nameindex += 1
        subsection += 1
        headingname = getheadingname(m)
        result.append("""<H3><a name="%s"></a>%d.%d %s</H3>""" % (headingname, section, subsection, prevheadingtext))

        if subsubsubsection:
            index += "</ul>\n"
        if subsubsection:
            index += "</ul>\n"
        if subsection == 1:
            index += "<ul>\n"

        index += """<li><a href="#%s">%s</a>\n""" % (headingname, prevheadingtext)
        subsubsection = 0
        # The H5 list was closed above, so its counter must be cleared too;
        # otherwise the next H3 would emit a second, unmatched </ul>.
        subsubsubsection = 0
        skipspace = 1
        continue

    m = h4.match(s)
    if m:
        prevheadingtext = m.group(2)
        nameindex += 1
        subsubsection += 1
        headingname = getheadingname(m)
        result.append("""<H4><a name="%s"></a>%d.%d.%d %s</H4>""" % (headingname, section, subsection, subsubsection, prevheadingtext))

        # Test the H5 counter BEFORE resetting it, so that an open H5 list
        # is actually closed when a new H4 starts.
        if subsubsubsection:
            index += "</ul>\n"
        if subsubsection == 1:
            index += "<ul>\n"

        index += """<li><a href="#%s">%s</a>\n""" % (headingname, prevheadingtext)
        subsubsubsection = 0
        skipspace = 1
        continue

    m = h5.match(s)
    if m:
        prevheadingtext = m.group(2)
        nameindex += 1
        subsubsubsection += 1
        headingname = getheadingname(m)
        result.append("""<H5><a name="%s"></a>%d.%d.%d.%d %s</H5>""" % (headingname, section, subsection, subsubsection, subsubsubsection, prevheadingtext))

        if subsubsubsection == 1:
            index += "<ul>\n"

        index += """<li><a href="#%s">%s</a>\n""" % (headingname, prevheadingtext)
        skipspace = 1
        continue

    # Any other line is copied through unchanged.
    result.append(s)

# Close whatever index lists are still open at the end of the document.
if subsubsubsection:
    index += "</ul>\n"
if subsubsection:
    index += "</ul>\n"
if subsection:
    index += "</ul>\n"
if section:
    index += "</ul>\n"

index += "</div>\n<!-- INDEX -->\n"

data = "\n".join(result)
data = data.replace("@INDEX@", index) + "\n"

# Write the file back out
with open(filename, "w") as outfile:
    outfile.write(data)

3262
ply/doc/ply.html Normal file

File diff suppressed because it is too large Load Diff

79
ply/example/BASIC/README Normal file
View File

@ -0,0 +1,79 @@
Inspired by a September 14, 2006 Salon article "Why Johnny Can't Code" by
David Brin (http://www.salon.com/tech/feature/2006/09/14/basic/index.html),
I thought that a fully working BASIC interpreter might be an interesting,
if not questionable, PLY example. Uh, okay, so maybe it's just a bad idea,
but in any case, here it is.
In this example, you'll find a rough implementation of 1964 Dartmouth BASIC
as described in the manual at:
http://www.bitsavers.org/pdf/dartmouth/BASIC_Oct64.pdf
See also:
http://en.wikipedia.org/wiki/Dartmouth_BASIC
This dialect is downright primitive---there are no string variables
and no facilities for interactive input. Moreover, subroutines and functions
are brain-dead even more than they usually are for BASIC. Of course,
the GOTO statement is provided.
Nevertheless, there are a few interesting aspects of this example:
- It illustrates a fully working interpreter including lexing, parsing,
and interpretation of instructions.
- The parser shows how to catch and report various kinds of parsing
errors in a more graceful way.
- The example parses both files (supplied on the command line) and
interactive input entered line by line.
- It shows how you might represent parsed information. In this case,
each BASIC statement is encoded into a Python tuple containing the
statement type and parameters. These tuples are then stored in
a dictionary indexed by program line numbers.
- Even though it's just BASIC, the parser contains more than 80
rules and 150 parsing states. Thus, it's a little more meaty than
the calculator example.
To use the example, run it as follows:
% python basic.py hello.bas
HELLO WORLD
%
or use it interactively:
% python basic.py
[BASIC] 10 PRINT "HELLO WORLD"
[BASIC] 20 END
[BASIC] RUN
HELLO WORLD
[BASIC]
The following files are defined:
basic.py - High level script that controls everything
basiclex.py - BASIC tokenizer
basparse.py - BASIC parser
basinterp.py - BASIC interpreter that runs parsed programs.
In addition, a number of sample BASIC programs (.bas suffix) are
provided. These were taken out of the Dartmouth manual.
Disclaimer: I haven't spent a ton of time testing this and it's likely that
I've skimped here and there on a few finer details (e.g., strictly enforcing
variable naming rules). However, the interpreter seems to be able to run
the examples in the BASIC manual.
Have fun!
-Dave

View File

@ -0,0 +1,71 @@
# An implementation of Dartmouth BASIC (1964)
#
import sys
sys.path.insert(0,"../..")
if sys.version_info[0] >= 3:
raw_input = input
import basiclex
import basparse
import basinterp
# If a filename has been specified, we try to run it.
# If a runtime error occurs, we bail out and enter
# interactive mode below
if len(sys.argv) == 2:
    # Read the program through a context manager so the file handle is
    # closed as soon as the text is in memory.
    with open(sys.argv[1]) as source:
        data = source.read()
    prog = basparse.parse(data)
    if not prog:
        raise SystemExit
    b = basinterp.BasicInterpreter(prog)
    try:
        b.run()
        raise SystemExit
    except RuntimeError:
        # The interpreter has already printed its diagnostic; fall through
        # to the interactive prompt with the loaded program still in place.
        pass
else:
    b = basinterp.BasicInterpreter({})

# Interactive mode.  This incrementally adds/deletes statements
# from the program stored in the BasicInterpreter object.  In
# addition, special commands 'NEW','LIST',and 'RUN' are added.
# Specifying a line number with no code deletes that line from
# the program.
while 1:
    try:
        line = raw_input("[BASIC] ")
    except EOFError:
        raise SystemExit
    if not line:
        continue
    line += "\n"
    prog = basparse.parse(line)
    if not prog:
        continue

    keys = list(prog)
    if keys[0] > 0:
        # A numbered statement: merge it into the stored program.
        b.add_statements(prog)
    else:
        # Line number 0 is how the parser tags interactive commands.
        stat = prog[keys[0]]
        if stat[0] == 'RUN':
            try:
                b.run()
            except RuntimeError:
                pass
        elif stat[0] == 'LIST':
            b.list()
        elif stat[0] == 'BLANK':
            b.del_line(stat[1])
        elif stat[0] == 'NEW':
            b.new()

View File

@ -0,0 +1,74 @@
# An implementation of Dartmouth BASIC (1964)
from ply import *
keywords = (
'LET','READ','DATA','PRINT','GOTO','IF','THEN','FOR','NEXT','TO','STEP',
'END','STOP','DEF','GOSUB','DIM','REM','RETURN','RUN','LIST','NEW',
)
tokens = keywords + (
'EQUALS','PLUS','MINUS','TIMES','DIVIDE','POWER',
'LPAREN','RPAREN','LT','LE','GT','GE','NE',
'COMMA','SEMI', 'INTEGER','FLOAT', 'STRING',
'ID','NEWLINE'
)
t_ignore = ' \t'
def t_REM(t):
    r'REM .*'
    # The pattern covers the keyword plus the rest of the line, so the whole
    # remark text travels in the token value; the token is passed on as-is.
    return t
def t_ID(t):
    r'[A-Z][A-Z0-9]*'
    # Reserved words match the identifier pattern as well, so they are
    # retagged with their own token type after the match.
    lexeme = t.value
    if lexeme in keywords:
        t.type = lexeme
    return t
t_EQUALS = r'='
t_PLUS = r'\+'
t_MINUS = r'-'
t_TIMES = r'\*'
t_POWER = r'\^'
t_DIVIDE = r'/'
t_LPAREN = r'\('
t_RPAREN = r'\)'
t_LT = r'<'
t_LE = r'<='
t_GT = r'>'
t_GE = r'>='
t_NE = r'<>'
t_COMMA = r'\,'
t_SEMI = r';'
t_INTEGER = r'\d+'
t_FLOAT = r'((\d*\.\d+)(E[\+-]?\d+)?|([1-9]\d*E[\+-]?\d+))'
t_STRING = r'\".*?\"'
def t_NEWLINE(t):
    r'\n'
    # Newlines are significant (they terminate statements), so the token is
    # returned after bumping the lexer's line counter.
    lexer = t.lexer
    lexer.lineno = lexer.lineno + 1
    return t
def t_error(t):
    # Report the offending character, then step over it so lexing can resume.
    bad_char = t.value[0]
    print("Illegal character %s" % bad_char)
    t.lexer.skip(1)
lex.lex(debug=0)

View File

@ -0,0 +1,79 @@
# An implementation of Dartmouth BASIC (1964)
#
import sys
sys.path.insert(0,"../..")
if sys.version_info[0] >= 3:
raw_input = input
import logging
logging.basicConfig(
level = logging.INFO,
filename = "parselog.txt",
filemode = "w"
)
log = logging.getLogger()
import basiclex
import basparse
import basinterp
# If a filename has been specified, we try to run it.
# If a runtime error occurs, we bail out and enter
# interactive mode below
if len(sys.argv) == 2:
    # Read the program through a context manager so the file handle is
    # closed as soon as the text is in memory.
    with open(sys.argv[1]) as source:
        data = source.read()
    prog = basparse.parse(data, debug=log)
    if not prog:
        raise SystemExit
    b = basinterp.BasicInterpreter(prog)
    try:
        b.run()
        raise SystemExit
    except RuntimeError:
        # The interpreter has already printed its diagnostic; fall through
        # to the interactive prompt with the loaded program still in place.
        pass
else:
    b = basinterp.BasicInterpreter({})

# Interactive mode.  This incrementally adds/deletes statements
# from the program stored in the BasicInterpreter object.  In
# addition, special commands 'NEW','LIST',and 'RUN' are added.
# Specifying a line number with no code deletes that line from
# the program.
while 1:
    try:
        line = raw_input("[BASIC] ")
    except EOFError:
        raise SystemExit
    if not line:
        continue
    line += "\n"
    prog = basparse.parse(line, debug=log)
    if not prog:
        continue

    keys = list(prog)
    if keys[0] > 0:
        # A numbered statement: merge it into the stored program.
        b.add_statements(prog)
    else:
        # Line number 0 is how the parser tags interactive commands.
        stat = prog[keys[0]]
        if stat[0] == 'RUN':
            try:
                b.run()
            except RuntimeError:
                pass
        elif stat[0] == 'LIST':
            b.list()
        elif stat[0] == 'BLANK':
            b.del_line(stat[1])
        elif stat[0] == 'NEW':
            b.new()

View File

@ -0,0 +1,441 @@
# This file provides the runtime support for running a basic program
# Assumes the program has been parsed using basparse.py
import sys
import math
import random
class BasicInterpreter:
    """Execute and list a parsed BASIC program.

    The program is a dictionary mapping line numbers to statement tuples
    whose first element names the statement ('LET', 'PRINT', 'FOR', ...).
    Expressions are nested tuples tagged 'NUM', 'GROUP', 'UNARY', 'BINOP'
    or 'VAR'.  Runtime problems are reported on stdout and signalled by
    raising RuntimeError.
    """

    def __init__(self, prog):
        """Store the program and set up the built-in function table.

        prog is a dictionary containing (line, statement) mappings.
        """
        self.prog = prog

        # Built-in function table.  Each entry receives the *unevaluated*
        # argument expression and evaluates it itself.
        self.functions = {
            'SIN': lambda z: math.sin(self.eval(z)),
            'COS': lambda z: math.cos(self.eval(z)),
            'TAN': lambda z: math.tan(self.eval(z)),
            'ATN': lambda z: math.atan(self.eval(z)),
            'EXP': lambda z: math.exp(self.eval(z)),
            'ABS': lambda z: abs(self.eval(z)),
            'LOG': lambda z: math.log(self.eval(z)),
            'SQR': lambda z: math.sqrt(self.eval(z)),
            'INT': lambda z: int(self.eval(z)),
            'RND': lambda z: random.random()
        }

    def collect_data(self):
        """Gather the values of every DATA statement, in line order."""
        self.data = []
        for lineno in self.stat:
            if self.prog[lineno][0] == 'DATA':
                self.data = self.data + self.prog[lineno][1]
        self.dc = 0                  # Initialize the data counter

    def check_end(self):
        """Flag an error unless the first END is the last program line."""
        has_end = 0
        for lineno in self.stat:
            if self.prog[lineno][0] == 'END' and not has_end:
                has_end = lineno
        if not has_end:
            print("NO END INSTRUCTION")
            self.error = 1
            return
        # has_end is non-zero here, so self.stat is known to be non-empty.
        if has_end != self.stat[-1]:
            print("END IS NOT LAST")
            self.error = 1

    def check_loops(self):
        """Pair each FOR with the next NEXT on the same variable.

        The index of the matching NEXT is recorded in self.loopend, keyed
        by the index of the FOR.  A FOR with no match sets self.error.
        """
        for pc in range(len(self.stat)):
            lineno = self.stat[pc]
            if self.prog[lineno][0] == 'FOR':
                forinst = self.prog[lineno]
                loopvar = forinst[1]
                for i in range(pc + 1, len(self.stat)):
                    if self.prog[self.stat[i]][0] == 'NEXT':
                        nextvar = self.prog[self.stat[i]][1]
                        if nextvar != loopvar:
                            continue
                        self.loopend[pc] = i
                        break
                else:
                    # The scan ran off the end without finding a NEXT.
                    print("FOR WITHOUT NEXT AT LINE %s" % self.stat[pc])
                    self.error = 1

    def eval(self, expr):
        """Evaluate an expression tuple and return its value.

        Raises RuntimeError (after printing a message) for undefined
        variables and for out-of-range list or table subscripts.
        """
        etype = expr[0]
        if etype == 'NUM':
            return expr[1]
        elif etype == 'GROUP':
            return self.eval(expr[1])
        elif etype == 'UNARY':
            if expr[1] == '-':
                return -self.eval(expr[2])
        elif etype == 'BINOP':
            if expr[1] == '+':
                return self.eval(expr[2]) + self.eval(expr[3])
            elif expr[1] == '-':
                return self.eval(expr[2]) - self.eval(expr[3])
            elif expr[1] == '*':
                return self.eval(expr[2]) * self.eval(expr[3])
            elif expr[1] == '/':
                return float(self.eval(expr[2])) / self.eval(expr[3])
            elif expr[1] == '^':
                return abs(self.eval(expr[2])) ** self.eval(expr[3])
        elif etype == 'VAR':
            var, dim1, dim2 = expr[1]
            if not dim1 and not dim2:
                if var in self.vars:
                    return self.vars[var]
                else:
                    print("UNDEFINED VARIABLE %s AT LINE %s" % (var, self.stat[self.pc]))
                    raise RuntimeError
            # May be a list lookup or a function evaluation
            if dim1 and not dim2:
                if var in self.functions:
                    # A function: hand over the unevaluated argument.
                    return self.functions[var](dim1)
                else:
                    # A list evaluation (subscripts are 1-based)
                    if var in self.lists:
                        dim1val = self.eval(dim1)
                        if dim1val < 1 or dim1val > len(self.lists[var]):
                            print("LIST INDEX OUT OF BOUNDS AT LINE %s" % self.stat[self.pc])
                            raise RuntimeError
                        return self.lists[var][dim1val - 1]
            if dim1 and dim2:
                if var in self.tables:
                    dim1val = self.eval(dim1)
                    dim2val = self.eval(dim2)
                    if dim1val < 1 or dim1val > len(self.tables[var]) or dim2val < 1 or dim2val > len(self.tables[var][0]):
                        print("TABLE INDEX OUT OF BOUNDS AT LINE %s" % self.stat[self.pc])
                        raise RuntimeError
                    return self.tables[var][dim1val - 1][dim2val - 1]
            # Nothing matched: unknown list, table or function name.
            print("UNDEFINED VARIABLE %s AT LINE %s" % (var, self.stat[self.pc]))
            raise RuntimeError

    def releval(self, expr):
        """Evaluate a relational expression tuple; return 1 or 0."""
        etype = expr[1]
        lhs = self.eval(expr[2])
        rhs = self.eval(expr[3])
        if etype == '<':
            return 1 if lhs < rhs else 0
        elif etype == '<=':
            return 1 if lhs <= rhs else 0
        elif etype == '>':
            return 1 if lhs > rhs else 0
        elif etype == '>=':
            return 1 if lhs >= rhs else 0
        elif etype == '=':
            return 1 if lhs == rhs else 0
        elif etype == '<>':
            return 1 if lhs != rhs else 0

    def assign(self, target, value):
        """Evaluate value and store it in target.

        target is a (name, dim1, dim2) triple; value is an expression
        tuple.  Lists and tables that were never DIMensioned are created
        on first use with 10 (or 10x10) zeroed slots.
        """
        var, dim1, dim2 = target
        if not dim1 and not dim2:
            self.vars[var] = self.eval(value)
        elif dim1 and not dim2:
            # List assignment
            dim1val = self.eval(dim1)
            if not var in self.lists:
                self.lists[var] = [0] * 10

            if dim1val > len(self.lists[var]):
                print("DIMENSION TOO LARGE AT LINE %s" % self.stat[self.pc])
                raise RuntimeError
            self.lists[var][dim1val - 1] = self.eval(value)
        elif dim1 and dim2:
            dim1val = self.eval(dim1)
            dim2val = self.eval(dim2)
            if not var in self.tables:
                temp = [0] * 10
                v = []
                for i in range(10):
                    v.append(temp[:])
                self.tables[var] = v
            # Variable already exists
            if dim1val > len(self.tables[var]) or dim2val > len(self.tables[var][0]):
                print("DIMENSION TOO LARGE AT LINE %s" % self.stat[self.pc])
                raise RuntimeError
            self.tables[var][dim1val - 1][dim2val - 1] = self.eval(value)

    def goto(self, linenum):
        """Move the program counter to the statement at linenum."""
        if not linenum in self.prog:
            print("UNDEFINED LINE NUMBER %d AT LINE %d" % (linenum, self.stat[self.pc]))
            raise RuntimeError
        self.pc = self.stat.index(linenum)

    def run(self):
        """Reset all runtime state and execute the program from the top.

        Raises RuntimeError if the pre-run checks fail or a statement hits
        a runtime error.  Some conditions (running out of DATA, a stray
        NEXT or RETURN, nested GOSUB) simply end the run by returning.
        """
        self.vars = {}             # All variables
        self.lists = {}            # List variables
        self.tables = {}           # Tables
        self.loops = []            # Currently active loops
        self.loopend = {}          # Mapping saying where loops end
        self.gosub = None          # Gosub return point (if any)
        self.error = 0             # Indicates program error

        self.stat = list(self.prog)  # Ordered list of all line numbers
        self.stat.sort()
        self.pc = 0                  # Current program counter

        # Processing prior to running
        self.collect_data()          # Collect all of the data statements
        self.check_end()
        self.check_loops()

        if self.error:
            raise RuntimeError

        while 1:
            line = self.stat[self.pc]
            instr = self.prog[line]

            op = instr[0]

            # END and STOP statements
            if op == 'END' or op == 'STOP':
                break           # We're done

            # GOTO statement
            elif op == 'GOTO':
                newline = instr[1]
                self.goto(newline)
                continue

            # PRINT statement
            elif op == 'PRINT':
                plist = instr[1]
                out = ""
                for label, val in plist:
                    if out:
                        # Pad to the next 15-column zone between items.
                        out += ' ' * (15 - (len(out) % 15))
                    out += label
                    if val:
                        if label:
                            out += " "
                        shown = self.eval(val)
                        out += str(shown)
                sys.stdout.write(out)
                end = instr[2]
                if not (end == ',' or end == ';'):
                    sys.stdout.write("\n")
                if end == ',':
                    sys.stdout.write(" " * (15 - (len(out) % 15)))
                if end == ';':
                    sys.stdout.write(" " * (3 - (len(out) % 3)))

            # LET statement
            elif op == 'LET':
                target = instr[1]
                value = instr[2]
                self.assign(target, value)

            # READ statement
            elif op == 'READ':
                for target in instr[1]:
                    if self.dc < len(self.data):
                        value = ('NUM', self.data[self.dc])
                        self.assign(target, value)
                        self.dc += 1
                    else:
                        # No more data.  Program ends
                        return

            elif op == 'IF':
                relop = instr[1]
                newline = instr[2]
                if self.releval(relop):
                    self.goto(newline)
                    continue

            elif op == 'FOR':
                loopvar = instr[1]
                initval = instr[2]
                finval = instr[3]
                stepval = instr[4]

                # Check to see if this is a new loop
                if not self.loops or self.loops[-1][0] != self.pc:
                    # Looks like a new loop. Make the initial assignment
                    newvalue = initval
                    self.assign((loopvar, None, None), initval)
                    if not stepval:
                        stepval = ('NUM', 1)
                    stepval = self.eval(stepval)    # Evaluate step here
                    self.loops.append((self.pc, stepval))
                else:
                    # It's a repeat of the previous loop
                    # Update the value of the loop variable according to the step
                    stepval = ('NUM', self.loops[-1][1])
                    newvalue = ('BINOP', '+', ('VAR', (loopvar, None, None)), stepval)

                # A negative step counts down, so the bound test flips.
                if self.loops[-1][1] < 0:
                    relop = '>='
                else:
                    relop = '<='
                if not self.releval(('RELOP', relop, newvalue, finval)):
                    # Loop is done. Jump to the NEXT
                    self.pc = self.loopend[self.pc]
                    self.loops.pop()
                else:
                    self.assign((loopvar, None, None), newvalue)

            elif op == 'NEXT':
                if not self.loops:
                    print("NEXT WITHOUT FOR AT LINE %s" % line)
                    return

                nextvar = instr[1]
                # Jump back to the innermost active FOR.
                self.pc = self.loops[-1][0]
                loopinst = self.prog[self.stat[self.pc]]
                forvar = loopinst[1]
                if nextvar != forvar:
                    print("NEXT DOESN'T MATCH FOR AT LINE %s" % line)
                    return
                continue

            elif op == 'GOSUB':
                newline = instr[1]
                if self.gosub:
                    print("ALREADY IN A SUBROUTINE AT LINE %s" % line)
                    return
                self.gosub = self.stat[self.pc]
                self.goto(newline)
                continue

            elif op == 'RETURN':
                if not self.gosub:
                    print("RETURN WITHOUT A GOSUB AT LINE %s" % line)
                    return
                # Land on the GOSUB line; the increment below steps past it.
                self.goto(self.gosub)
                self.gosub = None

            elif op == 'FUNC':
                fname = instr[1]
                pname = instr[2]
                expr = instr[3]

                # The parameter name and body are bound as defaults so each
                # DEF keeps its own; reading the enclosing `pname` instead
                # would pick up whatever the most recent DEF assigned.
                def eval_func(pvalue, name=pname, self=self, expr=expr):
                    self.assign((name, None, None), pvalue)
                    return self.eval(expr)
                self.functions[fname] = eval_func

            elif op == 'DIM':
                for vname, x, y in instr[1]:
                    if y == 0:
                        # Single dimension variable
                        self.lists[vname] = [0] * x
                    else:
                        # Double dimension variable
                        temp = [0] * y
                        v = []
                        for i in range(x):
                            v.append(temp[:])
                        self.tables[vname] = v

            self.pc += 1

    # Utility functions for program listing
    def expr_str(self, expr):
        """Return the source text for an expression tuple."""
        etype = expr[0]
        if etype == 'NUM':
            return str(expr[1])
        elif etype == 'GROUP':
            return "(%s)" % self.expr_str(expr[1])
        elif etype == 'UNARY':
            if expr[1] == '-':
                # The operand is itself an expression tuple and must be
                # rendered recursively rather than with str().
                return "-" + self.expr_str(expr[2])
        elif etype == 'BINOP':
            return "%s %s %s" % (self.expr_str(expr[2]), expr[1], self.expr_str(expr[3]))
        elif etype == 'VAR':
            return self.var_str(expr[1])

    def relexpr_str(self, expr):
        """Return the source text for a relational expression tuple."""
        return "%s %s %s" % (self.expr_str(expr[2]), expr[1], self.expr_str(expr[3]))

    def var_str(self, var):
        """Return the source text for a (name, dim1, dim2) variable."""
        varname, dim1, dim2 = var
        if not dim1 and not dim2:
            return varname
        if dim1 and not dim2:
            return "%s(%s)" % (varname, self.expr_str(dim1))
        return "%s(%s,%s)" % (varname, self.expr_str(dim1), self.expr_str(dim2))

    def list(self):
        """Print a listing of the stored program in line-number order."""
        stat = list(self.prog)      # Ordered list of all line numbers
        stat.sort()
        for line in stat:
            instr = self.prog[line]
            op = instr[0]
            if op in ['END', 'STOP', 'RETURN']:
                print("%s %s" % (line, op))
                continue
            elif op == 'REM':
                print("%s %s" % (line, instr[1]))
            elif op == 'PRINT':
                _out = "%s %s " % (line, op)
                first = 1
                for p in instr[1]:
                    if not first:
                        _out += ", "
                    if p[0] and p[1]:
                        _out += '"%s"%s' % (p[0], self.expr_str(p[1]))
                    elif p[1]:
                        _out += self.expr_str(p[1])
                    else:
                        _out += '"%s"' % (p[0],)
                    first = 0
                if instr[2]:
                    _out += instr[2]
                print(_out)
            elif op == 'LET':
                print("%s LET %s = %s" % (line, self.var_str(instr[1]), self.expr_str(instr[2])))
            elif op == 'READ':
                _out = "%s READ " % line
                first = 1
                for r in instr[1]:
                    if not first:
                        _out += ","
                    _out += self.var_str(r)
                    first = 0
                print(_out)
            elif op == 'IF':
                print("%s IF %s THEN %d" % (line, self.relexpr_str(instr[1]), instr[2]))
            elif op == 'GOTO' or op == 'GOSUB':
                print("%s %s %s" % (line, op, instr[1]))
            elif op == 'FOR':
                _out = "%s FOR %s = %s TO %s" % (line, instr[1], self.expr_str(instr[2]), self.expr_str(instr[3]))
                if instr[4]:
                    _out += " STEP %s" % (self.expr_str(instr[4]))
                print(_out)
            elif op == 'NEXT':
                print("%s NEXT %s" % (line, instr[1]))
            elif op == 'FUNC':
                print("%s DEF %s(%s) = %s" % (line, instr[1], instr[2], self.expr_str(instr[3])))
            elif op == 'DIM':
                _out = "%s DIM " % line
                first = 1
                for vname, x, y in instr[1]:
                    if not first:
                        _out += ","
                    first = 0
                    if y == 0:
                        _out += "%s(%d)" % (vname, x)
                    else:
                        _out += "%s(%d,%d)" % (vname, x, y)
                print(_out)
            elif op == 'DATA':
                _out = "%s DATA " % line
                first = 1
                for v in instr[1]:
                    if not first:
                        _out += ","
                    first = 0
                    # DATA values are numbers; convert before concatenating.
                    _out += str(v)
                print(_out)

    def new(self):
        """Erase the current program."""
        self.prog = {}

    def add_statements(self, prog):
        """Insert or replace the statements in prog, keyed by line number."""
        for line, stat in prog.items():
            self.prog[line] = stat

    def del_line(self, lineno):
        """Delete the statement at lineno; a missing line is ignored."""
        try:
            del self.prog[lineno]
        except KeyError:
            pass

View File

@ -0,0 +1,424 @@
# An implementation of Dartmouth BASIC (1964)
#
from ply import *
import basiclex
tokens = basiclex.tokens
precedence = (
('left', 'PLUS','MINUS'),
('left', 'TIMES','DIVIDE'),
('left', 'POWER'),
('right','UMINUS')
)
#### A BASIC program is a series of statements. We represent the program as a
#### dictionary of tuples indexed by line number.
def p_program(p):
    '''program : program statement
               | statement'''
    # Statements arrive as (line, stat) pairs, or None for blank/bad lines.
    if len(p) == 3:
        # Extend the table built so far; start a new one if there is none.
        table = p[1] or {}
        if p[2]:
            lineno, stmt = p[2]
            table[lineno] = stmt
        p[0] = table
    elif len(p) == 2 and p[1]:
        lineno, stmt = p[1]
        p[0] = {lineno: stmt}
#### This catch-all rule is used for any catastrophic errors. In this case,
#### we simply return nothing
def p_program_error(p):
'''program : error'''
p[0] = None
p.parser.error = 1
#### Format of all BASIC statements.
def p_statement(p):
    '''statement : INTEGER command NEWLINE'''
    label, command = p[1], p[2]
    # Well-formed commands are tuples; the *_bad command rules hand back a
    # plain string carrying the diagnostic text instead.
    if not isinstance(command, str):
        p[0] = (int(label), command)
        return
    print("%s %s %s" % (command, "AT LINE", label))
    p[0] = None
    p.parser.error = 1
#### Interactive statements.
def p_statement_interactive(p):
'''statement : RUN NEWLINE
| LIST NEWLINE
| NEW NEWLINE'''
p[0] = (0, (p[1],0))
#### Blank line number
def p_statement_blank(p):
'''statement : INTEGER NEWLINE'''
p[0] = (0,('BLANK',int(p[1])))
#### Error handling for malformed statements
def p_statement_bad(p):
'''statement : INTEGER error NEWLINE'''
print("MALFORMED STATEMENT AT LINE %s" % p[1])
p[0] = None
p.parser.error = 1
#### Blank line
def p_statement_newline(p):
'''statement : NEWLINE'''
p[0] = None
#### LET statement
def p_command_let(p):
'''command : LET variable EQUALS expr'''
p[0] = ('LET',p[2],p[4])
def p_command_let_bad(p):
'''command : LET variable EQUALS error'''
p[0] = "BAD EXPRESSION IN LET"
#### READ statement
def p_command_read(p):
'''command : READ varlist'''
p[0] = ('READ',p[2])
def p_command_read_bad(p):
'''command : READ error'''
p[0] = "MALFORMED VARIABLE LIST IN READ"
#### DATA statement
def p_command_data(p):
'''command : DATA numlist'''
p[0] = ('DATA',p[2])
def p_command_data_bad(p):
'''command : DATA error'''
p[0] = "MALFORMED NUMBER LIST IN DATA"
#### PRINT statement
def p_command_print(p):
'''command : PRINT plist optend'''
p[0] = ('PRINT',p[2],p[3])
def p_command_print_bad(p):
'''command : PRINT error'''
p[0] = "MALFORMED PRINT STATEMENT"
#### Optional ending on PRINT. Either a comma (,) or semicolon (;)
def p_optend(p):
    '''optend : COMMA
              | SEMI
              |'''
    # The empty alternative yields a production of length 1, hence None.
    p[0] = p[1] if len(p) == 2 else None
#### PRINT statement with no arguments
def p_command_print_empty(p):
'''command : PRINT'''
p[0] = ('PRINT',[],None)
#### GOTO statement
def p_command_goto(p):
'''command : GOTO INTEGER'''
p[0] = ('GOTO',int(p[2]))
def p_command_goto_bad(p):
'''command : GOTO error'''
p[0] = "INVALID LINE NUMBER IN GOTO"
#### IF-THEN statement
def p_command_if(p):
'''command : IF relexpr THEN INTEGER'''
p[0] = ('IF',p[2],int(p[4]))
def p_command_if_bad(p):
'''command : IF error THEN INTEGER'''
p[0] = "BAD RELATIONAL EXPRESSION"
def p_command_if_bad2(p):
'''command : IF relexpr THEN error'''
p[0] = "INVALID LINE NUMBER IN THEN"
#### FOR statement
def p_command_for(p):
'''command : FOR ID EQUALS expr TO expr optstep'''
p[0] = ('FOR',p[2],p[4],p[6],p[7])
def p_command_for_bad_initial(p):
'''command : FOR ID EQUALS error TO expr optstep'''
p[0] = "BAD INITIAL VALUE IN FOR STATEMENT"
def p_command_for_bad_final(p):
'''command : FOR ID EQUALS expr TO error optstep'''
p[0] = "BAD FINAL VALUE IN FOR STATEMENT"
def p_command_for_bad_step(p):
'''command : FOR ID EQUALS expr TO expr STEP error'''
p[0] = "MALFORMED STEP IN FOR STATEMENT"
#### Optional STEP qualifier on FOR statement
def p_optstep(p):
    '''optstep : STEP expr
               | empty'''
    # Only the "STEP expr" alternative has two symbols on the right.
    p[0] = p[2] if len(p) == 3 else None
#### NEXT statement
def p_command_next(p):
'''command : NEXT ID'''
p[0] = ('NEXT',p[2])
def p_command_next_bad(p):
'''command : NEXT error'''
p[0] = "MALFORMED NEXT"
#### END statement
def p_command_end(p):
'''command : END'''
p[0] = ('END',)
#### REM statement
def p_command_rem(p):
'''command : REM'''
p[0] = ('REM',p[1])
#### STOP statement
def p_command_stop(p):
'''command : STOP'''
p[0] = ('STOP',)
#### DEF statement
def p_command_def(p):
'''command : DEF ID LPAREN ID RPAREN EQUALS expr'''
p[0] = ('FUNC',p[2],p[4],p[7])
def p_command_def_bad_rhs(p):
'''command : DEF ID LPAREN ID RPAREN EQUALS error'''
p[0] = "BAD EXPRESSION IN DEF STATEMENT"
def p_command_def_bad_arg(p):
'''command : DEF ID LPAREN error RPAREN EQUALS expr'''
p[0] = "BAD ARGUMENT IN DEF STATEMENT"
#### GOSUB statement
def p_command_gosub(p):
'''command : GOSUB INTEGER'''
p[0] = ('GOSUB',int(p[2]))
def p_command_gosub_bad(p):
'''command : GOSUB error'''
p[0] = "INVALID LINE NUMBER IN GOSUB"
#### RETURN statement
def p_command_return(p):
'''command : RETURN'''
p[0] = ('RETURN',)
#### DIM statement
def p_command_dim(p):
'''command : DIM dimlist'''
p[0] = ('DIM',p[2])
def p_command_dim_bad(p):
'''command : DIM error'''
p[0] = "MALFORMED VARIABLE LIST IN DIM"
#### List of variables supplied to DIM statement
def p_dimlist(p):
    '''dimlist : dimlist COMMA dimitem
               | dimitem'''
    if len(p) != 4:
        # First item: start the list.
        p[0] = [p[1]]
        return
    # Later items are appended to the list built so far (same object).
    items = p[1]
    items.append(p[3])
    p[0] = items
#### DIM items
def p_dimitem_single(p):
    '''dimitem : ID LPAREN INTEGER RPAREN'''
    # The third field is 0 to mark a one-dimensional variable.  int() is
    # used rather than eval(): it accepts leading zeros ("010" is 10) and
    # never executes the token text as Python code.
    p[0] = (p[1], int(p[3]), 0)
def p_dimitem_double(p):
    '''dimitem : ID LPAREN INTEGER COMMA INTEGER RPAREN'''
    # (name, rows, columns).  int() is used rather than eval(): it accepts
    # leading zeros and never executes the token text as Python code.
    p[0] = (p[1], int(p[3]), int(p[5]))
#### Arithmetic expressions
def p_expr_binary(p):
'''expr : expr PLUS expr
| expr MINUS expr
| expr TIMES expr
| expr DIVIDE expr
| expr POWER expr'''
p[0] = ('BINOP',p[2],p[1],p[3])
def p_expr_number(p):
    '''expr : INTEGER
            | FLOAT'''
    # Convert the literal without eval(): an all-digit token is an integer
    # (leading zeros allowed, always base 10); anything else is handed to
    # float(), which understands the ".5" and "1E5" spellings.
    text = p[1]
    p[0] = ('NUM', int(text) if text.isdigit() else float(text))
def p_expr_variable(p):
'''expr : variable'''
p[0] = ('VAR',p[1])
def p_expr_group(p):
'''expr : LPAREN expr RPAREN'''
p[0] = ('GROUP',p[2])
def p_expr_unary(p):
'''expr : MINUS expr %prec UMINUS'''
p[0] = ('UNARY','-',p[2])
#### Relational expressions
def p_relexpr(p):
'''relexpr : expr LT expr
| expr LE expr
| expr GT expr
| expr GE expr
| expr EQUALS expr
| expr NE expr'''
p[0] = ('RELOP',p[2],p[1],p[3])
#### Variables
def p_variable(p):
    '''variable : ID
                | ID LPAREN expr RPAREN
                | ID LPAREN expr COMMA expr RPAREN'''
    # Result is always (name, dim1, dim2); unused subscripts are None.
    size = len(p)
    if size == 2:
        first, second = None, None
    elif size == 5:
        first, second = p[3], None
    else:
        first, second = p[3], p[5]
    p[0] = (p[1], first, second)
#### Builds a list of variable targets as a Python list
def p_varlist(p):
    '''varlist : varlist COMMA variable
               | variable'''
    if len(p) <= 2:
        # First variable: start the list.
        p[0] = [p[1]]
        return
    # Later variables are appended to the list built so far (same object).
    targets = p[1]
    targets.append(p[3])
    p[0] = targets
#### Builds a list of numbers as a Python list
def p_numlist(p):
    '''numlist : numlist COMMA number
               | number'''
    if len(p) <= 2:
        # First number: start the list.
        p[0] = [p[1]]
        return
    # Later numbers are appended to the list built so far (same object).
    values = p[1]
    values.append(p[3])
    p[0] = values
#### A number. May be an integer or a float
def p_number(p):
    '''number : INTEGER
              | FLOAT'''
    # Convert the literal without eval(): an all-digit token is an integer
    # (leading zeros allowed, always base 10); anything else is handed to
    # float(), which understands the ".5" and "1E5" spellings.
    text = p[1]
    p[0] = int(text) if text.isdigit() else float(text)
#### A signed number.
def p_number_signed(p):
    '''number : MINUS INTEGER
              | MINUS FLOAT'''
    # Same conversion as for an unsigned number, then negate.  Avoids
    # eval(), which fails on leading zeros and executes the token text as
    # Python code.
    text = p[2]
    p[0] = -(int(text) if text.isdigit() else float(text))
#### List of targets for a print statement
#### Returns a list of tuples (label,expr)
def p_plist(p):
    '''plist : plist COMMA pitem
             | pitem'''
    if len(p) <= 3:
        # First print item: start the list.
        p[0] = [p[1]]
        return
    # Later items are appended to the list built so far (same object).
    items = p[1]
    items.append(p[3])
    p[0] = items
def p_item_string(p):
'''pitem : STRING'''
p[0] = (p[1][1:-1],None)
def p_item_string_expr(p):
'''pitem : STRING expr'''
p[0] = (p[1][1:-1],p[2])
def p_item_expr(p):
'''pitem : expr'''
p[0] = ("",p[1])
#### Empty
def p_empty(p):
'''empty : '''
#### Catastrophic error handler
def p_error(p):
    # Only a falsy p is reported here, as an end-of-input error (presumably
    # yacc passes None when the input runs out -- confirm against the PLY
    # docs).  Nothing is printed for any other token; the grammar's own
    # `error` rules produce the messages for malformed statements.
    if not p:
        print("SYNTAX ERROR AT EOF")
bparser = yacc.yacc()
def parse(data, debug=0):
    # Clear the error flag, which the grammar actions set on the parser
    # object when they meet a malformed statement.
    bparser.error = 0
    program = bparser.parse(data, debug=debug)
    # Any flagged error invalidates the whole result.
    return None if bparser.error else program

14
ply/example/BASIC/dim.bas Normal file
View File

@ -0,0 +1,14 @@
5 DIM A(50,15)
10 FOR I = 1 TO 50
20 FOR J = 1 TO 15
30 LET A(I,J) = I + J
35 REM PRINT I,J, A(I,J)
40 NEXT J
50 NEXT I
100 FOR I = 1 TO 50
110 FOR J = 1 TO 15
120 PRINT A(I,J),
130 NEXT J
140 PRINT
150 NEXT I
999 END

View File

@ -0,0 +1,5 @@
10 DEF FDX(X) = 2*X
20 FOR I = 0 TO 100
30 PRINT FDX(I)
40 NEXT I
50 END

22
ply/example/BASIC/gcd.bas Normal file
View File

@ -0,0 +1,22 @@
10 PRINT "A","B","C","GCD"
20 READ A,B,C
30 LET X = A
40 LET Y = B
50 GOSUB 200
60 LET X = G
70 LET Y = C
80 GOSUB 200
90 PRINT A, B, C, G
100 GOTO 20
110 DATA 60, 90, 120
120 DATA 38456, 64872, 98765
130 DATA 32, 384, 72
200 LET Q = INT(X/Y)
210 LET R = X - Q*Y
220 IF R = 0 THEN 300
230 LET X = Y
240 LET Y = R
250 GOTO 200
300 LET G = Y
310 RETURN
999 END

View File

@ -0,0 +1,13 @@
100 LET X = 3
110 GOSUB 400
120 PRINT U, V, W
200 LET X = 5
210 GOSUB 400
220 LET Z = U + 2*V + 3*W
230 PRINT Z
240 GOTO 999
400 LET U = X*X
410 LET V = X*X*X
420 LET W = X*X*X*X + X*X*X + X*X + X
430 RETURN
999 END

View File

@ -0,0 +1,4 @@
5 REM HELLO WORLD PROGRAM
10 PRINT "HELLO WORLD"
99 END

View File

@ -0,0 +1,17 @@
1 REM ::: SOLVE A SYSTEM OF LINEAR EQUATIONS
2 REM ::: A1*X1 + A2*X2 = B1
3 REM ::: A3*X1 + A4*X2 = B2
4 REM --------------------------------------
10 READ A1, A2, A3, A4
15 LET D = A1 * A4 - A3 * A2
20 IF D = 0 THEN 65
30 READ B1, B2
37 LET X1 = (B1*A4 - B2*A2) / D
42 LET X2 = (A1*B2 - A3*B1) / D
55 PRINT X1, X2
60 GOTO 30
65 PRINT "NO UNIQUE SOLUTION"
70 DATA 1, 2, 4
80 DATA 2, -7, 5
85 DATA 1, 3, 4, -7
90 END

View File

@ -0,0 +1,12 @@
5 PRINT "X VALUE", "SINE", "RESOLUTION"
10 READ D
20 LET M = -1
30 FOR X = 0 TO 3 STEP D
40 IF SIN(X) <= M THEN 80
50 LET X0 = X
60 LET M = SIN(X)
80 NEXT X
85 PRINT X0, M, D
90 GOTO 10
100 DATA .1, .01, .001
110 END

View File

@ -0,0 +1,13 @@
5 PRINT "THIS PROGRAM COMPUTES AND PRINTS THE NTH POWERS"
6 PRINT "OF THE NUMBERS LESS THAN OR EQUAL TO N FOR VARIOUS"
7 PRINT "N FROM 1 THROUGH 7"
8 PRINT
10 FOR N = 1 TO 7
15 PRINT "N = "N
20 FOR I = 1 TO N
30 PRINT I^N,
40 NEXT I
50 PRINT
60 PRINT
70 NEXT N
80 END

View File

@ -0,0 +1,4 @@
10 FOR I = 1 TO 20
20 PRINT INT(10*RND(0))
30 NEXT I
40 END

View File

@ -0,0 +1,20 @@
10 FOR I = 1 TO 3
20 READ P(I)
30 NEXT I
40 FOR I = 1 TO 3
50 FOR J = 1 TO 5
60 READ S(I,J)
70 NEXT J
80 NEXT I
90 FOR J = 1 TO 5
100 LET S = 0
110 FOR I = 1 TO 3
120 LET S = S + P(I) * S(I,J)
130 NEXT I
140 PRINT "TOTAL SALES FOR SALESMAN"J, "$"S
150 NEXT J
200 DATA 1.25, 4.30, 2.50
210 DATA 40, 20, 37, 29, 42
220 DATA 10, 16, 3, 21, 8
230 DATA 35, 47, 29, 16, 33
300 END

View File

@ -0,0 +1,18 @@
1 REM :: THIS PROGRAM COMPUTES HOW MANY TIMES YOU HAVE TO FOLD
2 REM :: A PIECE OF PAPER SO THAT IT IS TALLER THAN THE
3 REM :: SEARS TOWER.
4 REM :: S = HEIGHT OF TOWER (METERS)
5 REM :: T = THICKNESS OF PAPER (MILLIMETERS)
10 LET S = 442
20 LET T = 0.1
30 REM CONVERT T TO METERS
40 LET T = T * .001
50 LET F = 1
60 LET H = T
100 IF H > S THEN 200
120 LET H = 2 * H
125 LET F = F + 1
130 GOTO 100
200 PRINT "NUMBER OF FOLDS ="F
220 PRINT "FINAL HEIGHT ="H
999 END

View File

@ -0,0 +1,5 @@
10 LET X = 0
20 LET X = X + 1
30 PRINT X, SQR(X)
40 IF X < 100 THEN 20
50 END

View File

@ -0,0 +1,4 @@
10 FOR X = 1 TO 100
20 PRINT X, SQR(X)
30 NEXT X
40 END

View File

@ -0,0 +1,709 @@
# GardenSnake - a parser generator demonstration program
#
# This implements a modified version of a subset of Python:
# - only 'def', 'return' and 'if' statements
# - 'if' only has 'then' clause (no elif nor else)
# - single-quoted strings only, content in raw format
# - numbers are decimal.Decimal instances (not integers or floats)
# - no print statement; use the built-in 'print' function
# - only < > == + - / * implemented (and unary + -)
# - assignment and tuple assignment work
# - no generators of any sort
# - no ... well, not quite a lot
# Why? I'm thinking about a new indentation-based configuration
# language for a project and wanted to figure out how to do it. Once
# I got that working I needed a way to test it out. My original AST
# was dumb so I decided to target Python's AST and compile it into
# Python code. Plus, it's pretty cool that it only took a day or so
# from sitting down with Ply to having working code.
# This uses David Beazley's Ply from http://www.dabeaz.com/ply/
# This work is hereby released into the Public Domain. To view a copy of
# the public domain dedication, visit
# http://creativecommons.org/licenses/publicdomain/ or send a letter to
# Creative Commons, 543 Howard Street, 5th Floor, San Francisco,
# California, 94105, USA.
#
# Portions of this work are derived from Python's Grammar definition
# and may be covered under the Python copyright and license
#
# Andrew Dalke / Dalke Scientific Software, LLC
# 30 August 2006 / Cape Town, South Africa
# Changelog:
# 30 August - added link to CC license; removed the "swapcase" encoding
# Modifications for inclusion in PLY distribution
import sys
sys.path.insert(0,"../..")
from ply import *
##### Lexer ######
#import lex
import decimal
tokens = (
'DEF',
'IF',
'NAME',
'NUMBER', # Python decimals
'STRING', # single quoted strings only; syntax of raw strings
'LPAR',
'RPAR',
'COLON',
'EQ',
'ASSIGN',
'LT',
'GT',
'PLUS',
'MINUS',
'MULT',
'DIV',
'RETURN',
'WS',
'NEWLINE',
'COMMA',
'SEMICOLON',
'INDENT',
'DEDENT',
'ENDMARKER',
)
#t_NUMBER = r'\d+'
# taken from decimal.py but without the leading sign
def t_NUMBER(t):
r"""(\d+(\.\d*)?|\.\d+)([eE][-+]? \d+)?"""
t.value = decimal.Decimal(t.value)
return t
def t_STRING(t):
r"'([^\\']+|\\'|\\\\)*'" # I think this is right ...
t.value=t.value[1:-1].decode("string-escape") # .swapcase() # for fun
return t
t_COLON = r':'
t_EQ = r'=='
t_ASSIGN = r'='
t_LT = r'<'
t_GT = r'>'
t_PLUS = r'\+'
t_MINUS = r'-'
t_MULT = r'\*'
t_DIV = r'/'
t_COMMA = r','
t_SEMICOLON = r';'
# Ply nicely documented how to do this.
RESERVED = {
"def": "DEF",
"if": "IF",
"return": "RETURN",
}
def t_NAME(t):
r'[a-zA-Z_][a-zA-Z0-9_]*'
t.type = RESERVED.get(t.value, "NAME")
return t
# Putting this before t_WS let it consume lines with only comments in
# them so the latter code never sees the WS part. Not consuming the
# newline. Needed for "if 1: #comment"
def t_comment(t):
r"[ ]*\043[^\n]*" # \043 is '#'
pass
# Whitespace
def t_WS(t):
r' [ ]+ '
if t.lexer.at_line_start and t.lexer.paren_count == 0:
return t
# Don't generate newline tokens when inside of parenthesis, eg
# a = (1,
# 2, 3)
def t_newline(t):
    r'\n+'
    lexer = t.lexer
    # One token may cover several blank lines; count each of them.
    lexer.lineno += len(t.value)
    t.type = "NEWLINE"
    # Inside parentheses the newline is dropped by returning nothing.
    return t if lexer.paren_count == 0 else None
def t_LPAR(t):
r'\('
t.lexer.paren_count += 1
return t
def t_RPAR(t):
r'\)'
# check for underflow? should be the job of the parser
t.lexer.paren_count -= 1
return t
def t_error(t):
    # Any character the lexer cannot match is fatal.  The statements that
    # used to follow this raise (a "Skipping" message and a one-character
    # skip) could never execute and have been removed.
    raise SyntaxError("Unknown symbol %r" % (t.value[0],))
## I implemented INDENT / DEDENT generation as a post-processing filter
# The original lex token stream contains WS and NEWLINE characters.
# WS will only occur before any other tokens on a line.
# I have three filters. One tags tokens by adding two attributes.
# "must_indent" is True if the token must be indented from the
# previous code. The other is "at_line_start" which is True for WS
# and the first non-WS/non-NEWLINE on a line. It flags the check to
# see if the new line has changed indentation level.
# Python's syntax has three INDENT states
# 0) no colon hence no need to indent
# 1) "if 1: go()" - simple statements have a COLON but no need for an indent
# 2) "if 1:\n go()" - complex statements have a COLON NEWLINE and must indent
NO_INDENT = 0
MAY_INDENT = 1
MUST_INDENT = 2


# only care about whitespace at the start of a line
def track_tokens_filter(lexer, tokens):
    """Tag every token with ``at_line_start`` and ``must_indent``.

    ``tokens`` is consumed lazily and each token is yielded back after two
    attributes have been set on it:

    * ``at_line_start`` -- True for WS tokens and for the first token on a
      line that is neither WS nor NEWLINE.
    * ``must_indent`` -- True only for the first real token that follows a
      COLON NEWLINE sequence.

    ``lexer.at_line_start`` is refreshed after each token has been handed
    out, i.e. before the next token is pulled from ``tokens``, so that the
    t_WS rule sees the current state while lexing.
    """
    lexer.at_line_start = at_line_start = True
    indent = NO_INDENT
    for token in tokens:
        token.at_line_start = at_line_start
        # Only the first real token after COLON NEWLINE overrides this.
        token.must_indent = False

        if token.type == "COLON":
            at_line_start = False
            indent = MAY_INDENT
        elif token.type == "NEWLINE":
            at_line_start = True
            # A colon directly followed by a newline opens a block.
            if indent == MAY_INDENT:
                indent = MUST_INDENT
        elif token.type == "WS":
            # The lexer only emits WS at the start of a line.
            assert token.at_line_start
            at_line_start = True
        else:
            # A real token; only indent after COLON NEWLINE
            token.must_indent = (indent == MUST_INDENT)
            at_line_start = False
            indent = NO_INDENT

        yield token
        lexer.at_line_start = at_line_start
def _new_token(type, lineno):
    """Build a synthetic token of the given type for line ``lineno``.

    The token is a fresh ``lex.LexToken`` whose value is None; it is used
    for tokens that do not come from the input text.
    """
    token = lex.LexToken()
    token.type, token.value, token.lineno = type, None, lineno
    return token
# Synthesize a DEDENT tag
def DEDENT(lineno):
    """Return a synthetic DEDENT token attributed to line ``lineno``."""
    return _new_token("DEDENT", lineno)
# Synthesize an INDENT tag
def INDENT(lineno):
    """Return a synthetic INDENT token attributed to line ``lineno``."""
    return _new_token("INDENT", lineno)
# Track the indentation level and emit the right INDENT / DEDENT events.
def indentation_filter(tokens):
    """Turn leading whitespace into INDENT / DEDENT tokens.

    ``tokens`` must already carry the ``at_line_start`` and ``must_indent``
    attributes.  WS tokens are swallowed, NEWLINE tokens that end a blank
    line are dropped, and every other token is passed through, preceded by
    an INDENT or by one or more DEDENT tokens when the indentation depth of
    its line changed.  Any levels still open at the end of the stream are
    closed with DEDENT tokens.

    Raises IndentationError when a block is not indented, when indentation
    grows outside a new block, or when a line dedents to a depth that
    matches no enclosing level.
    """
    # Stack of active indentation widths; the bottom entry (0) is never popped.
    levels = [0]
    token = None
    depth = 0
    prev_was_ws = False

    for token in tokens:
        kind = token.type

        if kind == "WS":
            # WS only occurs at the start of a line.  It may still be
            # followed by a NEWLINE, so just remember the width for now.
            assert depth == 0
            depth = len(token.value)
            prev_was_ws = True
            continue  # WS tokens are never passed to the parser

        if kind == "NEWLINE":
            depth = 0
            blank_line = prev_was_ws or token.at_line_start
            if not blank_line:
                yield token
            continue

        # A real token (not WS, not NEWLINE): it can change the level.
        prev_was_ws = False
        if token.must_indent:
            # The new block has to be deeper than the enclosing one.
            if depth <= levels[-1]:
                raise IndentationError("expected an indented block")
            levels.append(depth)
            yield INDENT(token.lineno)
        elif token.at_line_start and depth != levels[-1]:
            if depth > levels[-1]:
                raise IndentationError("indentation increase but not in new block")
            # Back up, but only to a depth that is an enclosing level.
            if depth not in levels:
                raise IndentationError("inconsistent indentation")
            target = levels.index(depth)
            for _ in range(target + 1, len(levels)):
                yield DEDENT(token.lineno)
                levels.pop()

        yield token

    # End of input: close whatever blocks are still open.
    if len(levels) > 1:
        assert token is not None
        for _ in range(1, len(levels)):
            yield DEDENT(token.lineno)
# The top-level filter adds an ENDMARKER, if requested.
# Python's grammar uses it.
def filter(lexer, add_endmarker = True):
    """Yield the fully post-processed token stream for ``lexer``.

    Tokens are pulled from ``lexer.token`` until it returns None, tagged by
    track_tokens_filter and then run through indentation_filter.  When
    ``add_endmarker`` is true a final ENDMARKER token is appended, carrying
    the line number of the last token seen (1 if there were no tokens).
    """
    last = None
    raw = iter(lexer.token, None)
    for last in indentation_filter(track_tokens_filter(lexer, raw)):
        yield last

    if not add_endmarker:
        return
    lineno = last.lineno if last is not None else 1
    yield _new_token("ENDMARKER", lineno)
# Combine Ply and my filters into a new lexer
class IndentLexer(object):
    """Lexer wrapper exposing ``input()`` / ``token()`` on top of the filters.

    The wrapped PLY lexer is built once in the constructor; ``input()``
    resets the parenthesis counter, feeds the text to it and wires up the
    INDENT / DEDENT filter chain that ``token()`` then reads from.
    """

    def __init__(self, debug=0, optimize=0, lextab='lextab', reflags=0):
        self.lexer = lex.lex(debug=debug, optimize=optimize, lextab=lextab, reflags=reflags)
        # Set by input(); token() must not be called before that.
        self.token_stream = None

    def input(self, s, add_endmarker=True):
        """Start lexing the string ``s``; see filter() for ``add_endmarker``."""
        self.lexer.paren_count = 0
        self.lexer.input(s)
        self.token_stream = filter(self.lexer, add_endmarker)

    def token(self):
        """Return the next filtered token, or None once the stream is exhausted."""
        # The builtin next() with a default works on Python 2.6+ and 3,
        # unlike the Python-2-only iterator method .next().
        return next(self.token_stream, None)
########## Parser (tokens -> AST) ######
# also part of Ply
#import yacc
# I use the Python AST
from compiler import ast
# Helper function
def Assign(left, right):
    """Build the assignment node for ``left = right``.

    ``left`` must be an ``ast.Name`` or an ``ast.Tuple`` whose children are
    all ``ast.Name`` nodes; anything else raises SyntaxError.
    """
    if isinstance(left, ast.Name):
        # Single assignment on left
        target = ast.AssName(left.name, 'OP_ASSIGN')
        return ast.Assign([target], right)

    if not isinstance(left, ast.Tuple):
        raise SyntaxError("Can't do that yet")

    # List of things - every one of them has to be a plain Name node
    targets = []
    for child in left.getChildren():
        if not isinstance(child, ast.Name):
            raise SyntaxError("that assignment not supported")
        targets.append(ast.AssName(child.name, 'OP_ASSIGN'))
    return ast.Assign([ast.AssTuple(targets)], right)
# The grammar comments come from Python's Grammar/Grammar file
## NB: compound_stmt in single_input is followed by extra NEWLINE!
# file_input: (NEWLINE | stmt)* ENDMARKER
def p_file_input_end(p):
    """file_input_end : file_input ENDMARKER"""
    # Start symbol: wrap the statement list collected by p_file_input in a
    # single Stmt node; the ENDMARKER itself carries no value.
    p[0] = ast.Stmt(p[1])
def p_file_input(p):
    """file_input : file_input NEWLINE
| file_input stmt
| NEWLINE
| stmt"""
    # The last symbol is either a stmt (always a list, see p_stmt_simple /
    # p_stmt_compound) or a NEWLINE token value.  Testing for the list keeps
    # this independent of the Python-2-only ``basestring`` type.
    last = p[len(p) - 1]
    if isinstance(last, list):
        # Append the statements to what has been collected so far, if anything.
        p[0] = p[1] + last if len(p) == 3 else last
    else:
        # A blank line adds nothing; on its own it starts an empty list.
        p[0] = p[1] if len(p) == 3 else []
# funcdef: [decorators] 'def' NAME parameters ':' suite
# ignoring decorators
def p_funcdef(p):
    "funcdef : DEF NAME parameters COLON suite"
    # p[2] is the function name, p[3] the parameter-name list from
    # p_parameters (converted to a tuple) and p[5] the body from p_suite.
    # NOTE(review): the remaining positional arguments look like
    # decorators=None, defaults=(), flags=0, doc=None -- confirm against
    # the compiler.ast.Function signature.
    p[0] = ast.Function(None, p[2], tuple(p[3]), (), 0, None, p[5])
# parameters: '(' [varargslist] ')'
def p_parameters(p):
    """parameters : LPAR RPAR
| LPAR varargslist RPAR"""
    # "()" gives an empty parameter list; otherwise hand on the list of
    # names built by p_varargslist.
    has_names = len(p) != 3
    p[0] = p[2] if has_names else []
# varargslist: (fpdef ['=' test] ',')* ('*' NAME [',' '**' NAME] | '**' NAME) |
# highly simplified
def p_varargslist(p):
    """varargslist : varargslist COMMA NAME
| NAME"""
    # Collect the parameter names into a list.
    if len(p) == 4:
        # p[3] is a single NAME string, so it has to be wrapped before it
        # can be appended to the list collected so far (list + str raised
        # TypeError for any function with two or more parameters).
        p[0] = p[1] + [p[3]]
    else:
        p[0] = [p[1]]
# stmt: simple_stmt | compound_stmt
def p_stmt_simple(p):
    """stmt : simple_stmt"""
    # simple_stmt is already a list of statement nodes, so it is passed on
    # as-is; every stmt value is a list (see p_stmt_compound).
    p[0] = p[1]
def p_stmt_compound(p):
    """stmt : compound_stmt"""
    # A compound statement is a single node; wrap it so that stmt is always
    # a list, which p_stmts and p_file_input concatenate with "+".
    p[0] = [p[1]]
# simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE
def p_simple_stmt(p):
    """simple_stmt : small_stmts NEWLINE
| small_stmts SEMICOLON NEWLINE"""
    # The optional trailing SEMICOLON and the NEWLINE contribute nothing;
    # the value is the list built by p_small_stmts.
    p[0] = p[1]
def p_small_stmts(p):
    """small_stmts : small_stmts SEMICOLON small_stmt
| small_stmt"""
    # Build the list of ';'-separated statements found on one line.
    if len(p) != 4:
        p[0] = [p[1]]
        return
    p[0] = p[1] + [p[3]]
# small_stmt: expr_stmt | print_stmt | del_stmt | pass_stmt | flow_stmt |
# import_stmt | global_stmt | exec_stmt | assert_stmt
def p_small_stmt(p):
    """small_stmt : flow_stmt
| expr_stmt"""
    # Only flow (return) and expression statements are supported; the node
    # built by the sub-rule is passed through unchanged.
    p[0] = p[1]
# expr_stmt: testlist (augassign (yield_expr|testlist) |
# ('=' (yield_expr|testlist))*)
# augassign: ('+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^=' |
# '<<=' | '>>=' | '**=' | '//=')
def p_expr_stmt(p):
    """expr_stmt : testlist ASSIGN testlist
| testlist """
    if len(p) != 2:
        # "target = value": the Assign helper validates the left-hand side.
        p[0] = Assign(p[1], p[3])
        return
    # A bare list of expressions: evaluate it and throw the result away.
    p[0] = ast.Discard(p[1])
def p_flow_stmt(p):
    "flow_stmt : return_stmt"
    # return is the only flow statement in this grammar; pass its node on.
    p[0] = p[1]
# return_stmt: 'return' [testlist]
def p_return_stmt(p):
    "return_stmt : RETURN testlist"
    # The rule requires a value: a bare "return" is not part of this grammar.
    p[0] = ast.Return(p[2])
def p_compound_stmt(p):
    """compound_stmt : if_stmt
| funcdef"""
    # Only "if" and "def" are supported; pass the node through unchanged.
    p[0] = p[1]
def p_if_stmt(p):
    'if_stmt : IF test COLON suite'
    # A single (condition, body) pair; the grammar has no elif / else.
    # NOTE(review): the trailing None is presumably the else branch --
    # confirm against compiler.ast.If.
    p[0] = ast.If([(p[2], p[4])], None)
def p_suite(p):
    """suite : simple_stmt
| NEWLINE INDENT stmts DEDENT"""
    # The body is either the statements on the same line (p[1]) or the
    # indented block (p[3]); both are lists and end up in one Stmt node.
    body = p[1] if len(p) == 2 else p[3]
    p[0] = ast.Stmt(body)
def p_stmts(p):
    """stmts : stmts stmt
| stmt"""
    # Every stmt is a list, so a block is built by concatenation.
    collected = p[1]
    if len(p) == 3:
        collected = collected + p[2]
    p[0] = collected
## Not using Python's approach because Ply supports precedence
# comparison: expr (comp_op expr)*
# arith_expr: term (('+'|'-') term)*
# term: factor (('*'|'/'|'%'|'//') factor)*
# factor: ('+'|'-'|'~') factor | power
# comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not'
def make_lt_compare(operands):
    """Build the Compare node for ``left < right``.

    ``operands`` is the ``(left, right)`` pair, passed as one argument so
    the function can be called like the other entries of ``binary_ops``.
    It is unpacked here because tuple parameters in a signature are a
    syntax error on Python 3.
    """
    left, right = operands
    return ast.Compare(left, [('<', right),])
def make_gt_compare(operands):
    """Build the Compare node for ``left > right``.

    ``operands`` is the ``(left, right)`` pair, passed as one argument so
    the function can be called like the other entries of ``binary_ops``.
    It is unpacked here because tuple parameters in a signature are a
    syntax error on Python 3.
    """
    left, right = operands
    return ast.Compare(left, [('>', right),])
def make_eq_compare(operands):
    """Build the Compare node for ``left == right``.

    ``operands`` is the ``(left, right)`` pair, passed as one argument so
    the function can be called like the other entries of ``binary_ops``.
    It is unpacked here because tuple parameters in a signature are a
    syntax error on Python 3.
    """
    left, right = operands
    return ast.Compare(left, [('==', right),])
binary_ops = {
"+": ast.Add,
"-": ast.Sub,
"*": ast.Mul,
"/": ast.Div,
"<": make_lt_compare,
">": make_gt_compare,
"==": make_eq_compare,
}
unary_ops = {
"+": ast.UnaryAdd,
"-": ast.UnarySub,
}
precedence = (
("left", "EQ", "GT", "LT"),
("left", "PLUS", "MINUS"),
("left", "MULT", "DIV"),
)
def p_comparison(p):
    """comparison : comparison PLUS comparison
| comparison MINUS comparison
| comparison MULT comparison
| comparison DIV comparison
| comparison LT comparison
| comparison EQ comparison
| comparison GT comparison
| PLUS comparison
| MINUS comparison
| power"""
    # The number of symbols tells the three shapes apart; the operator text
    # selects the node builder from binary_ops / unary_ops.
    arity = len(p)
    if arity == 4:
        build = binary_ops[p[2]]
        # Binary builders take the two operands as a single pair.
        p[0] = build((p[1], p[3]))
    elif arity == 3:
        build = unary_ops[p[1]]
        p[0] = build(p[2])
    else:
        p[0] = p[1]
# power: atom trailer* ['**' factor]
# trailers enables function calls. I only allow one level of calls
# so this is 'trailer'
def p_power(p):
    """power : atom
| atom trailer"""
    if len(p) == 2:
        # A plain atom without a trailer.
        p[0] = p[1]
        return
    # A trailer is a (kind, payload) pair; only calls exist so far.
    trailer = p[2]
    if trailer[0] != "CALL":
        raise AssertionError("not implemented")
    p[0] = ast.CallFunc(p[1], trailer[1], None, None)
def p_atom_name(p):
    """atom : NAME"""
    # An identifier becomes a Name node holding the identifier text.
    p[0] = ast.Name(p[1])
def p_atom_number(p):
    """atom : NUMBER
| STRING"""
    # Despite the name this handles both literal kinds: the token value is
    # wrapped in a Const node.
    p[0] = ast.Const(p[1])
def p_atom_tuple(p):
    """atom : LPAR testlist RPAR"""
    # The parentheses only group: the value is whatever p_testlist built
    # (a Tuple node only when the testlist itself produced one).
    p[0] = p[2]
# trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
def p_trailer(p):
    "trailer : LPAR arglist RPAR"
    # Tag the argument list as a call; p_power dispatches on the "CALL" tag.
    # The rule requires an arglist, so a call with no arguments does not
    # match it.
    p[0] = ("CALL", p[2])
# testlist: test (',' test)* [',']
# Contains shift/reduce error
def p_testlist(p):
    """testlist : testlist_multi COMMA
| testlist_multi """
    value = p[1]
    trailing_comma = len(p) != 2
    # "x," promotes a singleton to a one-element list.
    if trailing_comma and not isinstance(value, list):
        value = [value]
    # Any list (from commas between or after items) becomes a Tuple node.
    if isinstance(value, list):
        value = ast.Tuple(value)
    p[0] = value
def p_testlist_multi(p):
    """testlist_multi : testlist_multi COMMA test
| test"""
    if len(p) == 2:
        # singleton: stays a bare node until a comma shows up
        p[0] = p[1]
        return
    # The first comma turns the singleton into a list; later ones extend it.
    head = p[1] if isinstance(p[1], list) else [p[1]]
    p[0] = head + [p[3]]
# test: or_test ['if' or_test 'else' test] | lambdef
# as I don't support 'and', 'or', and 'not' this works down to 'comparison'
def p_test(p):
    "test : comparison"
    # No boolean operators are supported, so a test is just a comparison.
    p[0] = p[1]
# arglist: (argument ',')* (argument [',']| '*' test [',' '**' test] | '**' test)
# XXX INCOMPLETE: this doesn't allow the trailing comma
def p_arglist(p):
    """arglist : arglist COMMA argument
| argument"""
    # Collect the call arguments into a list, one per comma-separated item.
    if len(p) != 4:
        p[0] = [p[1]]
        return
    p[0] = p[1] + [p[3]]
# argument: test [gen_for] | test '=' test # Really [keyword '='] test
def p_argument(p):
    "argument : test"
    # Only positional arguments are supported: an argument is a plain test.
    p[0] = p[1]
def p_error(p):
    # Parser error hook: give up on the first syntax error instead of
    # attempting recovery.  The object handed in by the parser is passed to
    # the exception as-is.
    #print "Error!", repr(p)
    raise SyntaxError(p)
class GardenSnakeParser(object):
    """Parser front end: GardenSnake source text in, ``ast.Module`` out."""

    def __init__(self, lexer = None):
        # Fall back to a fresh IndentLexer when the caller supplies none.
        self.lexer = IndentLexer() if lexer is None else lexer
        self.parser = yacc.yacc(start="file_input_end")

    def parse(self, code):
        """Parse the source string ``code`` and return it wrapped in a Module node."""
        self.lexer.input(code)
        tree = self.parser.parse(lexer = self.lexer)
        return ast.Module(None, tree)
###### Code generation ######
from compiler import misc, syntax, pycodegen
class GardenSnakeCompiler(object):
    """Compile GardenSnake source text with the ``compiler`` package."""

    def __init__(self):
        self.parser = GardenSnakeParser()

    def compile(self, code, filename="<string>"):
        """Parse ``code`` and return the result of the module code generator.

        ``filename`` is recorded on the tree before it is checked and
        handed to ``pycodegen.ModuleCodeGenerator``.
        """
        tree = self.parser.parse(code)
        misc.set_filename(filename, tree)
        syntax.check(tree)
        generator = pycodegen.ModuleCodeGenerator(tree)
        return generator.getCode()
####### Test code #######
compile = GardenSnakeCompiler().compile
code = r"""
print('LET\'S TRY THIS \\OUT')
#Comment here
def x(a):
print('called with',a)
if a == 1:
return 2
if a*2 > 10: return 999 / 4
# Another comment here
return a+2*3
ints = (1, 2,
3, 4,
5)
print('mutiline-expression', ints)
t = 4+1/3*2+6*(9-5+1)
print('predence test; should be 34+2/3:', t, t==(34+2/3))
print('numbers', 1,2,3,4,5)
if 1:
8
a=9
print(x(a))
print(x(1))
print(x(2))
print(x(8),'3')
print('this is decimal', 1/5)
print('BIG DECIMAL', 1.234567891234567e12345)
"""
# Set up the GardenSnake run-time environment
def print_(*args):
print "-->", " ".join(map(str,args))
globals()["print"] = print_
compiled_code = compile(code)
exec compiled_code in globals()
print "Done"

View File

@ -0,0 +1,5 @@
This example is Andrew Dalke's GardenSnake language. It shows how to process an
indentation-sensitive language such as Python. Further details can be found here:
http://dalkescientific.com/writings/diary/archive/2006/08/30/gardensnake_language.html

10
ply/example/README Normal file
View File

@ -0,0 +1,10 @@
Simple examples:
calc - Simple calculator
classcalc - Simple calculate defined as a class
Complex examples
ansic - ANSI C grammar from K&R
BASIC - A small BASIC interpreter
GardenSnake - A simple python-like language
yply - Converts Unix yacc files to PLY programs.

2
ply/example/ansic/README Normal file
View File

@ -0,0 +1,2 @@
This example is incomplete. It was going to be a complete ANSI C parser;
this is the part of it that was written.

164
ply/example/ansic/clex.py Normal file
View File

@ -0,0 +1,164 @@
# ----------------------------------------------------------------------
# clex.py
#
# A lexer for ANSI C.
# ----------------------------------------------------------------------
import sys
sys.path.insert(0,"../..")
import ply.lex as lex
# Reserved words
reserved = (
'AUTO', 'BREAK', 'CASE', 'CHAR', 'CONST', 'CONTINUE', 'DEFAULT', 'DO', 'DOUBLE',
'ELSE', 'ENUM', 'EXTERN', 'FLOAT', 'FOR', 'GOTO', 'IF', 'INT', 'LONG', 'REGISTER',
'RETURN', 'SHORT', 'SIGNED', 'SIZEOF', 'STATIC', 'STRUCT', 'SWITCH', 'TYPEDEF',
'UNION', 'UNSIGNED', 'VOID', 'VOLATILE', 'WHILE',
)
# Full token list handed to PLY: the reserved words plus every literal,
# operator and delimiter token defined below.
tokens = reserved + (
    # Literals (identifier, integer constant, float constant, string constant, char constant)
    'ID', 'TYPEID', 'ICONST', 'FCONST', 'SCONST', 'CCONST',
    # Operators (+,-,*,/,%,|,&,~,^,<<,>>, ||, &&, !, <, <=, >, >=, ==, !=)
    'PLUS', 'MINUS', 'TIMES', 'DIVIDE', 'MOD',
    'OR', 'AND', 'NOT', 'XOR', 'LSHIFT', 'RSHIFT',
    'LOR', 'LAND', 'LNOT',
    'LT', 'LE', 'GT', 'GE', 'EQ', 'NE',
    # Assignment (=, *=, /=, %=, +=, -=, <<=, >>=, &=, ^=, |=)
    'EQUALS', 'TIMESEQUAL', 'DIVEQUAL', 'MODEQUAL', 'PLUSEQUAL', 'MINUSEQUAL',
    'LSHIFTEQUAL','RSHIFTEQUAL', 'ANDEQUAL', 'XOREQUAL', 'OREQUAL',
    # Increment/decrement (++,--)
    'PLUSPLUS', 'MINUSMINUS',
    # Structure dereference (->)
    'ARROW',
    # Conditional operator (?)
    'CONDOP',
    # Delimiters ( ) [ ] { } , . ; :
    'LPAREN', 'RPAREN',
    'LBRACKET', 'RBRACKET',
    'LBRACE', 'RBRACE',
    'COMMA', 'PERIOD', 'SEMI', 'COLON',
    # Ellipsis (...)
    'ELLIPSIS',
    )
# Completely ignored characters
t_ignore = ' \t\x0c'
# Newlines
def t_NEWLINE(t):
    r'\n+'
    # Newlines only advance the line counter; nothing is returned, so no
    # token reaches the parser.
    line_breaks = t.value.count("\n")
    t.lexer.lineno = t.lexer.lineno + line_breaks
# Operators
t_PLUS = r'\+'
t_MINUS = r'-'
t_TIMES = r'\*'
t_DIVIDE = r'/'
t_MOD = r'%'
t_OR = r'\|'
t_AND = r'&'
t_NOT = r'~'
t_XOR = r'\^'
t_LSHIFT = r'<<'
t_RSHIFT = r'>>'
t_LOR = r'\|\|'
t_LAND = r'&&'
t_LNOT = r'!'
t_LT = r'<'
t_GT = r'>'
t_LE = r'<='
t_GE = r'>='
t_EQ = r'=='
t_NE = r'!='
# Assignment operators
t_EQUALS = r'='
t_TIMESEQUAL = r'\*='
t_DIVEQUAL = r'/='
t_MODEQUAL = r'%='
t_PLUSEQUAL = r'\+='
t_MINUSEQUAL = r'-='
t_LSHIFTEQUAL = r'<<='
t_RSHIFTEQUAL = r'>>='
t_ANDEQUAL = r'&='
t_OREQUAL = r'\|='
# The caret must be escaped: a bare '^' is the start-of-string anchor, which
# made the rule match a lone '=' at position 0 and never the '^=' operator.
t_XOREQUAL = r'\^='
# Increment/decrement
t_PLUSPLUS = r'\+\+'
t_MINUSMINUS = r'--'
# ->
t_ARROW = r'->'
# ?
t_CONDOP = r'\?'
# Delimiters
t_LPAREN = r'\('
t_RPAREN = r'\)'
t_LBRACKET = r'\['
t_RBRACKET = r'\]'
t_LBRACE = r'\{'
t_RBRACE = r'\}'
t_COMMA = r','
t_PERIOD = r'\.'
t_SEMI = r';'
t_COLON = r':'
t_ELLIPSIS = r'\.\.\.'
# Identifiers and reserved words
reserved_map = { }
for r in reserved:
reserved_map[r.lower()] = r
def t_ID(t):
    r'[A-Za-z_][\w_]*'
    # Reserved words match the identifier pattern too.  reserved_map maps
    # each lower-cased reserved word to its token name, so keywords get
    # their own type and everything else stays an ID.
    t.type = reserved_map.get(t.value,"ID")
    return t
# Integer literal
t_ICONST = r'\d+([uU]|[lL]|[uU][lL]|[lL][uU])?'
# Floating literal
t_FCONST = r'((\d+)(\.\d+)(e(\+|-)?(\d+))? | (\d+)e(\+|-)?(\d+))([lL]|[fF])?'
# String literal
t_SCONST = r'\"([^\\\n]|(\\.))*?\"'
# Character constant 'c' or L'c'
t_CCONST = r'(L)?\'([^\\\n]|(\\.))*?\''
# Comments
def t_comment(t):
    r'/\*(.|\n)*?\*/'
    # Block comments are dropped (nothing is returned), but the lines they
    # span still have to be counted.
    line_breaks = t.value.count('\n')
    t.lexer.lineno = t.lexer.lineno + line_breaks
# Preprocessor directive (ignored)
def t_preprocessor(t):
    r'\#(.)*?\n'
    # The directive is ignored.  The match ends with the newline that
    # terminates it, so exactly one line is consumed.
    t.lexer.lineno = t.lexer.lineno + 1
def t_error(t):
    # Lexer error hook: report the character no rule matched, then skip it
    # so that lexing continues with the rest of the input.
    bad_char = t.value[0]
    print("Illegal character %s" % repr(bad_char))
    t.lexer.skip(1)
lexer = lex.lex(optimize=1)
if __name__ == "__main__":
lex.runmain(lexer)

863
ply/example/ansic/cparse.py Normal file
View File

@ -0,0 +1,863 @@
# -----------------------------------------------------------------------------
# cparse.py
#
# Simple parser for ANSI C. Based on the grammar in K&R, 2nd Ed.
# -----------------------------------------------------------------------------
import sys
import clex
import ply.yacc as yacc
# Get the token map
tokens = clex.tokens
# translation-unit:
def p_translation_unit_1(t):
'translation_unit : external_declaration'
pass
def p_translation_unit_2(t):
'translation_unit : translation_unit external_declaration'
pass
# external-declaration:
def p_external_declaration_1(t):
'external_declaration : function_definition'
pass
def p_external_declaration_2(t):
'external_declaration : declaration'
pass
# function-definition:
def p_function_definition_1(t):
'function_definition : declaration_specifiers declarator declaration_list compound_statement'
pass
def p_function_definition_2(t):
'function_definition : declarator declaration_list compound_statement'
pass
def p_function_definition_3(t):
'function_definition : declarator compound_statement'
pass
def p_function_definition_4(t):
'function_definition : declaration_specifiers declarator compound_statement'
pass
# declaration:
def p_declaration_1(t):
'declaration : declaration_specifiers init_declarator_list SEMI'
pass
def p_declaration_2(t):
'declaration : declaration_specifiers SEMI'
pass
# declaration-list:
def p_declaration_list_1(t):
'declaration_list : declaration'
pass
def p_declaration_list_2(t):
'declaration_list : declaration_list declaration '
pass
# declaration-specifiers
def p_declaration_specifiers_1(t):
'declaration_specifiers : storage_class_specifier declaration_specifiers'
pass
def p_declaration_specifiers_2(t):
'declaration_specifiers : type_specifier declaration_specifiers'
pass
def p_declaration_specifiers_3(t):
'declaration_specifiers : type_qualifier declaration_specifiers'
pass
def p_declaration_specifiers_4(t):
'declaration_specifiers : storage_class_specifier'
pass
def p_declaration_specifiers_5(t):
'declaration_specifiers : type_specifier'
pass
def p_declaration_specifiers_6(t):
'declaration_specifiers : type_qualifier'
pass
# storage-class-specifier
def p_storage_class_specifier(t):
'''storage_class_specifier : AUTO
| REGISTER
| STATIC
| EXTERN
| TYPEDEF
'''
pass
# type-specifier:
def p_type_specifier(t):
'''type_specifier : VOID
| CHAR
| SHORT
| INT
| LONG
| FLOAT
| DOUBLE
| SIGNED
| UNSIGNED
| struct_or_union_specifier
| enum_specifier
| TYPEID
'''
pass
# type-qualifier:
def p_type_qualifier(t):
'''type_qualifier : CONST
| VOLATILE'''
pass
# struct-or-union-specifier
def p_struct_or_union_specifier_1(t):
'struct_or_union_specifier : struct_or_union ID LBRACE struct_declaration_list RBRACE'
pass
def p_struct_or_union_specifier_2(t):
'struct_or_union_specifier : struct_or_union LBRACE struct_declaration_list RBRACE'
pass
def p_struct_or_union_specifier_3(t):
'struct_or_union_specifier : struct_or_union ID'
pass
# struct-or-union:
def p_struct_or_union(t):
'''struct_or_union : STRUCT
| UNION
'''
pass
# struct-declaration-list:
def p_struct_declaration_list_1(t):
'struct_declaration_list : struct_declaration'
pass
def p_struct_declaration_list_2(t):
'struct_declaration_list : struct_declaration_list struct_declaration'
pass
# init-declarator-list:
def p_init_declarator_list_1(t):
'init_declarator_list : init_declarator'
pass
def p_init_declarator_list_2(t):
'init_declarator_list : init_declarator_list COMMA init_declarator'
pass
# init-declarator
def p_init_declarator_1(t):
'init_declarator : declarator'
pass
def p_init_declarator_2(t):
'init_declarator : declarator EQUALS initializer'
pass
# struct-declaration:
def p_struct_declaration(t):
'struct_declaration : specifier_qualifier_list struct_declarator_list SEMI'
pass
# specifier-qualifier-list:
def p_specifier_qualifier_list_1(t):
'specifier_qualifier_list : type_specifier specifier_qualifier_list'
pass
def p_specifier_qualifier_list_2(t):
'specifier_qualifier_list : type_specifier'
pass
def p_specifier_qualifier_list_3(t):
'specifier_qualifier_list : type_qualifier specifier_qualifier_list'
pass
def p_specifier_qualifier_list_4(t):
'specifier_qualifier_list : type_qualifier'
pass
# struct-declarator-list:
def p_struct_declarator_list_1(t):
'struct_declarator_list : struct_declarator'
pass
def p_struct_declarator_list_2(t):
'struct_declarator_list : struct_declarator_list COMMA struct_declarator'
pass
# struct-declarator:
def p_struct_declarator_1(t):
'struct_declarator : declarator'
pass
def p_struct_declarator_2(t):
'struct_declarator : declarator COLON constant_expression'
pass
def p_struct_declarator_3(t):
'struct_declarator : COLON constant_expression'
pass
# enum-specifier:
def p_enum_specifier_1(t):
'enum_specifier : ENUM ID LBRACE enumerator_list RBRACE'
pass
def p_enum_specifier_2(t):
'enum_specifier : ENUM LBRACE enumerator_list RBRACE'
pass
def p_enum_specifier_3(t):
'enum_specifier : ENUM ID'
pass
# enumerator_list:
def p_enumerator_list_1(t):
'enumerator_list : enumerator'
pass
def p_enumerator_list_2(t):
'enumerator_list : enumerator_list COMMA enumerator'
pass
# enumerator:
def p_enumerator_1(t):
'enumerator : ID'
pass
def p_enumerator_2(t):
'enumerator : ID EQUALS constant_expression'
pass
# declarator:
def p_declarator_1(t):
'declarator : pointer direct_declarator'
pass
def p_declarator_2(t):
'declarator : direct_declarator'
pass
# direct-declarator:
def p_direct_declarator_1(t):
'direct_declarator : ID'
pass
def p_direct_declarator_2(t):
'direct_declarator : LPAREN declarator RPAREN'
pass
def p_direct_declarator_3(t):
'direct_declarator : direct_declarator LBRACKET constant_expression_opt RBRACKET'
pass
def p_direct_declarator_4(t):
'direct_declarator : direct_declarator LPAREN parameter_type_list RPAREN '
pass
def p_direct_declarator_5(t):
'direct_declarator : direct_declarator LPAREN identifier_list RPAREN '
pass
def p_direct_declarator_6(t):
'direct_declarator : direct_declarator LPAREN RPAREN '
pass
# pointer:
def p_pointer_1(t):
'pointer : TIMES type_qualifier_list'
pass
def p_pointer_2(t):
'pointer : TIMES'
pass
def p_pointer_3(t):
'pointer : TIMES type_qualifier_list pointer'
pass
def p_pointer_4(t):
'pointer : TIMES pointer'
pass
# type-qualifier-list:
def p_type_qualifier_list_1(t):
'type_qualifier_list : type_qualifier'
pass
def p_type_qualifier_list_2(t):
'type_qualifier_list : type_qualifier_list type_qualifier'
pass
# parameter-type-list:
def p_parameter_type_list_1(t):
'parameter_type_list : parameter_list'
pass
def p_parameter_type_list_2(t):
'parameter_type_list : parameter_list COMMA ELLIPSIS'
pass
# parameter-list:
def p_parameter_list_1(t):
'parameter_list : parameter_declaration'
pass
def p_parameter_list_2(t):
'parameter_list : parameter_list COMMA parameter_declaration'
pass
# parameter-declaration:
def p_parameter_declaration_1(t):
'parameter_declaration : declaration_specifiers declarator'
pass
def p_parameter_declaration_2(t):
'parameter_declaration : declaration_specifiers abstract_declarator_opt'
pass
# identifier-list:
def p_identifier_list_1(t):
'identifier_list : ID'
pass
def p_identifier_list_2(t):
'identifier_list : identifier_list COMMA ID'
pass
# initializer:
def p_initializer_1(t):
'initializer : assignment_expression'
pass
def p_initializer_2(t):
'''initializer : LBRACE initializer_list RBRACE
| LBRACE initializer_list COMMA RBRACE'''
pass
# initializer-list:
def p_initializer_list_1(t):
'initializer_list : initializer'
pass
def p_initializer_list_2(t):
'initializer_list : initializer_list COMMA initializer'
pass
# type-name:
def p_type_name(t):
'type_name : specifier_qualifier_list abstract_declarator_opt'
pass
def p_abstract_declarator_opt_1(t):
'abstract_declarator_opt : empty'
pass
def p_abstract_declarator_opt_2(t):
'abstract_declarator_opt : abstract_declarator'
pass
# abstract-declarator:
def p_abstract_declarator_1(t):
'abstract_declarator : pointer '
pass
def p_abstract_declarator_2(t):
'abstract_declarator : pointer direct_abstract_declarator'
pass
def p_abstract_declarator_3(t):
'abstract_declarator : direct_abstract_declarator'
pass
# direct-abstract-declarator:
def p_direct_abstract_declarator_1(t):
'direct_abstract_declarator : LPAREN abstract_declarator RPAREN'
pass
def p_direct_abstract_declarator_2(t):
'direct_abstract_declarator : direct_abstract_declarator LBRACKET constant_expression_opt RBRACKET'
pass
def p_direct_abstract_declarator_3(t):
'direct_abstract_declarator : LBRACKET constant_expression_opt RBRACKET'
pass
def p_direct_abstract_declarator_4(t):
'direct_abstract_declarator : direct_abstract_declarator LPAREN parameter_type_list_opt RPAREN'
pass
def p_direct_abstract_declarator_5(t):
'direct_abstract_declarator : LPAREN parameter_type_list_opt RPAREN'
pass
# Optional fields in abstract declarators
def p_constant_expression_opt_1(t):
'constant_expression_opt : empty'
pass
def p_constant_expression_opt_2(t):
'constant_expression_opt : constant_expression'
pass
def p_parameter_type_list_opt_1(t):
'parameter_type_list_opt : empty'
pass
def p_parameter_type_list_opt_2(t):
'parameter_type_list_opt : parameter_type_list'
pass
# statement:
def p_statement(t):
'''
statement : labeled_statement
| expression_statement
| compound_statement
| selection_statement
| iteration_statement
| jump_statement
'''
pass
# labeled-statement:
def p_labeled_statement_1(t):
'labeled_statement : ID COLON statement'
pass
def p_labeled_statement_2(t):
'labeled_statement : CASE constant_expression COLON statement'
pass
def p_labeled_statement_3(t):
'labeled_statement : DEFAULT COLON statement'
pass
# expression-statement:
def p_expression_statement(t):
'expression_statement : expression_opt SEMI'
pass
# compound-statement:
def p_compound_statement_1(t):
'compound_statement : LBRACE declaration_list statement_list RBRACE'
pass
def p_compound_statement_2(t):
'compound_statement : LBRACE statement_list RBRACE'
pass
def p_compound_statement_3(t):
'compound_statement : LBRACE declaration_list RBRACE'
pass
def p_compound_statement_4(t):
'compound_statement : LBRACE RBRACE'
pass
# statement-list:
def p_statement_list_1(t):
'statement_list : statement'
pass
def p_statement_list_2(t):
'statement_list : statement_list statement'
pass
# selection-statement
def p_selection_statement_1(t):
'selection_statement : IF LPAREN expression RPAREN statement'
pass
def p_selection_statement_2(t):
'selection_statement : IF LPAREN expression RPAREN statement ELSE statement '
pass
def p_selection_statement_3(t):
'selection_statement : SWITCH LPAREN expression RPAREN statement '
pass
# iteration_statement:
def p_iteration_statement_1(t):
'iteration_statement : WHILE LPAREN expression RPAREN statement'
pass
def p_iteration_statement_2(t):
'iteration_statement : FOR LPAREN expression_opt SEMI expression_opt SEMI expression_opt RPAREN statement '
pass
def p_iteration_statement_3(t):
'iteration_statement : DO statement WHILE LPAREN expression RPAREN SEMI'
pass
# jump_statement:
def p_jump_statement_1(t):
'jump_statement : GOTO ID SEMI'
pass
def p_jump_statement_2(t):
'jump_statement : CONTINUE SEMI'
pass
def p_jump_statement_3(t):
'jump_statement : BREAK SEMI'
pass
def p_jump_statement_4(t):
'jump_statement : RETURN expression_opt SEMI'
pass
def p_expression_opt_1(t):
'expression_opt : empty'
pass
def p_expression_opt_2(t):
'expression_opt : expression'
pass
# expression:
def p_expression_1(t):
'expression : assignment_expression'
pass
def p_expression_2(t):
'expression : expression COMMA assignment_expression'
pass
# assignment-expression:
def p_assignment_expression_1(t):
'assignment_expression : conditional_expression'
pass
def p_assignment_expression_2(t):
'assignment_expression : unary_expression assignment_operator assignment_expression'
pass
# assignment_operator:
def p_assignment_operator(t):
'''
assignment_operator : EQUALS
| TIMESEQUAL
| DIVEQUAL
| MODEQUAL
| PLUSEQUAL
| MINUSEQUAL
| LSHIFTEQUAL
| RSHIFTEQUAL
| ANDEQUAL
| OREQUAL
| XOREQUAL
'''
pass
# conditional-expression
def p_conditional_expression_1(t):
'conditional_expression : logical_or_expression'
pass
def p_conditional_expression_2(t):
'conditional_expression : logical_or_expression CONDOP expression COLON conditional_expression '
pass
# constant-expression
def p_constant_expression(t):
'constant_expression : conditional_expression'
pass
# logical-or-expression
def p_logical_or_expression_1(t):
'logical_or_expression : logical_and_expression'
pass
def p_logical_or_expression_2(t):
'logical_or_expression : logical_or_expression LOR logical_and_expression'
pass
# logical-and-expression
def p_logical_and_expression_1(t):
'logical_and_expression : inclusive_or_expression'
pass
def p_logical_and_expression_2(t):
'logical_and_expression : logical_and_expression LAND inclusive_or_expression'
pass
# inclusive-or-expression:
def p_inclusive_or_expression_1(t):
'inclusive_or_expression : exclusive_or_expression'
pass
def p_inclusive_or_expression_2(t):
'inclusive_or_expression : inclusive_or_expression OR exclusive_or_expression'
pass
# exclusive-or-expression:
def p_exclusive_or_expression_1(t):
'exclusive_or_expression : and_expression'
pass
def p_exclusive_or_expression_2(t):
'exclusive_or_expression : exclusive_or_expression XOR and_expression'
pass
# AND-expression
def p_and_expression_1(t):
'and_expression : equality_expression'
pass
def p_and_expression_2(t):
'and_expression : and_expression AND equality_expression'
pass
# equality-expression:
def p_equality_expression_1(t):
'equality_expression : relational_expression'
pass
def p_equality_expression_2(t):
'equality_expression : equality_expression EQ relational_expression'
pass
def p_equality_expression_3(t):
'equality_expression : equality_expression NE relational_expression'
pass
# relational-expression:
def p_relational_expression_1(t):
'relational_expression : shift_expression'
pass
def p_relational_expression_2(t):
'relational_expression : relational_expression LT shift_expression'
pass
def p_relational_expression_3(t):
'relational_expression : relational_expression GT shift_expression'
pass
def p_relational_expression_4(t):
'relational_expression : relational_expression LE shift_expression'
pass
def p_relational_expression_5(t):
'relational_expression : relational_expression GE shift_expression'
pass
# shift-expression
def p_shift_expression_1(t):
'shift_expression : additive_expression'
pass
def p_shift_expression_2(t):
'shift_expression : shift_expression LSHIFT additive_expression'
pass
def p_shift_expression_3(t):
'shift_expression : shift_expression RSHIFT additive_expression'
pass
# additive-expression
def p_additive_expression_1(t):
'additive_expression : multiplicative_expression'
pass
def p_additive_expression_2(t):
'additive_expression : additive_expression PLUS multiplicative_expression'
pass
def p_additive_expression_3(t):
'additive_expression : additive_expression MINUS multiplicative_expression'
pass
# multiplicative-expression
def p_multiplicative_expression_1(t):
'multiplicative_expression : cast_expression'
pass
def p_multiplicative_expression_2(t):
'multiplicative_expression : multiplicative_expression TIMES cast_expression'
pass
def p_multiplicative_expression_3(t):
'multiplicative_expression : multiplicative_expression DIVIDE cast_expression'
pass
def p_multiplicative_expression_4(t):
'multiplicative_expression : multiplicative_expression MOD cast_expression'
pass
# cast-expression:
def p_cast_expression_1(t):
'cast_expression : unary_expression'
pass
def p_cast_expression_2(t):
'cast_expression : LPAREN type_name RPAREN cast_expression'
pass
# unary-expression:
def p_unary_expression_1(t):
'unary_expression : postfix_expression'
pass
def p_unary_expression_2(t):
'unary_expression : PLUSPLUS unary_expression'
pass
def p_unary_expression_3(t):
'unary_expression : MINUSMINUS unary_expression'
pass
def p_unary_expression_4(t):
'unary_expression : unary_operator cast_expression'
pass
def p_unary_expression_5(t):
'unary_expression : SIZEOF unary_expression'
pass
def p_unary_expression_6(t):
'unary_expression : SIZEOF LPAREN type_name RPAREN'
pass
#unary-operator
def p_unary_operator(t):
'''unary_operator : AND
| TIMES
| PLUS
| MINUS
| NOT
| LNOT '''
pass
# postfix-expression:
def p_postfix_expression_1(t):
'postfix_expression : primary_expression'
pass
def p_postfix_expression_2(t):
'postfix_expression : postfix_expression LBRACKET expression RBRACKET'
pass
def p_postfix_expression_3(t):
'postfix_expression : postfix_expression LPAREN argument_expression_list RPAREN'
pass
def p_postfix_expression_4(t):
'postfix_expression : postfix_expression LPAREN RPAREN'
pass
def p_postfix_expression_5(t):
'postfix_expression : postfix_expression PERIOD ID'
pass
def p_postfix_expression_6(t):
'postfix_expression : postfix_expression ARROW ID'
pass
def p_postfix_expression_7(t):
'postfix_expression : postfix_expression PLUSPLUS'
pass
def p_postfix_expression_8(t):
'postfix_expression : postfix_expression MINUSMINUS'
pass
# primary-expression:
def p_primary_expression(t):
'''primary_expression : ID
| constant
| SCONST
| LPAREN expression RPAREN'''
pass
# argument-expression-list:
def p_argument_expression_list(t):
'''argument_expression_list : assignment_expression
| argument_expression_list COMMA assignment_expression'''
pass
# constant:
def p_constant(t):
'''constant : ICONST
| FCONST
| CCONST'''
pass
def p_empty(t):
'empty : '
pass
def p_error(t):
print("Whoa. We're hosed")
import profile
# Build the grammar
yacc.yacc(method='LALR')
#profile.run("yacc.yacc(method='LALR')")

107
ply/example/calc/calc.py Normal file
View File

@ -0,0 +1,107 @@
# -----------------------------------------------------------------------------
# calc.py
#
# A simple calculator with variables. This is from O'Reilly's
# "Lex and Yacc", p. 63.
# -----------------------------------------------------------------------------
import sys
sys.path.insert(0,"../..")
if sys.version_info[0] >= 3:
raw_input = input
tokens = (
'NAME','NUMBER',
)
literals = ['=','+','-','*','/', '(',')']
# Tokens
t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*'
def t_NUMBER(t):
r'\d+'
t.value = int(t.value)
return t
t_ignore = " \t"
def t_newline(t):
r'\n+'
t.lexer.lineno += t.value.count("\n")
def t_error(t):
print("Illegal character '%s'" % t.value[0])
t.lexer.skip(1)
# Build the lexer
import ply.lex as lex
lex.lex()
# Parsing rules
precedence = (
('left','+','-'),
('left','*','/'),
('right','UMINUS'),
)
# dictionary of names
names = { }
def p_statement_assign(p):
'statement : NAME "=" expression'
names[p[1]] = p[3]
def p_statement_expr(p):
'statement : expression'
print(p[1])
def p_expression_binop(p):
    '''expression : expression '+' expression
                  | expression '-' expression
                  | expression '*' expression
                  | expression '/' expression'''
    # The string above is the grammar rule consumed by yacc, not prose.
    # p[1] and p[3] are the operand values, p[2] is the operator literal and
    # the result is stored in p[0].  An operator that matches none of the
    # cases leaves p[0] untouched.
    operator = p[2]
    if operator == '+':
        p[0] = p[1] + p[3]
        return
    if operator == '-':
        p[0] = p[1] - p[3]
        return
    if operator == '*':
        p[0] = p[1] * p[3]
        return
    if operator == '/':
        p[0] = p[1] / p[3]
def p_expression_uminus(p):
"expression : '-' expression %prec UMINUS"
p[0] = -p[2]
def p_expression_group(p):
"expression : '(' expression ')'"
p[0] = p[2]
def p_expression_number(p):
"expression : NUMBER"
p[0] = p[1]
def p_expression_name(p):
"expression : NAME"
try:
p[0] = names[p[1]]
except LookupError:
print("Undefined name '%s'" % p[1])
p[0] = 0
def p_error(p):
if p:
print("Syntax error at '%s'" % p.value)
else:
print("Syntax error at EOF")
import ply.yacc as yacc
yacc.yacc()
while 1:
try:
s = raw_input('calc > ')
except EOFError:
break
if not s: continue
yacc.parse(s)

View File

@ -0,0 +1,113 @@
# -----------------------------------------------------------------------------
# calc.py
#
# This example shows how to run the parser in a debugging mode
# with output routed to a logging object.
# -----------------------------------------------------------------------------
import sys
sys.path.insert(0,"../..")
if sys.version_info[0] >= 3:
raw_input = input
tokens = (
'NAME','NUMBER',
)
literals = ['=','+','-','*','/', '(',')']
# Tokens
t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*'
def t_NUMBER(t):
r'\d+'
t.value = int(t.value)
return t
t_ignore = " \t"
def t_newline(t):
r'\n+'
t.lexer.lineno += t.value.count("\n")
def t_error(t):
print("Illegal character '%s'" % t.value[0])
t.lexer.skip(1)
# Build the lexer
import ply.lex as lex
lex.lex()
# Parsing rules
precedence = (
('left','+','-'),
('left','*','/'),
('right','UMINUS'),
)
# dictionary of names
names = { }
def p_statement_assign(p):
'statement : NAME "=" expression'
names[p[1]] = p[3]
def p_statement_expr(p):
'statement : expression'
print(p[1])
def p_expression_binop(p):
'''expression : expression '+' expression
| expression '-' expression
| expression '*' expression
| expression '/' expression'''
if p[2] == '+' : p[0] = p[1] + p[3]
elif p[2] == '-': p[0] = p[1] - p[3]
elif p[2] == '*': p[0] = p[1] * p[3]
elif p[2] == '/': p[0] = p[1] / p[3]
def p_expression_uminus(p):
"expression : '-' expression %prec UMINUS"
p[0] = -p[2]
def p_expression_group(p):
"expression : '(' expression ')'"
p[0] = p[2]
def p_expression_number(p):
"expression : NUMBER"
p[0] = p[1]
def p_expression_name(p):
"expression : NAME"
try:
p[0] = names[p[1]]
except LookupError:
print("Undefined name '%s'" % p[1])
p[0] = 0
def p_error(p):
if p:
print("Syntax error at '%s'" % p.value)
else:
print("Syntax error at EOF")
import ply.yacc as yacc
yacc.yacc()
import logging
logging.basicConfig(
level=logging.INFO,
filename="parselog.txt"
)
while 1:
try:
s = raw_input('calc > ')
except EOFError:
break
if not s: continue
yacc.parse(s,debug=logging.getLogger())

157
ply/example/classcalc/calc.py Executable file
View File

@ -0,0 +1,157 @@
#!/usr/bin/env python
# -----------------------------------------------------------------------------
# calc.py
#
# A simple calculator with variables. This is from O'Reilly's
# "Lex and Yacc", p. 63.
#
# Class-based example contributed to PLY by David McNab
# -----------------------------------------------------------------------------
import sys
sys.path.insert(0,"../..")
if sys.version_info[0] >= 3:
raw_input = input
import ply.lex as lex
import ply.yacc as yacc
import os
class Parser:
    """
    Base class for a lexer/parser that has the rules defined as methods.

    Subclasses supply ``tokens``, ``precedence`` and the ``t_*`` / ``p_*``
    rule methods; constructing an instance builds the lexer and parser
    from them.
    """
    tokens = ()
    precedence = ()

    def __init__(self, **kw):
        """Build the lexer and parser for this instance.

        Keyword arguments:
            debug -- forwarded to ``lex.lex`` and ``yacc.yacc`` (default 0).
        """
        self.debug = kw.get('debug', 0)
        self.names = { }
        # Name the debug file and table module after "<source file>_<class>".
        try:
            modname = os.path.split(os.path.splitext(__file__)[0])[1] + "_" + self.__class__.__name__
        except NameError:
            # __file__ is not defined in this execution context; fall back to
            # a fixed prefix.  Only NameError is caught so that unrelated
            # failures (and KeyboardInterrupt) are not silently swallowed.
            modname = "parser"+"_"+self.__class__.__name__
        self.debugfile = modname + ".dbg"
        self.tabmodule = modname + "_" + "parsetab"

        # Build the lexer and parser
        lex.lex(module=self, debug=self.debug)
        yacc.yacc(module=self,
                  debug=self.debug,
                  debugfile=self.debugfile,
                  tabmodule=self.tabmodule)

    def run(self):
        """Prompt with ``calc > `` and parse each non-empty line until EOF."""
        while 1:
            try:
                s = raw_input('calc > ')
            except EOFError:
                break
            if not s: continue
            yacc.parse(s)
class Calc(Parser):
tokens = (
'NAME','NUMBER',
'PLUS','MINUS','EXP', 'TIMES','DIVIDE','EQUALS',
'LPAREN','RPAREN',
)
# Tokens
t_PLUS = r'\+'
t_MINUS = r'-'
t_EXP = r'\*\*'
t_TIMES = r'\*'
t_DIVIDE = r'/'
t_EQUALS = r'='
t_LPAREN = r'\('
t_RPAREN = r'\)'
t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*'
def t_NUMBER(self, t):
r'\d+'
try:
t.value = int(t.value)
except ValueError:
print("Integer value too large %s" % t.value)
t.value = 0
#print "parsed number %s" % repr(t.value)
return t
t_ignore = " \t"
def t_newline(self, t):
r'\n+'
t.lexer.lineno += t.value.count("\n")
def t_error(self, t):
print("Illegal character '%s'" % t.value[0])
t.lexer.skip(1)
# Parsing rules
precedence = (
('left','PLUS','MINUS'),
('left','TIMES','DIVIDE'),
('left', 'EXP'),
('right','UMINUS'),
)
def p_statement_assign(self, p):
'statement : NAME EQUALS expression'
self.names[p[1]] = p[3]
def p_statement_expr(self, p):
'statement : expression'
print(p[1])
def p_expression_binop(self, p):
"""
expression : expression PLUS expression
| expression MINUS expression
| expression TIMES expression
| expression DIVIDE expression
| expression EXP expression
"""
#print [repr(p[i]) for i in range(0,4)]
if p[2] == '+' : p[0] = p[1] + p[3]
elif p[2] == '-': p[0] = p[1] - p[3]
elif p[2] == '*': p[0] = p[1] * p[3]
elif p[2] == '/': p[0] = p[1] / p[3]
elif p[2] == '**': p[0] = p[1] ** p[3]
def p_expression_uminus(self, p):
'expression : MINUS expression %prec UMINUS'
p[0] = -p[2]
def p_expression_group(self, p):
'expression : LPAREN expression RPAREN'
p[0] = p[2]
def p_expression_number(self, p):
'expression : NUMBER'
p[0] = p[1]
def p_expression_name(self, p):
'expression : NAME'
try:
p[0] = self.names[p[1]]
except LookupError:
print("Undefined name '%s'" % p[1])
p[0] = 0
def p_error(self, p):
if p:
print("Syntax error at '%s'" % p.value)
else:
print("Syntax error at EOF")
if __name__ == '__main__':
calc = Calc()
calc.run()

View File

@ -0,0 +1,40 @@
# calc_Calc_parsetab.py
# This file is automatically generated. Do not edit.
_tabversion = '3.2'
_lr_method = 'LALR'
_lr_signature = '|\x0f"\xe2\x0e\xf7\x0fT\x15K\x1c\xc0\x1e\xa3c\x10'
_lr_action_items = {'$end':([1,2,3,5,9,15,16,17,18,19,20,21,22,],[-11,-10,0,-2,-11,-8,-1,-9,-6,-5,-3,-7,-4,]),'RPAREN':([2,8,9,15,17,18,19,20,21,22,],[-10,17,-11,-8,-9,-6,-5,-3,-7,-4,]),'DIVIDE':([1,2,5,8,9,15,16,17,18,19,20,21,22,],[-11,-10,10,10,-11,-8,10,-9,-6,-5,10,-7,10,]),'EQUALS':([1,],[7,]),'NUMBER':([0,4,6,7,10,11,12,13,14,],[2,2,2,2,2,2,2,2,2,]),'PLUS':([1,2,5,8,9,15,16,17,18,19,20,21,22,],[-11,-10,12,12,-11,-8,12,-9,-6,-5,-3,-7,-4,]),'LPAREN':([0,4,6,7,10,11,12,13,14,],[4,4,4,4,4,4,4,4,4,]),'EXP':([1,2,5,8,9,15,16,17,18,19,20,21,22,],[-11,-10,13,13,-11,-8,13,-9,13,13,13,-7,13,]),'TIMES':([1,2,5,8,9,15,16,17,18,19,20,21,22,],[-11,-10,11,11,-11,-8,11,-9,-6,-5,11,-7,11,]),'MINUS':([0,1,2,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,],[6,-11,-10,6,14,6,6,14,-11,6,6,6,6,6,-8,14,-9,-6,-5,-3,-7,-4,]),'NAME':([0,4,6,7,10,11,12,13,14,],[1,9,9,9,9,9,9,9,9,]),}
_lr_action = { }
for _k, _v in _lr_action_items.items():
for _x,_y in zip(_v[0],_v[1]):
if not _x in _lr_action: _lr_action[_x] = { }
_lr_action[_x][_k] = _y
del _lr_action_items
_lr_goto_items = {'expression':([0,4,6,7,10,11,12,13,14,],[5,8,15,16,18,19,20,21,22,]),'statement':([0,],[3,]),}
_lr_goto = { }
for _k, _v in _lr_goto_items.items():
for _x,_y in zip(_v[0],_v[1]):
if not _x in _lr_goto: _lr_goto[_x] = { }
_lr_goto[_x][_k] = _y
del _lr_goto_items
_lr_productions = [
("S' -> statement","S'",1,None,None,None),
('statement -> NAME EQUALS expression','statement',3,'p_statement_assign','./calc.py',107),
('statement -> expression','statement',1,'p_statement_expr','./calc.py',111),
('expression -> expression PLUS expression','expression',3,'p_expression_binop','./calc.py',116),
('expression -> expression MINUS expression','expression',3,'p_expression_binop','./calc.py',117),
('expression -> expression TIMES expression','expression',3,'p_expression_binop','./calc.py',118),
('expression -> expression DIVIDE expression','expression',3,'p_expression_binop','./calc.py',119),
('expression -> expression EXP expression','expression',3,'p_expression_binop','./calc.py',120),
('expression -> MINUS expression','expression',2,'p_expression_uminus','./calc.py',130),
('expression -> LPAREN expression RPAREN','expression',3,'p_expression_group','./calc.py',134),
('expression -> NUMBER','expression',1,'p_expression_number','./calc.py',138),
('expression -> NAME','expression',1,'p_expression_name','./calc.py',142),
]

2
ply/example/cleanup.sh Executable file
View File

@ -0,0 +1,2 @@
#!/bin/sh
rm -f */*.pyc */parsetab.py */parser.out */*~ */*.class

View File

@ -0,0 +1,130 @@
# -----------------------------------------------------------------------------
# calc.py
#
# A calculator parser that makes use of closures. The function make_calculator()
# returns a function that accepts an input string and returns a result. All
# lexing rules, parsing rules, and internal state are held inside the function.
# -----------------------------------------------------------------------------
import sys
sys.path.insert(0,"../..")
if sys.version_info[0] >= 3:
raw_input = input
# Make a calculator function
def make_calculator():
import ply.lex as lex
import ply.yacc as yacc
# ------- Internal calculator state
variables = { } # Dictionary of stored variables
# ------- Calculator tokenizing rules
tokens = (
'NAME','NUMBER',
)
literals = ['=','+','-','*','/', '(',')']
t_ignore = " \t"
t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*'
def t_NUMBER(t):
r'\d+'
t.value = int(t.value)
return t
def t_newline(t):
r'\n+'
t.lexer.lineno += t.value.count("\n")
def t_error(t):
print("Illegal character '%s'" % t.value[0])
t.lexer.skip(1)
# Build the lexer
lexer = lex.lex()
# ------- Calculator parsing rules
precedence = (
('left','+','-'),
('left','*','/'),
('right','UMINUS'),
)
def p_statement_assign(p):
'statement : NAME "=" expression'
variables[p[1]] = p[3]
p[0] = None
def p_statement_expr(p):
'statement : expression'
p[0] = p[1]
def p_expression_binop(p):
'''expression : expression '+' expression
| expression '-' expression
| expression '*' expression
| expression '/' expression'''
if p[2] == '+' : p[0] = p[1] + p[3]
elif p[2] == '-': p[0] = p[1] - p[3]
elif p[2] == '*': p[0] = p[1] * p[3]
elif p[2] == '/': p[0] = p[1] / p[3]
def p_expression_uminus(p):
"expression : '-' expression %prec UMINUS"
p[0] = -p[2]
def p_expression_group(p):
"expression : '(' expression ')'"
p[0] = p[2]
def p_expression_number(p):
"expression : NUMBER"
p[0] = p[1]
def p_expression_name(p):
"expression : NAME"
try:
p[0] = variables[p[1]]
except LookupError:
print("Undefined name '%s'" % p[1])
p[0] = 0
def p_error(p):
if p:
print("Syntax error at '%s'" % p.value)
else:
print("Syntax error at EOF")
# Build the parser
parser = yacc.yacc()
# ------- Input function
def input(text):
result = parser.parse(text,lexer=lexer)
return result
return input
# Make a calculator object and use it
calc = make_calculator()

while True:
    try:
        s = raw_input("calc > ")
    except EOFError:
        break
    r = calc(s)
    # An assignment statement yields None (see p_statement_assign); every
    # other result -- including 0 -- is a value that must be echoed, so test
    # for None explicitly instead of relying on truthiness.
    if r is not None:
        print(r)

View File

@ -0,0 +1,48 @@
# -----------------------------------------------------------------------------
# hedit.py
#
# Parsing of Fortran H Edit descriptions (Contributed by Pearu Peterson)
#
# These tokens can't be easily tokenized because they are of the following
# form:
#
# nHc1...cn
#
# where n is a positive integer and c1 ... cn are characters.
#
# This example shows how to modify the state of the lexer to parse
# such tokens
# -----------------------------------------------------------------------------
import sys
sys.path.insert(0,"../..")
tokens = (
'H_EDIT_DESCRIPTOR',
)
# Tokens
t_ignore = " \t\n"
def t_H_EDIT_DESCRIPTOR(t):
    r"\d+H.*"                     # This grabs all of the remaining text
    # The rule over-matches on purpose: the count before 'H' says how many
    # characters belong to the token, so the value is trimmed to that length
    # and the lexer position is moved back to just after those characters.
    i = t.value.index('H')
    # The regex guarantees t.value[:i] consists of digits only, so int() is
    # sufficient.  eval() treated a leading zero as octal on Python 2 and
    # raises SyntaxError on Python 3.
    n = int(t.value[:i])

    # Adjust the tokenizing position
    t.lexer.lexpos -= len(t.value) - (i+1+n)

    t.value = t.value[i+1:i+1+n]
    return t
def t_error(t):
print("Illegal character '%s'" % t.value[0])
t.lexer.skip(1)
# Build the lexer
import ply.lex as lex
lex.lex()
lex.runmain()

160
ply/example/newclasscalc/calc.py Executable file
View File

@ -0,0 +1,160 @@
#!/usr/bin/env python
# -----------------------------------------------------------------------------
# calc.py
#
# A simple calculator with variables. This is from O'Reilly's
# "Lex and Yacc", p. 63.
#
# Class-based example contributed to PLY by David McNab.
#
# Modified to use new-style classes. Test case.
# -----------------------------------------------------------------------------
import sys
sys.path.insert(0,"../..")
if sys.version_info[0] >= 3:
raw_input = input
import ply.lex as lex
import ply.yacc as yacc
import os
class Parser(object):
    """
    Base class for a lexer/parser that has the rules defined as methods.

    Subclasses supply ``tokens``, ``precedence`` and the ``t_*`` / ``p_*``
    rule methods; constructing an instance builds the lexer and parser
    from them.
    """
    tokens = ()
    precedence = ()

    def __init__(self, **kw):
        """Build the lexer and parser for this instance.

        Keyword arguments:
            debug -- forwarded to ``lex.lex`` and ``yacc.yacc`` (default 0).
        """
        self.debug = kw.get('debug', 0)
        self.names = { }
        # Name the debug file and table module after "<source file>_<class>".
        try:
            modname = os.path.split(os.path.splitext(__file__)[0])[1] + "_" + self.__class__.__name__
        except NameError:
            # __file__ is not defined in this execution context; fall back to
            # a fixed prefix.  Only NameError is caught so that unrelated
            # failures (and KeyboardInterrupt) are not silently swallowed.
            modname = "parser"+"_"+self.__class__.__name__
        self.debugfile = modname + ".dbg"
        self.tabmodule = modname + "_" + "parsetab"

        # Build the lexer and parser
        lex.lex(module=self, debug=self.debug)
        yacc.yacc(module=self,
                  debug=self.debug,
                  debugfile=self.debugfile,
                  tabmodule=self.tabmodule)

    def run(self):
        """Prompt with ``calc > `` and parse each non-empty line until EOF."""
        while 1:
            try:
                s = raw_input('calc > ')
            except EOFError:
                break
            if not s: continue
            yacc.parse(s)
class Calc(Parser):
tokens = (
'NAME','NUMBER',
'PLUS','MINUS','EXP', 'TIMES','DIVIDE','EQUALS',
'LPAREN','RPAREN',
)
# Tokens
t_PLUS = r'\+'
t_MINUS = r'-'
t_EXP = r'\*\*'
t_TIMES = r'\*'
t_DIVIDE = r'/'
t_EQUALS = r'='
t_LPAREN = r'\('
t_RPAREN = r'\)'
t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*'
def t_NUMBER(self, t):
r'\d+'
try:
t.value = int(t.value)
except ValueError:
print("Integer value too large %s" % t.value)
t.value = 0
#print "parsed number %s" % repr(t.value)
return t
t_ignore = " \t"
def t_newline(self, t):
r'\n+'
t.lexer.lineno += t.value.count("\n")
def t_error(self, t):
print("Illegal character '%s'" % t.value[0])
t.lexer.skip(1)
# Parsing rules
precedence = (
('left','PLUS','MINUS'),
('left','TIMES','DIVIDE'),
('left', 'EXP'),
('right','UMINUS'),
)
def p_statement_assign(self, p):
'statement : NAME EQUALS expression'
self.names[p[1]] = p[3]
def p_statement_expr(self, p):
'statement : expression'
print(p[1])
def p_expression_binop(self, p):
"""
expression : expression PLUS expression
| expression MINUS expression
| expression TIMES expression
| expression DIVIDE expression
| expression EXP expression
"""
#print [repr(p[i]) for i in range(0,4)]
if p[2] == '+' : p[0] = p[1] + p[3]
elif p[2] == '-': p[0] = p[1] - p[3]
elif p[2] == '*': p[0] = p[1] * p[3]
elif p[2] == '/': p[0] = p[1] / p[3]
elif p[2] == '**': p[0] = p[1] ** p[3]
def p_expression_uminus(self, p):
'expression : MINUS expression %prec UMINUS'
p[0] = -p[2]
def p_expression_group(self, p):
'expression : LPAREN expression RPAREN'
p[0] = p[2]
def p_expression_number(self, p):
'expression : NUMBER'
p[0] = p[1]
def p_expression_name(self, p):
'expression : NAME'
try:
p[0] = self.names[p[1]]
except LookupError:
print("Undefined name '%s'" % p[1])
p[0] = 0
def p_error(self, p):
if p:
print("Syntax error at '%s'" % p.value)
else:
print("Syntax error at EOF")
if __name__ == '__main__':
calc = Calc()
calc.run()

View File

@ -0,0 +1,9 @@
An example showing how to use Python optimized mode.
To run:
- First run 'python calc.py'
- Then run 'python -OO calc.py'
If working correctly, the second version should run the
same way.

119
ply/example/optcalc/calc.py Normal file
View File

@ -0,0 +1,119 @@
# -----------------------------------------------------------------------------
# calc.py
#
# A simple calculator with variables. This is from O'Reilly's
# "Lex and Yacc", p. 63.
# -----------------------------------------------------------------------------
import sys
sys.path.insert(0,"../..")
if sys.version_info[0] >= 3:
raw_input = input
tokens = (
'NAME','NUMBER',
'PLUS','MINUS','TIMES','DIVIDE','EQUALS',
'LPAREN','RPAREN',
)
# Tokens
t_PLUS = r'\+'
t_MINUS = r'-'
t_TIMES = r'\*'
t_DIVIDE = r'/'
t_EQUALS = r'='
t_LPAREN = r'\('
t_RPAREN = r'\)'
t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*'
def t_NUMBER(t):
r'\d+'
try:
t.value = int(t.value)
except ValueError:
print("Integer value too large %s" % t.value)
t.value = 0
return t
t_ignore = " \t"
def t_newline(t):
r'\n+'
t.lexer.lineno += t.value.count("\n")
def t_error(t):
print("Illegal character '%s'" % t.value[0])
t.lexer.skip(1)
# Build the lexer
import ply.lex as lex
lex.lex(optimize=1)
# Parsing rules
precedence = (
('left','PLUS','MINUS'),
('left','TIMES','DIVIDE'),
('right','UMINUS'),
)
# dictionary of names
names = { }
def p_statement_assign(t):
'statement : NAME EQUALS expression'
names[t[1]] = t[3]
def p_statement_expr(t):
'statement : expression'
print(t[1])
def p_expression_binop(t):
    '''expression : expression PLUS expression
                  | expression MINUS expression
                  | expression TIMES expression
                  | expression DIVIDE expression'''
    # The string above is the grammar rule consumed by yacc, not prose.
    # t[1] and t[3] are the operand values, t[2] is the operator text and the
    # result is stored in t[0].  Only the four operators named in the rule
    # can reach this function, so no other case is handled.
    if t[2] == '+'  : t[0] = t[1] + t[3]
    elif t[2] == '-': t[0] = t[1] - t[3]
    elif t[2] == '*': t[0] = t[1] * t[3]
    elif t[2] == '/': t[0] = t[1] / t[3]
def p_expression_uminus(t):
'expression : MINUS expression %prec UMINUS'
t[0] = -t[2]
def p_expression_group(t):
'expression : LPAREN expression RPAREN'
t[0] = t[2]
def p_expression_number(t):
'expression : NUMBER'
t[0] = t[1]
def p_expression_name(t):
'expression : NAME'
try:
t[0] = names[t[1]]
except LookupError:
print("Undefined name '%s'" % t[1])
t[0] = 0
def p_error(t):
if t:
print("Syntax error at '%s'" % t.value)
else:
print("Syntax error at EOF")
import ply.yacc as yacc
yacc.yacc(optimize=1)
# Interactive loop: read a line, parse it, repeat until end of input.
while 1:
    try:
        s = raw_input('calc > ')
    except EOFError:
        break
    # Skip blank lines, as the other calculator examples do; parsing an
    # empty string only reports "Syntax error at EOF".
    if not s: continue
    yacc.parse(s)

117
ply/example/unicalc/calc.py Normal file
View File

@ -0,0 +1,117 @@
# -----------------------------------------------------------------------------
# calc.py
#
# A simple calculator with variables. This is from O'Reilly's
# "Lex and Yacc", p. 63.
#
# This example uses unicode strings for tokens, docstrings, and input.
# -----------------------------------------------------------------------------
import sys
sys.path.insert(0,"../..")
tokens = (
'NAME','NUMBER',
'PLUS','MINUS','TIMES','DIVIDE','EQUALS',
'LPAREN','RPAREN',
)
# Tokens
t_PLUS = ur'\+'
t_MINUS = ur'-'
t_TIMES = ur'\*'
t_DIVIDE = ur'/'
t_EQUALS = ur'='
t_LPAREN = ur'\('
t_RPAREN = ur'\)'
t_NAME = ur'[a-zA-Z_][a-zA-Z0-9_]*'
def t_NUMBER(t):
ur'\d+'
try:
t.value = int(t.value)
except ValueError:
print "Integer value too large", t.value
t.value = 0
return t
t_ignore = u" \t"
def t_newline(t):
ur'\n+'
t.lexer.lineno += t.value.count("\n")
def t_error(t):
print "Illegal character '%s'" % t.value[0]
t.lexer.skip(1)
# Build the lexer
import ply.lex as lex
lex.lex()
# Parsing rules
precedence = (
('left','PLUS','MINUS'),
('left','TIMES','DIVIDE'),
('right','UMINUS'),
)
# dictionary of names
names = { }
def p_statement_assign(p):
'statement : NAME EQUALS expression'
names[p[1]] = p[3]
def p_statement_expr(p):
'statement : expression'
print p[1]
def p_expression_binop(p):
'''expression : expression PLUS expression
| expression MINUS expression
| expression TIMES expression
| expression DIVIDE expression'''
if p[2] == u'+' : p[0] = p[1] + p[3]
elif p[2] == u'-': p[0] = p[1] - p[3]
elif p[2] == u'*': p[0] = p[1] * p[3]
elif p[2] == u'/': p[0] = p[1] / p[3]
def p_expression_uminus(p):
'expression : MINUS expression %prec UMINUS'
p[0] = -p[2]
def p_expression_group(p):
'expression : LPAREN expression RPAREN'
p[0] = p[2]
def p_expression_number(p):
'expression : NUMBER'
p[0] = p[1]
def p_expression_name(p):
'expression : NAME'
try:
p[0] = names[p[1]]
except LookupError:
print "Undefined name '%s'" % p[1]
p[0] = 0
def p_error(p):
if p:
print "Syntax error at '%s'" % p.value
else:
print "Syntax error at EOF"
import ply.yacc as yacc
yacc.yacc()
while 1:
try:
s = raw_input('calc > ')
except EOFError:
break
if not s: continue
yacc.parse(unicode(s))

41
ply/example/yply/README Normal file
View File

@ -0,0 +1,41 @@
yply.py
This example implements a program yply.py that converts a UNIX-yacc
specification file into a PLY-compatible program. To use, simply
run it like this:
% python yply.py [-nocode] inputfile.y >myparser.py
The output of this program is Python code. In the output,
any C code in the original file is included, but is commented out.
If you use the -nocode option, then all of the C code in the
original file is just discarded.
To use the resulting grammar with PLY, you'll need to edit the
myparser.py file. Within this file, some stub code is included that
can be used to test the construction of the parsing tables. However,
you'll need to do more editing to make a workable parser.
Disclaimer: This is just an example I threw together in an afternoon.
It might have some bugs. However, it worked when I tried it on
a yacc-specified C++ parser containing 442 rules and 855 parsing
states.
Comments:
1. This example does not parse specification files meant for lex/flex.
You'll need to specify the tokenizer on your own.
2. This example shows a number of interesting PLY features including
- Parsing of literal text delimited by nested parentheses
- Some interaction between the parser and the lexer.
- Use of literals in the grammar specification
- One pass compilation. The program just emits the result,
there is no intermediate parse tree.
3. This program could probably be cleaned up and enhanced a lot.
It would be great if someone wanted to work on this (hint).
-Dave

112
ply/example/yply/ylex.py Normal file
View File

@ -0,0 +1,112 @@
# lexer for yacc-grammars
#
# Author: David Beazley (dave@dabeaz.com)
# Date : October 2, 2006
import sys
sys.path.append("../..")
from ply import *
tokens = (
'LITERAL','SECTION','TOKEN','LEFT','RIGHT','PREC','START','TYPE','NONASSOC','UNION','CODE',
'ID','QLITERAL','NUMBER',
)
states = (('code','exclusive'),)
literals = [ ';', ',', '<', '>', '|',':' ]
t_ignore = ' \t'
t_TOKEN = r'%token'
t_LEFT = r'%left'
t_RIGHT = r'%right'
t_NONASSOC = r'%nonassoc'
t_PREC = r'%prec'
t_START = r'%start'
t_TYPE = r'%type'
t_UNION = r'%union'
t_ID = r'[a-zA-Z_][a-zA-Z_0-9]*'
t_QLITERAL = r'''(?P<quote>['"]).*?(?P=quote)'''
t_NUMBER = r'\d+'
def t_SECTION(t):
    r'%%'
    # When the lexer's "lastsection" flag is set, everything after this
    # marker is placed in the token value and the lexer position is moved to
    # the end of the input.  Otherwise the flag is initialised to 0 and the
    # marker is returned unchanged.
    lexer = t.lexer
    if not getattr(lexer, "lastsection", 0):
        lexer.lastsection = 0
        return t
    source = lexer.lexdata
    t.value = source[t.lexpos+2:]
    lexer.lexpos = len(source)
    return t
# Comments
def t_ccomment(t):
r'/\*(.|\n)*?\*/'
t.lexer.lineno += t.value.count('\n')
t_ignore_cppcomment = r'//.*'
def t_LITERAL(t):
r'%\{(.|\n)*?%\}'
t.lexer.lineno += t.value.count("\n")
return t
def t_NEWLINE(t):
r'\n'
t.lexer.lineno += 1
def t_code(t):
r'\{'
t.lexer.codestart = t.lexpos
t.lexer.level = 1
t.lexer.begin('code')
def t_code_ignore_string(t):
r'\"([^\\\n]|(\\.))*?\"'
def t_code_ignore_char(t):
r'\'([^\\\n]|(\\.))*?\''
def t_code_ignore_comment(t):
r'/\*(.|\n)*?\*/'
def t_code_ignore_cppcom(t):
r'//.*'
def t_code_lbrace(t):
r'\{'
t.lexer.level += 1
def t_code_rbrace(t):
    r'\}'
    # Each closing brace lowers the nesting level.  Only the brace that
    # brings the level back to zero produces a token: a CODE token whose
    # value is the input text from the recorded start position through this
    # brace.  The lexer then returns to the INITIAL state.
    lexer = t.lexer
    lexer.level -= 1
    if lexer.level != 0:
        return None
    t.type = 'CODE'
    t.value = lexer.lexdata[lexer.codestart:t.lexpos+1]
    lexer.begin('INITIAL')
    lexer.lineno += t.value.count('\n')
    return t
t_code_ignore_nonspace = r'[^\s\}\'\"\{]+'
t_code_ignore_whitespace = r'\s+'
t_code_ignore = ""
def t_code_error(t):
raise RuntimeError
def t_error(t):
    """Report an illegal character, dump the remaining input and skip one character."""
    # Single-argument print(...) calls behave identically under Python 2 and
    # Python 3, unlike the print statement.
    print("%d: Illegal character '%s'" % (t.lexer.lineno, t.value[0]))
    print(t.value)
    t.lexer.skip(1)
lex.lex()
if __name__ == '__main__':
lex.runmain()

217
ply/example/yply/yparse.py Normal file
View File

@ -0,0 +1,217 @@
# parser for Unix yacc-based grammars
#
# Author: David Beazley (dave@dabeaz.com)
# Date : October 2, 2006
import ylex
tokens = ylex.tokens
from ply import *
tokenlist = []
preclist = []
emit_code = 1
def p_yacc(p):
'''yacc : defsection rulesection'''
def p_defsection(p):
'''defsection : definitions SECTION
| SECTION'''
p.lexer.lastsection = 1
print "tokens = ", repr(tokenlist)
print
print "precedence = ", repr(preclist)
print
print "# -------------- RULES ----------------"
print
def p_rulesection(p):
'''rulesection : rules SECTION'''
print "# -------------- RULES END ----------------"
print_code(p[2],0)
def p_definitions(p):
'''definitions : definitions definition
| definition'''
def p_definition_literal(p):
'''definition : LITERAL'''
print_code(p[1],0)
def p_definition_start(p):
'''definition : START ID'''
print "start = '%s'" % p[2]
def p_definition_token(p):
'''definition : toktype opttype idlist optsemi '''
for i in p[3]:
if i[0] not in "'\"":
tokenlist.append(i)
if p[1] == '%left':
preclist.append(('left',) + tuple(p[3]))
elif p[1] == '%right':
preclist.append(('right',) + tuple(p[3]))
elif p[1] == '%nonassoc':
preclist.append(('nonassoc',)+ tuple(p[3]))
def p_toktype(p):
'''toktype : TOKEN
| LEFT
| RIGHT
| NONASSOC'''
p[0] = p[1]
def p_opttype(p):
'''opttype : '<' ID '>'
| empty'''
def p_idlist(p):
    '''idlist  : idlist optcomma tokenid
               | tokenid'''
    # Builds a Python list of token ids: a lone tokenid starts a new list,
    # otherwise the new id is appended to the list carried in p[1], which is
    # reused as the result.
    if len(p) != 2:
        ids = p[1]
        ids.append(p[3])
        p[0] = ids
    else:
        p[0] = [p[1]]
def p_tokenid(p):
'''tokenid : ID
| ID NUMBER
| QLITERAL
| QLITERAL NUMBER'''
p[0] = p[1]
def p_optsemi(p):
'''optsemi : ';'
| empty'''
def p_optcomma(p):
'''optcomma : ','
| empty'''
def p_definition_type(p):
'''definition : TYPE '<' ID '>' namelist optsemi'''
# type declarations are ignored
def p_namelist(p):
'''namelist : namelist optcomma ID
| ID'''
def p_definition_union(p):
'''definition : UNION CODE optsemi'''
# Union declarations are ignored
def p_rules(p):
'''rules : rules rule
| rule'''
if len(p) == 2:
rule = p[1]
else:
rule = p[2]
# Print out a Python equivalent of this rule
embedded = [ ] # Embedded actions (a mess)
embed_count = 0
rulename = rule[0]
rulecount = 1
for r in rule[1]:
# r contains one of the rule possibilities
print "def p_%s_%d(p):" % (rulename,rulecount)
prod = []
prodcode = ""
for i in range(len(r)):
item = r[i]
if item[0] == '{': # A code block
if i == len(r) - 1:
prodcode = item
break
else:
# an embedded action
embed_name = "_embed%d_%s" % (embed_count,rulename)
prod.append(embed_name)
embedded.append((embed_name,item))
embed_count += 1
else:
prod.append(item)
print " '''%s : %s'''" % (rulename, " ".join(prod))
# Emit code
print_code(prodcode,4)
print
rulecount += 1
for e,code in embedded:
print "def p_%s(p):" % e
print " '''%s : '''" % e
print_code(code,4)
print
def p_rule(p):
'''rule : ID ':' rulelist ';' '''
p[0] = (p[1],[p[3]])
def p_rule2(p):
'''rule : ID ':' rulelist morerules ';' '''
p[4].insert(0,p[3])
p[0] = (p[1],p[4])
def p_rule_empty(p):
'''rule : ID ':' ';' '''
p[0] = (p[1],[[]])
def p_rule_empty2(p):
'''rule : ID ':' morerules ';' '''
p[3].insert(0,[])
p[0] = (p[1],p[3])
def p_morerules(p):
    '''morerules : morerules '|' rulelist
                 | '|' rulelist
                 | '|'  '''
    # Collects the alternatives that follow the first one in a rule.  Each
    # alternative is a list of items; a bare '|' contributes an empty one.
    count = len(p)
    if count == 2:
        p[0] = [[]]
        return
    if count == 3:
        p[0] = [p[2]]
        return
    alternatives = p[1]
    alternatives.append(p[3])
    p[0] = alternatives
def p_rulelist(p):
    '''rulelist : rulelist ruleitem
                | ruleitem'''
    # Accumulates the items of one alternative into a Python list: a single
    # ruleitem starts the list, later items are appended to the list in p[1].
    if len(p) == 2:
        p[0] = [p[1]]
        return
    items = p[1]
    items.append(p[2])
    p[0] = items
def p_ruleitem(p):
'''ruleitem : ID
| QLITERAL
| CODE
| PREC'''
p[0] = p[1]
def p_empty(p):
'''empty : '''
def p_error(p):
pass
yacc.yacc(debug=0)
def print_code(code, indent):
    """Print ``code`` as comment lines, or nothing when ``emit_code`` is false.

    Each line of ``code`` is written to standard output prefixed with
    ``indent`` spaces followed by ``# ``.
    """
    if not emit_code:
        return
    prefix = " " * indent
    for line in code.splitlines():
        # Single-argument print(...) behaves identically under Python 2
        # and Python 3, unlike the print statement.
        print("%s# %s" % (prefix, line))

53
ply/example/yply/yply.py Executable file
View File

@ -0,0 +1,53 @@
#!/usr/local/bin/python
# yply.py
#
# Author: David Beazley (dave@dabeaz.com)
# Date : October 2, 2006
#
# Converts a UNIX-yacc specification file into a PLY-compatible
# specification. To use, simply do this:
#
# % python yply.py [-nocode] inputfile.y >myparser.py
#
# The output of this program is Python code. In the output,
# any C code in the original file is included, but is commented.
# If you use the -nocode option, then all of the C code in the
# original file is discarded.
#
# Disclaimer: This is just an example I threw together in an afternoon.
# It might have some bugs. However, it worked when I tried it on
# a yacc-specified C++ parser containing 442 rules and 855 parsing
# states.
#
import sys
sys.path.insert(0,"../..")
import ylex
import yparse
from ply import *
# Command line: yply.py [-nocode] inputfile
if len(sys.argv) == 1:
    print("usage : yply.py [-nocode] inputfile")
    raise SystemExit

if len(sys.argv) == 3:
    if sys.argv[1] == '-nocode':
        # Suppress the commented-out copy of the C code in the output.
        yparse.emit_code = 0
    else:
        print("Unknown option '%s'" % sys.argv[1])
        raise SystemExit
    filename = sys.argv[2]
else:
    filename = sys.argv[1]

# Read the whole specification, closing the file before parsing starts
# (the bare open(...).read() left the handle for the garbage collector).
with open(filename) as grammar_file:
    grammar_text = grammar_file.read()
yacc.parse(grammar_text)

# Trailer appended to the generated module so that it can be run directly.
print("""
if __name__ == '__main__':
    from ply import *
    yacc.yacc()
""")

4
ply/ply/__init__.py Normal file
View File

@ -0,0 +1,4 @@
# PLY package
# Author: David Beazley (dave@dabeaz.com)
__all__ = ['lex','yacc']

898
ply/ply/cpp.py Normal file
View File

@ -0,0 +1,898 @@
# -----------------------------------------------------------------------------
# cpp.py
#
# Author: David Beazley (http://www.dabeaz.com)
# Copyright (C) 2007
# All rights reserved
#
# This module implements an ANSI-C style lexical preprocessor for PLY.
# -----------------------------------------------------------------------------
from __future__ import generators
# -----------------------------------------------------------------------------
# Default preprocessor lexer definitions. These tokens are enough to get
# a basic preprocessor working. Other modules may import these if they want
# -----------------------------------------------------------------------------
tokens = (
'CPP_ID','CPP_INTEGER', 'CPP_FLOAT', 'CPP_STRING', 'CPP_CHAR', 'CPP_WS', 'CPP_COMMENT', 'CPP_POUND','CPP_DPOUND'
)
literals = "+-*/%|&~^<>=!?()[]{}.,;:\\\'\""
# Whitespace
def t_CPP_WS(t):
    r'\s+'
    # The string above is the token's regular expression (PLY rule convention),
    # so it must stay exactly as written.
    # Keep the lexer's line counter in step with newlines inside the whitespace run.
    newline_count = t.value.count("\n")
    t.lexer.lineno += newline_count
    return t
t_CPP_POUND = r'\#'
t_CPP_DPOUND = r'\#\#'
# Identifier
t_CPP_ID = r'[A-Za-z_][\w_]*'
# Integer literal
def CPP_INTEGER(t):
    r'(((((0x)|(0X))[0-9a-fA-F]+)|(\d+))([uU][lL]|[lL][uU]|[uU]|[lL])?)'
    # The string above is the token's regular expression.  The two-letter
    # suffixes (UL / LU) are listed before the one-letter ones: regex
    # alternation takes the first alternative that matches, so with the short
    # forms first "10UL" would be cut into "10U" followed by a stray "L".
    return t

# Register the function under the t_ name so it is picked up as a lexer rule
t_CPP_INTEGER = CPP_INTEGER
# Floating literal
t_CPP_FLOAT = r'((\d+)(\.\d+)(e(\+|-)?(\d+))? | (\d+)e(\+|-)?(\d+))([lL]|[fF])?'
# String literal
def t_CPP_STRING(t):
    r'\"([^\\\n]|(\\(.|\n)))*?\"'
    # The string above is the token's regular expression; it allows a
    # backslash followed by a newline, so a literal may span several lines.
    # Count those newlines so the lexer's line number stays correct.
    spanned_lines = t.value.count("\n")
    t.lexer.lineno += spanned_lines
    return t
# Character constant 'c' or L'c'
def t_CPP_CHAR(t):
    r'(L)?\'([^\\\n]|(\\(.|\n)))*?\''
    # The string above is the token's regular expression ('c' or L'c').
    # An escaped newline may appear inside the constant; account for it in
    # the lexer's line counter.
    spanned_lines = t.value.count("\n")
    t.lexer.lineno += spanned_lines
    return t
# Comment
def t_CPP_COMMENT(t):
    r'(/\*(.|\n)*?\*/)|(//.*?\n)'
    # The string above is the token's regular expression: a /* ... */ block
    # or a // comment including its terminating newline.
    # Advance the lexer's line counter by every newline the comment covers.
    spanned_lines = t.value.count("\n")
    t.lexer.lineno += spanned_lines
    return t
def t_error(t):
    # Instead of failing, turn the first unmatched character into a token
    # whose type and value are both that character, then step past it.
    bad_char = t.value[0]
    t.type = bad_char
    t.value = bad_char
    t.lexer.skip(1)
    return t
import re
import copy
import time
import os.path
# -----------------------------------------------------------------------------
# trigraph()
#
# Given an input string, this function replaces all trigraph sequences.
# The following mapping is used:
#
# ??= #
# ??/ \
# ??' ^
# ??( [
# ??) ]
# ??! |
# ??< {
# ??> }
# ??- ~
# -----------------------------------------------------------------------------
# Matches "??" followed by one of the nine trigraph selector characters
_trigraph_pat = re.compile(r'''\?\?[=/\'\(\)\!<>\-]''')

# Selector character (third character of the trigraph) -> replacement
_trigraph_rep = {
    '=':'#',
    '/':'\\',
    "'":'^',
    '(':'[',
    ')':']',
    '!':'|',
    '<':'{',
    '>':'}',
    '-':'~'
}

def trigraph(input):
    """Return *input* with every trigraph sequence replaced by its single character."""
    def _replacement(match):
        # The last character of the matched text selects the replacement
        selector = match.group()[-1]
        return _trigraph_rep[selector]

    return _trigraph_pat.sub(_replacement, input)
# ------------------------------------------------------------------
# Macro object
#
# This object holds information about preprocessor macros
#
# .name - Macro name (string)
# .value - Macro value (a list of tokens)
# .arglist - List of argument names
# .variadic - Boolean indicating whether or not variadic macro
# .vararg - Name of the variadic parameter
#
# When a macro is created, the macro replacement token sequence is
# pre-scanned and used to create patch lists that are later used
# during macro expansion
# ------------------------------------------------------------------
class Macro(object):
    """Record describing one preprocessor macro.

    Attributes:
        name     -- macro name (string)
        value    -- replacement text as a list of tokens
        arglist  -- list of argument names, or None when no list was given
        variadic -- True when the macro takes a variable number of arguments
        vararg   -- name of the variadic parameter (last entry of arglist),
                    or None for non-variadic macros
        source   -- initialised to None
    """
    def __init__(self,name,value,arglist=None,variadic=False):
        self.name = name
        self.value = value
        self.arglist = arglist
        self.variadic = variadic
        # Always define vararg so the attribute exists on every instance,
        # not only on variadic macros.
        self.vararg = arglist[-1] if variadic else None
        self.source = None
# ------------------------------------------------------------------
# Preprocessor object
#
# Object representing a preprocessor. Contains macro definitions,
# include directories, and other information
# ------------------------------------------------------------------
class Preprocessor(object):
def __init__(self,lexer=None):
    """Create a preprocessor on top of *lexer*.

    lexer -- lexer object to tokenize with; when None, the most recently
             built PLY lexer (ply.lex.lexer) is used.
    """
    if lexer is None:
        # `lex` is only imported by this file's __main__ section, so bring it
        # into scope here; otherwise the default path raises NameError.
        import ply.lex as lex
        lexer = lex.lexer
    self.lexer = lexer
    self.macros = { }
    self.path = []
    self.temp_path = []
    # Error reporting reads self.source; give it a value before any input
    # has been parsed.
    self.source = None

    # Probe the lexer for selected tokens
    self.lexprobe()

    # Predefine the date/time macros from the current local time
    tm = time.localtime()
    self.define("__DATE__ \"%s\"" % time.strftime("%b %d %Y",tm))
    self.define("__TIME__ \"%s\"" % time.strftime("%H:%M:%S",tm))
    self.parser = None
# -----------------------------------------------------------------------------
# tokenize()
#
# Utility function. Given a string of text, tokenize into a list of tokens
# -----------------------------------------------------------------------------
def tokenize(self,text):
    """Run *text* through self.lexer and return all resulting tokens as a list."""
    collected = []
    self.lexer.input(text)
    tok = self.lexer.token()
    # A false value from token() marks the end of the input
    while tok:
        collected.append(tok)
        tok = self.lexer.token()
    return collected
# ---------------------------------------------------------------------
# error()
#
# Report a preprocessor error/warning of some kind
# ----------------------------------------------------------------------
def error(self,file,line,msg):
    """Print a diagnostic in the form "file:line message" to standard output."""
    report = "%s:%d %s" % (file,line,msg)
    print(report)
# ----------------------------------------------------------------------
# lexprobe()
#
# This method probes the preprocessor lexer object to discover
# the token types of symbols that are important to the preprocessor.
# If this works right, the preprocessor will simply "work"
# with any suitable lexer regardless of how tokens have been named.
# ----------------------------------------------------------------------
def lexprobe(self):
    """Feed sample inputs to self.lexer and record the token types it reports.

    Sets self.t_ID, self.t_INTEGER, self.t_INTEGER_TYPE, self.t_STRING,
    self.t_SPACE, self.t_NEWLINE and self.t_WS.  Problems are printed; the
    identifier/integer/string attributes are left unset when their probe fails.
    """
    def first_token(text):
        # Lex `text` and return only the first token produced
        self.lexer.input(text)
        return self.lexer.token()

    # Determine the token type for identifiers
    tok = first_token("identifier")
    if tok and tok.value == "identifier":
        self.t_ID = tok.type
    else:
        print("Couldn't determine identifier type")

    # Determine the token type for integers
    tok = first_token("12345")
    if tok and int(tok.value) == 12345:
        self.t_INTEGER = tok.type
        self.t_INTEGER_TYPE = type(tok.value)
    else:
        print("Couldn't determine integer type")

    # Determine the token type for strings enclosed in double quotes
    tok = first_token("\"filename\"")
    if tok and tok.value == "\"filename\"":
        self.t_STRING = tok.type
    else:
        print("Couldn't determine string type")

    # Determine the token type for whitespace--if any
    tok = first_token("  ")
    if tok and tok.value == "  ":
        self.t_SPACE = tok.type
    else:
        self.t_SPACE = None

    # Determine the token type for newlines
    tok = first_token("\n")
    if tok and tok.value == "\n":
        self.t_NEWLINE = tok.type
    else:
        self.t_NEWLINE = None
        print("Couldn't determine token for newlines")

    self.t_WS = (self.t_SPACE, self.t_NEWLINE)

    # Check for other characters used by the preprocessor
    for c in [ '<','>','#','##','\\','(',')',',','.']:
        tok = first_token(c)
        if not tok or tok.value != c:
            print("Unable to lex '%s' required for preprocessor" % c)
# ----------------------------------------------------------------------
# add_path()
#
# Adds a search path to the preprocessor.
# ----------------------------------------------------------------------
def add_path(self,path):
    """Append *path* to the end of the include search list (self.path)."""
    search_dirs = self.path
    search_dirs.append(path)
# ----------------------------------------------------------------------
# group_lines()
#
# Given an input string, this function splits it into lines. Trailing whitespace
# is removed. Any line ending with \ is grouped with the next line. This
# function forms the lowest level of the preprocessor---grouping text into
# a line-by-line format.
# ----------------------------------------------------------------------
def group_lines(self,input):
    """Yield the tokens of *input* one logical line at a time.

    Trailing whitespace is stripped from every physical line and a line
    ending in a backslash is joined with the line(s) that follow it.  Each
    yielded item is a list of tokens ending with the whitespace token that
    contains the newline (the last list may lack one).
    """
    lex = self.lexer.clone()
    lines = [x.rstrip() for x in input.splitlines()]
    # range() instead of xrange(): xrange does not exist on Python 3
    for i in range(len(lines)):
        j = i+1
        while lines[i].endswith('\\') and (j < len(lines)):
            # Splice the continuation into line i and blank the consumed
            # line, so the number of physical lines stays the same.
            lines[i] = lines[i][:-1]+lines[j]
            lines[j] = ""
            j += 1

    input = "\n".join(lines)
    lex.input(input)
    lex.lineno = 1

    current_line = []
    while True:
        tok = lex.token()
        if not tok:
            break
        current_line.append(tok)
        # A whitespace token containing a newline closes the current line
        if tok.type in self.t_WS and '\n' in tok.value:
            yield current_line
            current_line = []

    if current_line:
        yield current_line
# ----------------------------------------------------------------------
# tokenstrip()
#
# Remove leading/trailing whitespace tokens from a token list
# ----------------------------------------------------------------------
def tokenstrip(self,tokens):
    """Strip leading and trailing whitespace tokens from *tokens* in place.

    The same list object is returned.
    """
    ws_types = self.t_WS

    # Count the whitespace tokens at the front and drop them in one slice
    lead = 0
    for tok in tokens:
        if tok.type not in ws_types:
            break
        lead += 1
    del tokens[:lead]

    # Drop whitespace tokens from the back, one at a time
    while tokens and tokens[-1].type in ws_types:
        tokens.pop()
    return tokens
# ----------------------------------------------------------------------
# collect_args()
#
# Collects comma separated arguments from a list of tokens. The arguments
# must be enclosed in parenthesis. Returns a tuple (tokencount,args,positions)
# where tokencount is the number of tokens consumed, args is a list of arguments,
# and positions is a list of integers containing the starting index of each
# argument. Each argument is represented by a list of tokens.
#
# When collecting arguments, leading and trailing whitespace is removed
# from each argument.
#
# This function properly handles nested parenthesis and commas---these do not
# define new arguments.
# ----------------------------------------------------------------------
def collect_args(self,tokenlist):
    """Split a parenthesised, comma-separated argument list into arguments.

    Returns (tokencount, args, positions): the number of tokens consumed
    up to and including the closing ')', the arguments (each a
    whitespace-stripped token list) and a list of token indices recorded
    while scanning.  Commas inside nested parentheses do not start a new
    argument.  On a missing '(' or ')' an error is reported and
    (0, [], []) is returned.
    """
    args = []
    positions = []
    current_arg = []
    tokenlen = len(tokenlist)

    # Skip whitespace in front of the opening '('.
    start = 0
    while start < tokenlen and tokenlist[start].type in self.t_WS:
        start += 1

    if not (start < tokenlen and tokenlist[start].value == '('):
        self.error(self.source,tokenlist[0].lineno,"Missing '(' in macro arguments")
        return 0, [], []

    positions.append(start+1)
    depth = 1       # the opening '(' has been consumed
    for idx in range(start+1, tokenlen):
        tok = tokenlist[idx]
        if tok.value == '(':
            depth += 1
            current_arg.append(tok)
        elif tok.value == ')':
            depth -= 1
            if depth == 0:
                # Closing parenthesis of the argument list itself
                if current_arg:
                    args.append(self.tokenstrip(current_arg))
                    positions.append(idx)
                return idx+1,args,positions
            current_arg.append(tok)
        elif tok.value == ',' and depth == 1:
            # Top-level comma: finish this argument and start the next
            args.append(self.tokenstrip(current_arg))
            positions.append(idx+1)
            current_arg = []
        else:
            current_arg.append(tok)

    # Missing end argument
    self.error(self.source,tokenlist[-1].lineno,"Missing ')' in macro arguments")
    return 0, [],[]
# ----------------------------------------------------------------------
# macro_prescan()
#
# Examine the macro value (token sequence) and identify patch points
# This is used to speed up macro expansion later on---we'll know
# right away where to apply patches to the value to form the expansion
# ----------------------------------------------------------------------
def macro_prescan(self,macro):
    """Scan macro.value once and record where arguments must be patched in.

    Sets three lists on *macro*:
      patch           -- (kind, argnum, index) tuples; kind is 'c' for an
                         argument next to '##' and 'e' for an ordinary
                         argument reference
      str_patch       -- (argnum, index) pairs for '#arg' stringification
      var_comma_patch -- indices of a ',' that is followed by '##' and the
                         variadic parameter
    macro.value itself is edited: the '#' before a stringified argument and
    a '##' immediately before an argument are deleted.
    """
    macro.patch = [] # Standard macro arguments
    macro.str_patch = [] # String conversion expansion
    macro.var_comma_patch = [] # Variadic macro comma patch
    i = 0
    while i < len(macro.value):
        if macro.value[i].type == self.t_ID and macro.value[i].value in macro.arglist:
            argnum = macro.arglist.index(macro.value[i].value)
            # Conversion of argument to a string
            if i > 0 and macro.value[i-1].value == '#':
                # Work on a copy of the token before retyping it as a string
                macro.value[i] = copy.copy(macro.value[i])
                macro.value[i].type = self.t_STRING
                del macro.value[i-1]
                macro.str_patch.append((argnum,i-1))
                # The deletion moved the next token to index i; do not advance
                continue
            # Concatenation
            elif (i > 0 and macro.value[i-1].value == '##'):
                macro.patch.append(('c',argnum,i-1))
                del macro.value[i-1]
                # As above: the next token is now at index i
                continue
            elif ((i+1) < len(macro.value) and macro.value[i+1].value == '##'):
                macro.patch.append(('c',argnum,i))
                i += 1
                continue
            # Standard expansion
            else:
                macro.patch.append(('e',argnum,i))
        elif macro.value[i].value == '##':
            # ", ## vararg": remember the comma so it can be removed when the
            # variadic argument turns out to be empty
            if macro.variadic and (i > 0) and (macro.value[i-1].value == ',') and \
                    ((i+1) < len(macro.value)) and (macro.value[i+1].type == self.t_ID) and \
                    (macro.value[i+1].value == macro.vararg):
                macro.var_comma_patch.append(i-1)
        i += 1
    # Highest index first, so applying one patch does not shift the position
    # of the patches still to be applied
    macro.patch.sort(key=lambda x: x[2],reverse=True)
# ----------------------------------------------------------------------
# macro_expand_args()
#
# Given a Macro and list of arguments (each a token list), this method
# returns an expanded version of a macro. The return value is a token sequence
# representing the replacement macro tokens
# ----------------------------------------------------------------------
def macro_expand_args(self,macro,args):
    """Return the replacement token list of *macro* with *args* substituted.

    macro -- a Macro that has been through macro_prescan()
    args  -- list of arguments, each a list of tokens
    """
    # Make a copy of the macro token sequence
    rep = [copy.copy(_x) for _x in macro.value]
    # Make string expansion patches. These do not alter the length of the replacement sequence
    str_expansion = {}
    for argnum, i in macro.str_patch:
        if argnum not in str_expansion:
            # Quote the argument text and double every backslash in it
            str_expansion[argnum] = ('"%s"' % "".join([x.value for x in args[argnum]])).replace("\\","\\\\")
        rep[i] = copy.copy(rep[i])
        rep[i].value = str_expansion[argnum]
    # Make the variadic macro comma patch. If the variadic macro argument is empty, we get rid
    # of the comma recorded in front of it (marked as None here, filtered out below).
    comma_patch = False
    if macro.variadic and not args[-1]:
        for i in macro.var_comma_patch:
            rep[i] = None
            comma_patch = True
    # Make all other patches. The order of these matters. It is assumed that the patch list
    # has been sorted in reverse order of patch location since replacements will cause the
    # size of the replacement sequence to expand from the patch point.
    expanded = { }
    for ptype, argnum, i in macro.patch:
        # Concatenation. Argument is left unexpanded
        if ptype == 'c':
            rep[i:i+1] = args[argnum]
        # Normal expansion. Argument is macro expanded first
        elif ptype == 'e':
            if argnum not in expanded:
                # Expand each argument at most once and reuse the result
                expanded[argnum] = self.expand_macros(args[argnum])
            rep[i:i+1] = expanded[argnum]
    # Get rid of removed comma if necessary
    if comma_patch:
        rep = [_i for _i in rep if _i]
    return rep
# ----------------------------------------------------------------------
# expand_macros()
#
# Given a list of tokens, this function performs macro expansion.
# The expanded argument is a dictionary that contains macros already
# expanded. This is used to prevent infinite recursion.
# ----------------------------------------------------------------------
def expand_macros(self,tokens,expanded=None):
    """Expand all macros in *tokens* (in place) and return the list.

    tokens   -- list of tokens to expand
    expanded -- dictionary of macro names currently being expanded; a name
                found in it is left alone, which prevents infinite recursion

    A function-like macro name that is not followed by '(' is left
    untouched.  __LINE__ is replaced by the token's line number.
    """
    if expanded is None:
        expanded = {}
    i = 0
    while i < len(tokens):
        t = tokens[i]
        if t.type == self.t_ID:
            if t.value in self.macros and t.value not in expanded:
                # Yes, we found a macro match
                expanded[t.value] = True

                m = self.macros[t.value]
                if not m.arglist:
                    # A simple macro
                    ex = self.expand_macros([copy.copy(_x) for _x in m.value],expanded)
                    for e in ex:
                        e.lineno = t.lineno
                    tokens[i:i+1] = ex
                    i += len(ex)
                else:
                    # A macro with arguments
                    j = i + 1
                    while j < len(tokens) and tokens[j].type in self.t_WS:
                        j += 1
                    # j may have run off the end of the list: check before indexing
                    if j < len(tokens) and tokens[j].value == '(':
                        tokcount,args,positions = self.collect_args(tokens[j:])
                        if not m.variadic and len(args) != len(m.arglist):
                            self.error(self.source,t.lineno,"Macro %s requires %d arguments" % (t.value,len(m.arglist)))
                            i = j + tokcount
                        elif m.variadic and len(args) < len(m.arglist)-1:
                            if len(m.arglist) > 2:
                                self.error(self.source,t.lineno,"Macro %s must have at least %d arguments" % (t.value, len(m.arglist)-1))
                            else:
                                self.error(self.source,t.lineno,"Macro %s must have at least %d argument" % (t.value, len(m.arglist)-1))
                            i = j + tokcount
                        else:
                            if m.variadic:
                                if len(args) == len(m.arglist)-1:
                                    # No variadic arguments supplied at all
                                    args.append([])
                                else:
                                    # Fold everything from the variadic position up to the
                                    # closing ')' into one argument, commas included
                                    args[len(m.arglist)-1] = tokens[j+positions[len(m.arglist)-1]:j+tokcount-1]
                                    del args[len(m.arglist):]

                            # Get macro replacement text
                            rep = self.macro_expand_args(m,args)
                            rep = self.expand_macros(rep,expanded)
                            for r in rep:
                                r.lineno = t.lineno
                            tokens[i:j+tokcount] = rep
                            i += len(rep)
                    else:
                        # The name of a function-like macro used without an
                        # argument list is not a macro call: leave the token
                        # as it is and move on (without this step the loop
                        # would examine the same token forever).
                        i += 1
                del expanded[t.value]
                continue
            elif t.value == '__LINE__':
                t.type = self.t_INTEGER
                t.value = self.t_INTEGER_TYPE(t.lineno)

        i += 1
    return tokens
# ----------------------------------------------------------------------
# evalexpr()
#
# Evaluate an expression token sequence for the purposes of evaluating
# integral expressions.
# ----------------------------------------------------------------------
def evalexpr(self,tokens):
    """Evaluate the token sequence of an #if / #elif expression.

    `defined NAME` and `defined(NAME)` become 1 or 0, macros are expanded,
    identifiers still left over count as 0, integer suffixes are stripped
    and &&, ||, ! are rewritten to Python operators before evaluation.
    Returns the value of the expression, or 0 (after reporting an error)
    when it cannot be evaluated.
    """
    # tokens = tokenize(line)
    # Search for defined macros
    i = 0
    while i < len(tokens):
        if tokens[i].type == self.t_ID and tokens[i].value == 'defined':
            j = i + 1
            needparen = False
            # Plain "0"/"1" rather than "0L"/"1L": the long suffix is not
            # valid in Python 3 source.
            result = "0"
            while j < len(tokens):
                if tokens[j].type in self.t_WS:
                    j += 1
                    continue
                elif tokens[j].type == self.t_ID:
                    if tokens[j].value in self.macros:
                        result = "1"
                    else:
                        result = "0"
                    if not needparen: break
                elif tokens[j].value == '(':
                    needparen = True
                elif tokens[j].value == ')':
                    break
                else:
                    self.error(self.source,tokens[i].lineno,"Malformed defined()")
                j += 1
            # Turn the `defined` token into the result and drop its operand
            tokens[i].type = self.t_INTEGER
            tokens[i].value = self.t_INTEGER_TYPE(result)
            del tokens[i+1:j+1]
        i += 1
    tokens = self.expand_macros(tokens)
    for i,t in enumerate(tokens):
        if t.type == self.t_ID:
            # Identifiers that survive macro expansion evaluate to 0
            tokens[i] = copy.copy(t)
            tokens[i].type = self.t_INTEGER
            tokens[i].value = self.t_INTEGER_TYPE("0")
        elif t.type == self.t_INTEGER:
            tokens[i] = copy.copy(t)
            # Strip off any trailing suffixes
            tokens[i].value = str(tokens[i].value)
            while tokens[i].value[-1] not in "0123456789abcdefABCDEF":
                tokens[i].value = tokens[i].value[:-1]

    expr = "".join([str(x.value) for x in tokens])
    expr = expr.replace("&&"," and ")
    expr = expr.replace("||"," or ")
    expr = expr.replace("!"," not ")
    # The line above also rewrote "!=" to " not ="; restore the operator
    expr = expr.replace(" not ="," !=")
    try:
        # NOTE(security): eval() executes text that comes from the source
        # being preprocessed; do not run this on untrusted input.
        result = eval(expr)
    except Exception:
        # Exception instead of StandardError, which only exists on Python 2
        self.error(self.source,tokens[0].lineno,"Couldn't evaluate expression")
        result = 0
    return result
# ----------------------------------------------------------------------
# parsegen()
#
# Parse an input string.
# ----------------------------------------------------------------------
def parsegen(self,input,source=None):
    """Preprocess *input* and yield the resulting tokens.

    input  -- text to preprocess
    source -- name used for __FILE__ and in error messages ("" when not given)

    Handles #define, #include, #undef, #ifdef, #ifndef, #if, #elif, #else
    and #endif; other directives are ignored.  Ordinary text is collected
    and macro-expanded before it is yielded.
    """
    # Replace trigraph sequences
    t = trigraph(input)
    lines = self.group_lines(t)

    if not source:
        source = ""

    self.define("__FILE__ \"%s\"" % source)

    self.source = source
    chunk = []          # tokens of ordinary text waiting to be expanded
    enable = True       # False while inside a conditional branch that is skipped
    iftrigger = False   # True once a branch of the current conditional was taken
    ifstack = []        # saved (enable, iftrigger) of the enclosing conditionals

    for x in lines:
        # Find the first non-whitespace token of the line
        for i,tok in enumerate(x):
            if tok.type not in self.t_WS: break
        if tok.value == '#':
            # Preprocessor directive

            # Keep the newline tokens of the directive line so later text
            # stays on the right line.  The test must look at the token's
            # *type*; comparing the token object itself with the type names
            # in t_WS never matches.
            for ws_tok in x:
                if ws_tok.type in self.t_WS and '\n' in ws_tok.value:
                    chunk.append(ws_tok)

            dirtokens = self.tokenstrip(x[i+1:])
            if dirtokens:
                name = dirtokens[0].value
                args = self.tokenstrip(dirtokens[1:])
            else:
                name = ""
                args = []

            if name == 'define':
                if enable:
                    # Flush pending text first: it must be expanded with the
                    # macro table as it was before this directive
                    for tok in self.expand_macros(chunk):
                        yield tok
                    chunk = []
                    self.define(args)
            elif name == 'include':
                if enable:
                    for tok in self.expand_macros(chunk):
                        yield tok
                    chunk = []
                    # The nested parse redefines __FILE__ and self.source;
                    # put both back afterwards
                    oldfile = self.macros['__FILE__']
                    for tok in self.include(args):
                        yield tok
                    self.macros['__FILE__'] = oldfile
                    self.source = source
            elif name == 'undef':
                if enable:
                    for tok in self.expand_macros(chunk):
                        yield tok
                    chunk = []
                    self.undef(args)
            elif name == 'ifdef':
                ifstack.append((enable,iftrigger))
                if enable:
                    if not args[0].value in self.macros:
                        enable = False
                        iftrigger = False
                    else:
                        iftrigger = True
            elif name == 'ifndef':
                ifstack.append((enable,iftrigger))
                if enable:
                    if args[0].value in self.macros:
                        enable = False
                        iftrigger = False
                    else:
                        iftrigger = True
            elif name == 'if':
                ifstack.append((enable,iftrigger))
                if enable:
                    result = self.evalexpr(args)
                    if not result:
                        enable = False
                        iftrigger = False
                    else:
                        iftrigger = True
            elif name == 'elif':
                if ifstack:
                    if ifstack[-1][0]: # We only pay attention if outer "if" allows this
                        if enable: # If already true, we flip enable False
                            enable = False
                        elif not iftrigger: # If False, but not triggered yet, we'll check expression
                            result = self.evalexpr(args)
                            if result:
                                enable = True
                                iftrigger = True
                else:
                    self.error(self.source,dirtokens[0].lineno,"Misplaced #elif")
            elif name == 'else':
                if ifstack:
                    if ifstack[-1][0]:
                        if enable:
                            enable = False
                        elif not iftrigger:
                            enable = True
                            iftrigger = True
                else:
                    self.error(self.source,dirtokens[0].lineno,"Misplaced #else")
            elif name == 'endif':
                if ifstack:
                    enable,iftrigger = ifstack.pop()
                else:
                    self.error(self.source,dirtokens[0].lineno,"Misplaced #endif")
            else:
                # Unknown preprocessor directive
                pass
        else:
            # Normal text
            if enable:
                chunk.extend(x)

    # Flush whatever text is left after the last line
    for tok in self.expand_macros(chunk):
        yield tok
    chunk = []
# ----------------------------------------------------------------------
# include()
#
# Implementation of file-inclusion
# ----------------------------------------------------------------------
def include(self,tokens):
    """Process the arguments of an #include directive, yielding the file's tokens.

    tokens -- tokens following `#include`.  If they do not start with '<'
              or a string literal they are macro-expanded first.

    <name> is searched in self.path, then "", then self.temp_path;
    "name" is searched in self.temp_path, then "", then self.path.  The
    first file that can be read is preprocessed with parsegen().  Problems
    are printed and nothing is yielded.
    """
    # Try to extract the filename and then process an include file
    if not tokens:
        return
    if tokens[0].value != '<' and tokens[0].type != self.t_STRING:
        tokens = self.expand_macros(tokens)
        if not tokens:
            # The macro(s) expanded to nothing
            print("Malformed #include statement")
            return

    if tokens[0].value == '<':
        # Include <...>
        i = 1
        while i < len(tokens):
            if tokens[i].value == '>':
                break
            i += 1
        else:
            print("Malformed #include <...>")
            return
        filename = "".join([x.value for x in tokens[1:i]])
        path = self.path + [""] + self.temp_path
    elif tokens[0].type == self.t_STRING:
        # Drop the surrounding quotes
        filename = tokens[0].value[1:-1]
        path = self.temp_path + [""] + self.path
    else:
        print("Malformed #include statement")
        return

    for p in path:
        iname = os.path.join(p,filename)
        # Only opening/reading the candidate may fail quietly; errors raised
        # while processing the file's contents are not swallowed.
        try:
            with open(iname,"r") as handle:
                data = handle.read()
        except IOError:
            continue
        dname = os.path.dirname(iname)
        if dname:
            # Directory of the included file is searched first for its own includes
            self.temp_path.insert(0,dname)
        try:
            for tok in self.parsegen(data,filename):
                yield tok
        finally:
            # Restore the search path even if processing is abandoned
            if dname:
                del self.temp_path[0]
        break
    else:
        print("Couldn't find '%s'" % filename)
# ----------------------------------------------------------------------
# define()
#
# Define a new macro
# ----------------------------------------------------------------------
def define(self,tokens):
    """Define a macro from the text or tokens that follow `#define`.

    tokens -- either a string such as 'NAME value' (tokenized first) or a
              list of tokens.  Handles empty macros, object-like macros and
              function-like macros, including variadic ones ("..." or
              "name...").  Malformed definitions are reported with print().
    """
    # `unicode` only exists on Python 2; referring to it unconditionally
    # raises NameError on Python 3.
    try:
        string_types = (str, unicode)
    except NameError:
        string_types = (str,)
    if isinstance(tokens,string_types):
        tokens = self.tokenize(tokens)

    linetok = tokens
    try:
        name = linetok[0]
        if len(linetok) > 1:
            mtype = linetok[1]
        else:
            mtype = None
        if not mtype:
            # Name only: macro with an empty replacement
            m = Macro(name.value,[])
            self.macros[name.value] = m
        elif mtype.type in self.t_WS:
            # A normal macro
            m = Macro(name.value,self.tokenstrip(linetok[2:]))
            self.macros[name.value] = m
        elif mtype.value == '(':
            # A macro with arguments
            tokcount, args, positions = self.collect_args(linetok[1:])
            variadic = False
            for a in args:
                if variadic:
                    print("No more arguments may follow a variadic argument")
                    break
                astr = "".join([str(_i.value) for _i in a])
                if astr == "...":
                    # Anonymous variadic parameter: name it __VA_ARGS__
                    variadic = True
                    a[0].type = self.t_ID
                    a[0].value = '__VA_ARGS__'
                    del a[1:]
                    continue
                elif astr[-3:] == "..." and a[0].type == self.t_ID:
                    variadic = True
                    del a[1:]
                    # If, for some reason, "." is part of the identifier, strip off the name for the purposes
                    # of macro expansion
                    if a[0].value[-3:] == '...':
                        a[0].value = a[0].value[:-3]
                    continue
                if len(a) > 1 or a[0].type != self.t_ID:
                    print("Invalid macro argument")
                    break
            else:
                # Only reached when every parameter was accepted (no break)
                mvalue = self.tokenstrip(linetok[1+tokcount:])
                # Remove whitespace on either side of '##'
                i = 0
                while i < len(mvalue):
                    if i+1 < len(mvalue):
                        if mvalue[i].type in self.t_WS and mvalue[i+1].value == '##':
                            del mvalue[i]
                            continue
                        elif mvalue[i].value == '##' and mvalue[i+1].type in self.t_WS:
                            del mvalue[i+1]
                    i += 1
                m = Macro(name.value,mvalue,[x[0].value for x in args],variadic)
                self.macro_prescan(m)
                self.macros[name.value] = m
        else:
            print("Bad macro definition")
    except LookupError:
        print("Bad macro definition")
# ----------------------------------------------------------------------
# undef()
#
# Undefine a macro
# ----------------------------------------------------------------------
def undef(self,tokens):
    """Remove the macro named by the first token of *tokens*.

    Undefining an unknown name, or an #undef without a name, does nothing.
    """
    if not tokens:
        # "#undef" with no name: nothing to remove
        return
    name = tokens[0].value
    self.macros.pop(name, None)
# ----------------------------------------------------------------------
# parse()
#
# Parse input text.
# ----------------------------------------------------------------------
def parse(self,input,source=None,ignore=None):
    """Start preprocessing *input*; fetch the tokens with token().

    input  -- text to preprocess
    source -- name of the input, passed on to parsegen()
    ignore -- container of token types that token() should skip
              (nothing is skipped by default)
    """
    if ignore is None:
        # A fresh container per call, rather than one default dict shared
        # by every call and every instance
        ignore = {}
    self.ignore = ignore
    self.parser = self.parsegen(input,source)
# ----------------------------------------------------------------------
# token()
#
# Method to return individual tokens
# ----------------------------------------------------------------------
def token(self):
    """Return the next preprocessed token whose type is not in self.ignore.

    Returns None when the input is exhausted, and also when no parse is
    active (before parse() or after the end was already reached).
    """
    if self.parser is None:
        # Nothing to read from; next(None) would raise TypeError
        return None
    try:
        while True:
            tok = next(self.parser)
            if tok.type not in self.ignore: return tok
    except StopIteration:
        self.parser = None
        return None
if __name__ == '__main__':
    # Command-line use: preprocess the file named by the first argument and
    # print every resulting token together with the current source name.
    import ply.lex as lex
    lexer = lex.lex()

    # Run a preprocessor
    import sys
    # Read the input and close the file before processing starts
    with open(sys.argv[1]) as f:
        input = f.read()

    p = Preprocessor(lexer)
    p.parse(input,sys.argv[1])
    while True:
        tok = p.token()
        if not tok: break
        print(p.source, tok)

133
ply/ply/ctokens.py Normal file
View File

@ -0,0 +1,133 @@
# ----------------------------------------------------------------------
# ctokens.py
#
# Token specifications for symbols in ANSI C and C++. This file is
# meant to be used as a library in other tokenizers.
# ----------------------------------------------------------------------
# Token names.  Every name listed here that has a t_<NAME> rule below must be
# spelled exactly like that rule; the lexer generator matches them by name.
# (TYPEID has no rule in this file.)
tokens = [
    # Literals (identifier, integer constant, float constant, string constant, char const)
    'ID', 'TYPEID', 'INTEGER', 'FLOAT', 'STRING', 'CHARACTER',

    # Operators (+,-,*,/,%,|,&,~,^,<<,>>, ||, &&, !, <, <=, >, >=, ==, !=)
    'PLUS', 'MINUS', 'TIMES', 'DIVIDE', 'MODULO',
    'OR', 'AND', 'NOT', 'XOR', 'LSHIFT', 'RSHIFT',
    'LOR', 'LAND', 'LNOT',
    'LT', 'LE', 'GT', 'GE', 'EQ', 'NE',

    # Assignment (=, *=, /=, %=, +=, -=, <<=, >>=, &=, ^=, |=)
    'EQUALS', 'TIMESEQUAL', 'DIVEQUAL', 'MODEQUAL', 'PLUSEQUAL', 'MINUSEQUAL',
    'LSHIFTEQUAL','RSHIFTEQUAL', 'ANDEQUAL', 'XOREQUAL', 'OREQUAL',

    # Increment/decrement (++,--)
    'INCREMENT', 'DECREMENT',

    # Structure dereference (->)
    'ARROW',

    # Ternary operator (?)
    'TERNARY',

    # Delimiters ( ) [ ] { } , . ; :
    'LPAREN', 'RPAREN',
    'LBRACKET', 'RBRACKET',
    'LBRACE', 'RBRACE',
    'COMMA', 'PERIOD', 'SEMI', 'COLON',

    # Ellipsis (...)
    'ELLIPSIS',
]

# Operators
t_PLUS = r'\+'
t_MINUS = r'-'
t_TIMES = r'\*'
t_DIVIDE = r'/'
t_MODULO = r'%'
t_OR = r'\|'
t_AND = r'&'
t_NOT = r'~'
t_XOR = r'\^'
t_LSHIFT = r'<<'
t_RSHIFT = r'>>'
t_LOR = r'\|\|'
t_LAND = r'&&'
t_LNOT = r'!'
t_LT = r'<'
t_GT = r'>'
t_LE = r'<='
t_GE = r'>='
t_EQ = r'=='
t_NE = r'!='

# Assignment operators
t_EQUALS = r'='
t_TIMESEQUAL = r'\*='
t_DIVEQUAL = r'/='
t_MODEQUAL = r'%='
t_PLUSEQUAL = r'\+='
t_MINUSEQUAL = r'-='
t_LSHIFTEQUAL = r'<<='
t_RSHIFTEQUAL = r'>>='
t_ANDEQUAL = r'&='
t_OREQUAL = r'\|='
# The caret must be escaped; a bare ^ is the start-of-input anchor
t_XOREQUAL = r'\^='

# Increment/decrement
t_INCREMENT = r'\+\+'
t_DECREMENT = r'--'

# ->
t_ARROW = r'->'

# ?
t_TERNARY = r'\?'

# Delimiters
t_LPAREN = r'\('
t_RPAREN = r'\)'
t_LBRACKET = r'\['
t_RBRACKET = r'\]'
t_LBRACE = r'\{'
t_RBRACE = r'\}'
t_COMMA = r','
t_PERIOD = r'\.'
t_SEMI = r';'
t_COLON = r':'
t_ELLIPSIS = r'\.\.\.'

# Identifiers
t_ID = r'[A-Za-z_][A-Za-z0-9_]*'

# Integer literal.  Two-letter suffixes come first: alternation takes the
# first alternative that matches, so [uU] before [uU][lL] would split "10UL".
t_INTEGER = r'\d+([uU][lL]|[lL][uU]|[uU]|[lL])?'

# Floating literal
t_FLOAT = r'((\d+)(\.\d+)(e(\+|-)?(\d+))? | (\d+)e(\+|-)?(\d+))([lL]|[fF])?'

# String literal
t_STRING = r'\"([^\\\n]|(\\.))*?\"'

# Character constant 'c' or L'c'
t_CHARACTER = r'(L)?\'([^\\\n]|(\\.))*?\''
# Comment (C-Style)
def t_COMMENT(t):
    r'/\*(.|\n)*?\*/'
    # The string above is the token's regular expression (a /* ... */ block).
    # Advance the lexer's line counter by the newlines inside the comment.
    spanned_lines = t.value.count('\n')
    t.lexer.lineno += spanned_lines
    return t
# Comment (C++-Style)
def t_CPPCOMMENT(t):
    r'//.*\n'
    # The string above is the token's regular expression: it ends at, and
    # includes, exactly one newline, so the line counter moves on by one.
    current = t.lexer.lineno
    t.lexer.lineno = current + 1
    return t

1058
ply/ply/lex.py Normal file

File diff suppressed because it is too large Load Diff

3276
ply/ply/yacc.py Normal file

File diff suppressed because it is too large Load Diff

32
ply/setup.py Executable file
View File

@ -0,0 +1,32 @@
#!/usr/bin/python
# Prefer setuptools; fall back to distutils when it is not installed
try:
    from setuptools import setup
except ImportError:
    from distutils.core import setup

# Text passed to setup() as long_description
_LONG_DESCRIPTION = """
PLY is yet another implementation of lex and yacc for Python. Some notable
features include the fact that its implemented entirely in Python and it
uses LALR(1) parsing which is efficient and well suited for larger grammars.
PLY provides most of the standard lex/yacc features including support for empty
productions, precedence rules, error recovery, and support for ambiguous grammars.
PLY is extremely easy to use and provides very extensive error checking.
It is compatible with both Python 2 and Python 3.
"""

setup(
    name="ply",
    description="Python Lex & Yacc",
    long_description=_LONG_DESCRIPTION,
    license="""BSD""",
    version="3.4",
    author="David Beazley",
    author_email="dave@dabeaz.com",
    maintainer="David Beazley",
    maintainer_email="dave@dabeaz.com",
    url="http://www.dabeaz.com/ply/",
    packages=['ply'],
    classifiers=[
        'Programming Language :: Python :: 3',
        'Programming Language :: Python :: 2',
    ],
)

7
ply/test/README Normal file
View File

@ -0,0 +1,7 @@
This directory mostly contains tests for various types of error
conditions. To run:
$ python testlex.py .
$ python testyacc.py .
The script 'cleanup.sh' cleans up this directory to its original state.

49
ply/test/calclex.py Normal file
View File

@ -0,0 +1,49 @@
# -----------------------------------------------------------------------------
# calclex.py
# -----------------------------------------------------------------------------
import sys
if ".." not in sys.path: sys.path.insert(0,"..")
import ply.lex as lex
tokens = (
'NAME','NUMBER',
'PLUS','MINUS','TIMES','DIVIDE','EQUALS',
'LPAREN','RPAREN',
)
# Tokens
t_PLUS = r'\+'
t_MINUS = r'-'
t_TIMES = r'\*'
t_DIVIDE = r'/'
t_EQUALS = r'='
t_LPAREN = r'\('
t_RPAREN = r'\)'
t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*'
def t_NUMBER(t):
    r'\d+'
    # The string above is the token's regular expression.
    # Replace the matched text by its integer value; fall back to 0 (with a
    # message) if the conversion fails.
    text = t.value
    try:
        number = int(text)
    except ValueError:
        print("Integer value too large %s" % text)
        number = 0
    t.value = number
    return t
t_ignore = " \t"
def t_newline(t):
    r'\n+'
    # The string above is the token's regular expression.
    # Newlines produce no token (nothing is returned), so the count has to
    # be kept on the lexer; updating the discarded token's own lineno would
    # have no lasting effect.
    t.lexer.lineno += t.value.count("\n")
def t_error(t):
    # Report the character that matched no rule, then step over it so
    # lexing can continue with the next character.
    offending = t.value[0]
    print("Illegal character '%s'" % offending)
    t.lexer.skip(1)
# Build the lexer
lex.lex()

4
ply/test/cleanup.sh Executable file
View File

@ -0,0 +1,4 @@
#!/bin/sh
rm -rf *~ *.pyc *.pyo *.dif *.out __pycache__

54
ply/test/lex_closure.py Normal file
View File

@ -0,0 +1,54 @@
# -----------------------------------------------------------------------------
# lex_closure.py
# -----------------------------------------------------------------------------
import sys
if ".." not in sys.path: sys.path.insert(0,"..")
import ply.lex as lex
tokens = (
'NAME','NUMBER',
'PLUS','MINUS','TIMES','DIVIDE','EQUALS',
'LPAREN','RPAREN',
)
def make_calc():
    # Builds the calculator lexer from rules that exist only as local names
    # of this function, and returns the lexer object.
    # NOTE(review): lex.lex() is called without arguments, so it presumably
    # discovers the t_ names below from this function's local scope -- the
    # names must therefore stay exactly as they are.

    # Tokens
    t_PLUS = r'\+'
    t_MINUS = r'-'
    t_TIMES = r'\*'
    t_DIVIDE = r'/'
    t_EQUALS = r'='
    t_LPAREN = r'\('
    t_RPAREN = r'\)'
    t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*'

    def t_NUMBER(t):
        r'\d+'
        # Convert the matched digits to an int; 0 (with a message) on failure
        try:
            t.value = int(t.value)
        except ValueError:
            print("Integer value too large %s" % t.value)
            t.value = 0
        return t

    t_ignore = " \t"

    def t_newline(t):
        r'\n+'
        # Returns nothing, so newlines produce no token
        t.lineno += t.value.count("\n")

    def t_error(t):
        # Report the unmatched character and skip over it
        print("Illegal character '%s'" % t.value[0])
        t.lexer.skip(1)

    # Build the lexer
    return lex.lex()
make_calc()
lex.runmain(data="3+4")

26
ply/test/lex_doc1.py Normal file
View File

@ -0,0 +1,26 @@
# lex_doc1.py
#
# Missing documentation string
import sys
if ".." not in sys.path: sys.path.insert(0,"..")
import ply.lex as lex
tokens = [
    "PLUS",
    "MINUS",
    "NUMBER",
    ]
t_PLUS = r'\+'
t_MINUS = r'-'
def t_NUMBER(t):  # deliberately has no regex docstring: this is the error under test
    pass
def t_error(t):
    pass
lex.lex()  # NOTE(review): keep the line count stable; the test driver may compare reported line numbers

29
ply/test/lex_dup1.py Normal file
View File

@ -0,0 +1,29 @@
# lex_dup1.py
#
# Duplicated rule specifiers
import sys
if ".." not in sys.path: sys.path.insert(0,"..")
import ply.lex as lex
tokens = [
    "PLUS",
    "MINUS",
    "NUMBER",
    ]
t_PLUS = r'\+'
t_MINUS = r'-'
t_NUMBER = r'\d+'
t_NUMBER = r'\d+'  # intentional duplicate of the rule above: this is the error under test
def t_error(t):
    pass
lex.lex()  # NOTE(review): keep the line count stable; the test driver may compare reported line numbers

33
ply/test/lex_dup2.py Normal file
View File

@ -0,0 +1,33 @@
# lex_dup2.py
#
# Duplicated rule specifiers
import sys
if ".." not in sys.path: sys.path.insert(0,"..")
import ply.lex as lex
tokens = [
    "PLUS",
    "MINUS",
    "NUMBER",
    ]
t_PLUS = r'\+'
t_MINUS = r'-'
def t_NUMBER(t):
    r'\d+'
    pass
def t_NUMBER(t):  # intentional second definition of the same rule: this is the error under test
    r'\d+'
    pass
def t_error(t):
    pass
lex.lex()  # NOTE(review): keep the line count stable; the test driver may compare reported line numbers

31
ply/test/lex_dup3.py Normal file
View File

@ -0,0 +1,31 @@
# lex_dup3.py
#
# Duplicated rule specifiers
import sys
if ".." not in sys.path: sys.path.insert(0,"..")
import ply.lex as lex
tokens = [
    "PLUS",
    "MINUS",
    "NUMBER",
    ]
t_PLUS = r'\+'
t_MINUS = r'-'
t_NUMBER = r'\d+'
def t_NUMBER(t):  # intentionally redefines the string rule above as a function: this is the error under test
    r'\d+'
    pass
def t_error(t):
    pass
lex.lex()  # NOTE(review): keep the line count stable; the test driver may compare reported line numbers

20
ply/test/lex_empty.py Normal file
View File

@ -0,0 +1,20 @@
# lex_empty.py
#
# No rules defined
import sys
if ".." not in sys.path: sys.path.insert(0,"..")
import ply.lex as lex
tokens = [
    "PLUS",
    "MINUS",
    "NUMBER",
    ]
lex.lex()  # no t_ rules are defined on purpose: this is the error under test

24
ply/test/lex_error1.py Normal file
View File

@ -0,0 +1,24 @@
# lex_error1.py
#
# Missing t_error() rule
import sys
if ".." not in sys.path: sys.path.insert(0,"..")
import ply.lex as lex
tokens = [
    "PLUS",
    "MINUS",
    "NUMBER",
    ]
t_PLUS = r'\+'
t_MINUS = r'-'
t_NUMBER = r'\d+'
lex.lex()  # t_error() is missing on purpose: this is the condition under test

Some files were not shown because too many files have changed in this diff Show More